This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 2d6b54e464e [branch-2.0](create tablet) create tablet chose disk round robin #29818 #30354 #30508 (#34034)
2d6b54e464e is described below

commit 2d6b54e464e41ff926f2ba68ca63c2efedbc46d2
Author: yujun <yu.jun.re...@gmail.com>
AuthorDate: Wed May 1 21:06:34 2024 +0800

    [branch-2.0](create tablet) create tablet chose disk round robin #29818 #30354 #30508 (#34034)
---
 be/src/agent/task_worker_pool.cpp                  |   3 +-
 be/src/common/config.cpp                           |   6 +
 be/src/common/config.h                             |   6 +
 be/src/olap/data_dir.cpp                           |   6 +-
 be/src/olap/data_dir.h                             |   7 +
 be/src/olap/storage_engine.cpp                     | 196 ++++++++++++---------
 be/src/olap/storage_engine.h                       |  85 +++++----
 be/src/olap/task/engine_clone_task.cpp             |   2 +-
 .../test_partition_create_tablet_rr.groovy         | 162 +++++++++++++++++
 9 files changed, 358 insertions(+), 115 deletions(-)

diff --git a/be/src/agent/task_worker_pool.cpp b/be/src/agent/task_worker_pool.cpp
index 5c9aab3e0ba..bcf8aa93210 100644
--- a/be/src/agent/task_worker_pool.cpp
+++ b/be/src/agent/task_worker_pool.cpp
@@ -2015,7 +2015,8 @@ Status StorageMediumMigrateTaskPool::_check_migrate_request(const TStorageMedium
                                          storage_medium);
         }
         // get a random store of specified storage medium
-        auto stores = StorageEngine::instance()->get_stores_for_create_tablet(storage_medium);
+        auto stores = StorageEngine::instance()->get_stores_for_create_tablet(
+                tablet->partition_id(), storage_medium);
         if (stores.empty()) {
            return Status::InternalError("failed to get root path for create tablet");
         }
diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index 894b7e113d3..82b85b44421 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -1157,6 +1157,12 @@ DEFINE_mString(ca_cert_file_paths,
 // Retry the Open num_retries time waiting 100 milliseconds between retries.
 DEFINE_mInt32(thrift_client_open_num_tries, "1");
 
+// consider two high-usage disks to be at the same available level if their usages do not differ by more than this value.
+DEFINE_mDouble(high_disk_avail_level_diff_usages, "0.15");
+
+// lru size of the per-partition round robin disk index used when creating tablets, default 10000
+DEFINE_Int32(partition_disk_index_lru_size, "10000");
+
 // clang-format off
 #ifdef BE_TEST
 // test s3
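
For illustration, here is a minimal standalone sketch of what these two knobs are meant to do together, simplified from the storage_engine.cpp change further below (the function name and constant are hypothetical stand-ins, not BE code):

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    // Hypothetical stand-in for config::high_disk_avail_level_diff_usages.
    static const double kLevelDiffUsages = 0.15;

    // Usages below 0.7 share one level; above that, every kLevelDiffUsages of
    // extra usage starts a new level (the real code also skips grouping
    // entirely when the highest usage is below 0.7).
    std::vector<double> level_min_usages(std::vector<double> usages) {
        std::sort(usages.begin(), usages.end());
        std::vector<double> levels = {usages.front()};
        for (double u : usages) {
            if (u < 0.7 - kLevelDiffUsages / 2.0) continue;
            if (u >= levels.back() + kLevelDiffUsages) levels.push_back(u);
        }
        return levels;
    }

    int main() {
        // Disks at usages 0.66, 0.72, 0.83 produce level minimums 0.66 and
        // 0.83, i.e. two levels: [0.66, 0.72] and [0.83].
        for (double lvl : level_min_usages({0.66, 0.72, 0.83})) printf("%.2f\n", lvl);
    }
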
diff --git a/be/src/common/config.h b/be/src/common/config.h
index b2841b46f35..007c5082135 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -1209,6 +1209,12 @@ DECLARE_mString(ca_cert_file_paths);
 // Retry the Open num_retries time waiting 100 milliseconds between retries.
 DECLARE_mInt32(thrift_client_open_num_tries);
 
+// consider two high-usage disks to be at the same available level if their usages do not differ by more than this value.
+DECLARE_mDouble(high_disk_avail_level_diff_usages);
+
+// lru size of the per-partition round robin disk index used when creating tablets, default 10000
+DECLARE_Int32(partition_disk_index_lru_size);
+
 #ifdef BE_TEST
 // test s3
 DECLARE_String(test_s3_resource);
diff --git a/be/src/olap/data_dir.cpp b/be/src/olap/data_dir.cpp
index f41b2894389..a12c9155439 100644
--- a/be/src/olap/data_dir.cpp
+++ b/be/src/olap/data_dir.cpp
@@ -851,7 +851,8 @@ Status DataDir::update_capacity() {
     disks_total_capacity->set_value(_disk_capacity_bytes);
     disks_avail_capacity->set_value(_available_bytes);
    LOG(INFO) << "path: " << _path << " total capacity: " << _disk_capacity_bytes
-              << ", available capacity: " << _available_bytes;
+              << ", available capacity: " << _available_bytes << ", usage: " << get_usage(0)
+              << ", in_use: " << is_used();
 
     return Status::OK();
 }
@@ -890,8 +891,7 @@ size_t DataDir::tablet_num() const {
 }
 
 bool DataDir::reach_capacity_limit(int64_t incoming_data_size) {
-    double used_pct = (_disk_capacity_bytes - _available_bytes + incoming_data_size) /
-                      (double)_disk_capacity_bytes;
+    double used_pct = get_usage(incoming_data_size);
     int64_t left_bytes = _available_bytes - incoming_data_size;
     if (used_pct >= config::storage_flood_stage_usage_percent / 100.0 &&
         left_bytes <= config::storage_flood_stage_left_capacity_bytes) {
diff --git a/be/src/olap/data_dir.h b/be/src/olap/data_dir.h
index 51261a19b1b..81c74f3bb2e 100644
--- a/be/src/olap/data_dir.h
+++ b/be/src/olap/data_dir.h
@@ -146,6 +146,13 @@ public:
 
     void disks_compaction_num_increment(int64_t delta);
 
+    double get_usage(int64_t incoming_data_size) const {
+        return _disk_capacity_bytes == 0
+                       ? 0
+                       : (_disk_capacity_bytes - _available_bytes + incoming_data_size) /
+                                 (double)_disk_capacity_bytes;
+    }
+
     // Move tablet to trash.
     Status move_to_trash(const std::string& tablet_path);
 
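A quick worked example of the new get_usage() helper with made-up disk sizes (a standalone sketch with the DataDir members turned into parameters):

    #include <cstdint>
    #include <cstdio>

    // Same formula as DataDir::get_usage(), fields passed in explicitly.
    double get_usage(int64_t disk_capacity_bytes, int64_t available_bytes,
                     int64_t incoming_data_size) {
        return disk_capacity_bytes == 0
                       ? 0
                       : (disk_capacity_bytes - available_bytes + incoming_data_size) /
                                 (double)disk_capacity_bytes;
    }

    int main() {
        int64_t gb = 1024LL * 1024 * 1024;
        // 100 GB disk with 10 GB free: 0.90 used now, 0.95 if a 5 GB write lands.
        printf("%.2f\n", get_usage(100 * gb, 10 * gb, 0));      // 0.90
        printf("%.2f\n", get_usage(100 * gb, 10 * gb, 5 * gb)); // 0.95
        // reach_capacity_limit() compares this value against
        // storage_flood_stage_usage_percent / 100.0 (default 0.90).
    }
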
diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp
index 2bd70b57730..9ff97db8291 100644
--- a/be/src/olap/storage_engine.cpp
+++ b/be/src/olap/storage_engine.cpp
@@ -18,6 +18,7 @@
 #include "olap/storage_engine.h"
 
 // IWYU pragma: no_include <bthread/errno.h>
+#include <assert.h>
 #include <errno.h> // IWYU pragma: keep
 #include <fmt/format.h>
 #include <gen_cpp/AgentService_types.h>
@@ -91,7 +92,8 @@ using std::vector;
 
 namespace doris {
 using namespace ErrorCode;
-
+extern void get_round_robin_stores(int64 curr_index, const std::vector<DirInfo>& dir_infos,
+                                   std::vector<DataDir*>& stores);
 DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(unused_rowsets_count, MetricUnit::ROWSETS);
 
 StorageEngine* StorageEngine::_s_instance = nullptr;
@@ -129,7 +131,9 @@ StorageEngine::StorageEngine(const EngineOptions& options)
           _calc_delete_bitmap_executor(nullptr),
           _default_rowset_type(BETA_ROWSET),
           _heartbeat_flags(nullptr),
-          _stream_load_recorder(nullptr) {
+          _stream_load_recorder(nullptr),
+          _create_tablet_idx_lru_cache(
+                  new CreateTabletIdxCache(config::partition_disk_index_lru_size)) {
     _s_instance = this;
     REGISTER_HOOK_METRIC(unused_rowsets_count, [this]() {
         // std::lock_guard<std::mutex> lock(_gc_mutex);
@@ -467,106 +471,113 @@ Status StorageEngine::set_cluster_id(int32_t cluster_id) {
     return Status::OK();
 }
 
-std::vector<DataDir*> StorageEngine::get_stores_for_create_tablet(
-        TStorageMedium::type storage_medium) {
-    struct DirInfo {
-        DataDir* data_dir;
-
-        size_t disk_available;
-        //if disk_available is high, then available_level is small
-        int available_level;
-
-        int tablet_num;
+int StorageEngine::_get_and_set_next_disk_index(int64 partition_id,
+                                                TStorageMedium::type storage_medium) {
+    auto key = CreateTabletIdxCache::get_key(partition_id, storage_medium);
+    int curr_index = _create_tablet_idx_lru_cache->get_index(key);
+    // -1, lru can't find key
+    if (curr_index == -1) {
+        curr_index = std::max(0, _last_use_index[storage_medium] + 1);
+    }
+    _last_use_index[storage_medium] = curr_index;
+    _create_tablet_idx_lru_cache->set_index(key, std::max(0, curr_index + 1));
+    return curr_index;
+}
 
-        bool operator<(const DirInfo& other) const {
-            if (available_level != other.available_level) {
-                return available_level < other.available_level;
-            }
-            if (tablet_num != other.tablet_num) {
-                return tablet_num < other.tablet_num;
+void StorageEngine::_get_candidate_stores(TStorageMedium::type storage_medium,
+                                          std::vector<DirInfo>& dir_infos) {
+    std::vector<double> usages;
+    for (auto& it : _store_map) {
+        DataDir* data_dir = it.second;
+        if (data_dir->is_used()) {
+            if ((_available_storage_medium_type_count == 1 ||
+                 data_dir->storage_medium() == storage_medium) &&
+                !data_dir->reach_capacity_limit(0)) {
+                double usage = data_dir->get_usage(0);
+                DirInfo dir_info;
+                dir_info.data_dir = data_dir;
+                dir_info.usage = usage;
+                dir_info.available_level = 0;
+                usages.push_back(usage);
+                dir_infos.push_back(dir_info);
             }
-            return data_dir->path_hash() < other.data_dir->path_hash();
         }
-    };
-    std::map<size_t, int> available_levels;
-    std::vector<DirInfo> dir_infos;
-    int next_index = 0;
-    size_t max_disk_capacity = 0;
-    {
-        std::lock_guard<std::mutex> l(_store_lock);
-        next_index = _store_next_index[storage_medium]++;
-        if (next_index < 0) {
-            next_index = 0;
-            _store_next_index[storage_medium] = next_index + 1;
+    }
+
+    if (dir_infos.size() <= 1) {
+        return;
+    }
+
+    std::sort(usages.begin(), usages.end());
+    if (usages.back() < 0.7) {
+        return;
+    }
+
+    std::vector<double> level_min_usages;
+    level_min_usages.push_back(usages[0]);
+    for (auto usage : usages) {
+        // usages below 0.7 are all considered one level, allowing a small skew
+        if (usage < 0.7 - (config::high_disk_avail_level_diff_usages / 2.0)) {
+            continue;
         }
-        for (auto& it : _store_map) {
-            DataDir* data_dir = it.second;
-            if (data_dir->is_used()) {
-                if (_available_storage_medium_type_count == 1 ||
-                    data_dir->storage_medium() == storage_medium) {
-                    size_t disk_available = data_dir->disk_available();
-                    DirInfo dir_info;
-                    dir_info.data_dir = data_dir;
-                    dir_info.disk_available = disk_available;
-                    dir_infos.push_back(dir_info);
-                    available_levels[disk_available] = 0;
-                    size_t disk_capacity = data_dir->disk_capacity();
-                    if (max_disk_capacity < disk_capacity) {
-                        max_disk_capacity = disk_capacity;
-                    }
-                }
-            }
+
+        // at high usages, by default every 15% of usage is one level
+        // for example: if the disk usages are: 0.66, 0.72, 0.83
+        // then level_min_usages = [0.66, 0.83], dividing the disks into 2 levels: [0.66, 0.72], [0.83]
+        if (usage >= level_min_usages.back() + config::high_disk_avail_level_diff_usages) {
+            level_min_usages.push_back(usage);
         }
     }
+    for (auto& dir_info : dir_infos) {
+        double usage = dir_info.usage;
+        for (size_t i = 1; i < level_min_usages.size() && usage >= level_min_usages[i]; i++) {
+            dir_info.available_level++;
+        }
 
-    std::vector<DataDir*> stores;
-    if (dir_infos.empty()) {
-        return stores;
-    }
-
-    // if two disk available diff not exceeds 20% capacity, then they are the same available level.
-    size_t same_level_available_diff = std::max<size_t>(max_disk_capacity / 5, 1);
-    int level = 0;
-    size_t level_start_available = available_levels.rbegin()->first;
-    for (auto rit = available_levels.rbegin(); rit != available_levels.rend(); rit++) {
-        if (level_start_available - rit->first >= same_level_available_diff) {
-            level_start_available = rit->first;
-            level++;
+        // when usage is too high, ignore balance for now and
+        // move the disk to a higher level.
+        // for example, with two disks at usages 0.85 and 0.92, let tablets fall on the first disk.
+        // by default, storage_flood_stage_usage_percent = 90
+        if (usage > config::storage_flood_stage_usage_percent / 100.0) {
+            dir_info.available_level++;
         }
-        rit->second = level;
     }
+}
 
-    for (auto& dir_info : dir_infos) {
-        dir_info.tablet_num = dir_info.data_dir->tablet_num();
-        dir_info.available_level = available_levels[dir_info.disk_available];
+std::vector<DataDir*> StorageEngine::get_stores_for_create_tablet(
+        int64 partition_id, TStorageMedium::type storage_medium) {
+    std::vector<DirInfo> dir_infos;
+    int curr_index = 0;
+    std::vector<DataDir*> stores;
+    {
+        std::lock_guard<std::mutex> l(_store_lock);
+        curr_index = _get_and_set_next_disk_index(partition_id, storage_medium);
+        _get_candidate_stores(storage_medium, dir_infos);
     }
 
     std::sort(dir_infos.begin(), dir_infos.end());
+    get_round_robin_stores(curr_index, dir_infos, stores);
+
+    return stores;
+}
 
-    // Suppose there are five data dirs (D1, D2, D3, D4, D5).
-    // D1/D2/D3 contain 1 tablet,  D4/D5 contain 2 tablets.
-    // If three creating tablets threads simultaneously invoke this function to get stores,
-    // then the return stores will be as below:
-    // thread 1: (D1, D2, D3, D4, D5)
-    // thread 2: (D2, D3, D1, D5, D4)
-    // thread 3: (D3, D1, D2, D4, D5)
-    stores.reserve(dir_infos.size());
+// maintain round robin among the stores within each available level (LOW, MID, HIGH)
+void get_round_robin_stores(int64 curr_index, const std::vector<DirInfo>& dir_infos,
+                            std::vector<DataDir*>& stores) {
     for (size_t i = 0; i < dir_infos.size();) {
         size_t end = i + 1;
-        while (end < dir_infos.size() && dir_infos[i].tablet_num == dir_infos[end].tablet_num &&
+        while (end < dir_infos.size() &&
+               dir_infos[i].available_level == dir_infos[end].available_level) {
             end++;
         }
        // data dirs [i, end) have the same tablet size, round robin range [i, end)
         size_t count = end - i;
         for (size_t k = 0; k < count; k++) {
-            size_t index = i + (k + next_index) % count;
+            size_t index = i + (k + curr_index) % count;
             stores.push_back(dir_infos[index].data_dir);
         }
         i = end;
     }
-
-    return stores;
 }
 
 DataDir* StorageEngine::get_store(const std::string& path) {
@@ -1125,7 +1136,7 @@ Status StorageEngine::create_tablet(const TCreateTabletReq& request, RuntimeProf
     std::vector<DataDir*> stores;
     {
         SCOPED_TIMER(ADD_TIMER(profile, "GetStores"));
-        stores = get_stores_for_create_tablet(request.storage_medium);
+        stores = get_stores_for_create_tablet(request.partition_id, request.storage_medium);
     }
     if (stores.empty()) {
         return Status::Error<CE_CMD_PARAMS_ERROR>(
@@ -1135,7 +1146,8 @@ Status StorageEngine::create_tablet(const TCreateTabletReq& request, RuntimeProf
 }
 
Status StorageEngine::obtain_shard_path(TStorageMedium::type storage_medium, int64_t path_hash,
-                                        std::string* shard_path, DataDir** store) {
+                                        std::string* shard_path, DataDir** store,
+                                        int64_t partition_id) {
    LOG(INFO) << "begin to process obtain root path. storage_medium=" << storage_medium;
 
     if (shard_path == nullptr) {
@@ -1143,7 +1155,7 @@ Status StorageEngine::obtain_shard_path(TStorageMedium::type storage_medium, int
                 "invalid output parameter which is null pointer.");
     }
 
-    auto stores = get_stores_for_create_tablet(storage_medium);
+    auto stores = get_stores_for_create_tablet(partition_id, storage_medium);
     if (stores.empty()) {
         return Status::Error<NO_AVAILABLE_ROOT_PATH>(
                 "no available disk can be used to create tablet.");
@@ -1413,4 +1425,30 @@ Status StorageEngine::_persist_broken_paths() {
     return Status::OK();
 }
 
+int CreateTabletIdxCache::get_index(const std::string& key) {
+    auto cache = get();
+    auto lru_handle = cache->lookup(key);
+    if (lru_handle) {
+        Defer release([cache, lru_handle] { cache->release(lru_handle); });
+        auto value = (CacheValue*)cache->value(lru_handle);
+        value->last_visit_time = UnixMillis();
+        VLOG_DEBUG << "use create tablet idx cache key=" << key << " value=" << value->idx;
+        return value->idx;
+    }
+    return -1;
+}
+
+void CreateTabletIdxCache::set_index(const std::string& key, int next_idx) {
+    assert(next_idx >= 0);
+    CacheValue* value = new CacheValue;
+    value->last_visit_time = UnixMillis();
+    value->idx = next_idx;
+    auto deleter = [](const doris::CacheKey& key, void* value) {
+        CacheValue* cache_value = (CacheValue*)value;
+        delete cache_value;
+    };
+    auto lru_handle = get()->insert(key, value, 1, deleter, CachePriority::NORMAL, sizeof(int));
+    get()->release(lru_handle);
+}
+
 } // namespace doris
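
To make the level-grouped rotation concrete, here is a self-contained sketch of the get_round_robin_stores() loop above, with DataDir* reduced to a plain name; the disk list reproduces the D1..D5 example from the removed comment:

    #include <cstdio>
    #include <string>
    #include <vector>

    struct Dir {
        std::string name;        // stands in for DataDir*
        int available_level = 0; // dirs already sorted by level, as in the real code
    };

    // Same shape as get_round_robin_stores(): rotate within each run of equal levels.
    void round_robin(int curr_index, const std::vector<Dir>& dirs,
                     std::vector<std::string>& stores) {
        for (size_t i = 0; i < dirs.size();) {
            size_t end = i + 1;
            while (end < dirs.size() && dirs[i].available_level == dirs[end].available_level) {
                end++;
            }
            size_t count = end - i;
            for (size_t k = 0; k < count; k++) {
                stores.push_back(dirs[i + (k + curr_index) % count].name);
            }
            i = end;
        }
    }

    int main() {
        // D1/D2/D3 at level 0, D4/D5 at level 1.
        std::vector<Dir> dirs = {{"D1", 0}, {"D2", 0}, {"D3", 0}, {"D4", 1}, {"D5", 1}};
        for (int idx = 0; idx < 3; idx++) { // three consecutive create-tablet calls
            std::vector<std::string> stores;
            round_robin(idx, dirs, stores);
            for (auto& s : stores) printf("%s ", s.c_str());
            printf("\n");
        }
        // prints: D1 D2 D3 D4 D5 / D2 D3 D1 D5 D4 / D3 D1 D2 D4 D5
    }
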
diff --git a/be/src/olap/storage_engine.h b/be/src/olap/storage_engine.h
index a21ca912e55..6c1f18b4c20 100644
--- a/be/src/olap/storage_engine.h
+++ b/be/src/olap/storage_engine.h
@@ -70,6 +70,8 @@ class TabletManager;
 class Thread;
 class ThreadPool;
 class TxnManager;
+class CreateTabletIdxCache;
+struct DirInfo;
 
 using SegCompactionCandidates = std::vector<segment_v2::SegmentSharedPtr>;
using SegCompactionCandidatesSharedPtr = std::shared_ptr<SegCompactionCandidates>;
@@ -105,9 +107,11 @@ public:
 
     int64_t get_file_or_directory_size(const std::string& file_path);
 
-    // get root path for creating tablet. The returned vector of root path should be random,
+    // get root path for creating tablet. The returned vector of root paths should be round robin,
     // for avoiding that all the tablet would be deployed one disk.
-    std::vector<DataDir*> get_stores_for_create_tablet(TStorageMedium::type storage_medium);
+    std::vector<DataDir*> get_stores_for_create_tablet(int64 partition_id,
+                                                       TStorageMedium::type storage_medium);
+
     DataDir* get_store(const std::string& path);
 
     uint32_t available_storage_medium_type_count() const {
@@ -125,7 +129,7 @@ public:
    // @param [out] shard_path choose an available root_path to clone new tablet
     // @return error code
    Status obtain_shard_path(TStorageMedium::type storage_medium, int64_t path_hash,
-                             std::string* shared_path, DataDir** store);
+                             std::string* shared_path, DataDir** store, int64_t partition_id);
 
     // Load new tablet to make it effective.
     //
@@ -337,36 +341,12 @@ private:
 
     Status _persist_broken_paths();
 
-private:
-    struct CompactionCandidate {
-        CompactionCandidate(uint32_t nicumulative_compaction_, int64_t tablet_id_, uint32_t index_)
-                : nice(nicumulative_compaction_), tablet_id(tablet_id_), disk_index(index_) {}
-        uint32_t nice; // priority
-        int64_t tablet_id;
-        uint32_t disk_index = -1;
-    };
+    void _get_candidate_stores(TStorageMedium::type storage_medium,
+                               std::vector<DirInfo>& dir_infos);
 
-    // In descending order
-    struct CompactionCandidateComparator {
-        bool operator()(const CompactionCandidate& a, const CompactionCandidate& b) {
-            return a.nice > b.nice;
-        }
-    };
-
-    struct CompactionDiskStat {
-        CompactionDiskStat(std::string path, uint32_t index, bool used)
-                : storage_path(path),
-                  disk_index(index),
-                  task_running(0),
-                  task_remaining(0),
-                  is_used(used) {}
-        const std::string storage_path;
-        const uint32_t disk_index;
-        uint32_t task_running;
-        uint32_t task_remaining;
-        bool is_used;
-    };
+    int _get_and_set_next_disk_index(int64 partition_id, TStorageMedium::type storage_medium);
 
+private:
     EngineOptions _options;
     std::mutex _store_lock;
     std::mutex _trash_sweep_lock;
@@ -494,7 +474,50 @@ private:
     // next index for create tablet
     std::map<TStorageMedium::type, int> _store_next_index;
 
+    // last used disk index for create tablet, per storage medium
+    std::map<TStorageMedium::type, int> _last_use_index;
+
+    std::unique_ptr<CreateTabletIdxCache> _create_tablet_idx_lru_cache;
+
     DISALLOW_COPY_AND_ASSIGN(StorageEngine);
 };
 
+// lru cache for create tablet round robin among disks
+// key: partition_id-medium
+// value: index
+class CreateTabletIdxCache : public LRUCachePolicy {
+public:
+    // build the cache key, fields delimited with '-'
+    static std::string get_key(int64_t partition_id, TStorageMedium::type medium) {
+        return fmt::format("{}-{}", partition_id, medium);
+    }
+
+    // returns -1 if the key is not found in the lru
+    int get_index(const std::string& key);
+
+    void set_index(const std::string& key, int next_idx);
+
+    struct CacheValue : public LRUCacheValueBase {
+        int idx = 0;
+    };
+
+    CreateTabletIdxCache(size_t capacity)
+            : LRUCachePolicy("CreateTabletRRIdxCache", capacity, LRUCacheType::NUMBER,
+                             /*stale_sweep_time_s*/ 30 * 60) {}
+};
+
+struct DirInfo {
+    DataDir* data_dir;
+
+    double usage = 0;
+    int available_level = 0;
+
+    bool operator<(const DirInfo& other) const {
+        if (available_level != other.available_level) {
+            return available_level < other.available_level;
+        }
+        return data_dir->path_hash() < other.data_dir->path_hash();
+    }
+};
+
 } // namespace doris
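
A rough sketch of how CreateTabletIdxCache and _last_use_index cooperate per partition; a std::unordered_map stands in for the real LRUCachePolicy here, so eviction and locking are not modeled:

    #include <algorithm>
    #include <cstdio>
    #include <string>
    #include <unordered_map>

    // Stand-in for the LRU: "partition_id-medium" -> next round robin index.
    std::unordered_map<std::string, int> idx_cache;
    // Stand-in for _last_use_index: last index handed out per storage medium.
    std::unordered_map<int, int> last_use_index;

    int get_and_set_next_disk_index(long partition_id, int medium) {
        std::string key = std::to_string(partition_id) + "-" + std::to_string(medium);
        auto it = idx_cache.find(key);
        // On a cache miss, continue from the medium-wide last index instead of 0,
        // so new partitions do not all pile onto the first disk.
        int curr_index = (it == idx_cache.end()) ? std::max(0, last_use_index[medium] + 1)
                                                 : it->second;
        last_use_index[medium] = curr_index;
        idx_cache[key] = std::max(0, curr_index + 1);
        return curr_index;
    }

    int main() {
        // Three tablets of the same partition advance one shared index,
        // landing on three consecutive disks.
        for (int i = 0; i < 3; i++) printf("%d ", get_and_set_next_disk_index(10001, 0));
        printf("\n"); // prints: 1 2 3
    }
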
diff --git a/be/src/olap/task/engine_clone_task.cpp b/be/src/olap/task/engine_clone_task.cpp
index 60a4ccd46d1..9d35330bc55 100644
--- a/be/src/olap/task/engine_clone_task.cpp
+++ b/be/src/olap/task/engine_clone_task.cpp
@@ -249,7 +249,7 @@ Status EngineCloneTask::_do_clone() {
         DataDir* store = nullptr;
         RETURN_IF_ERROR(StorageEngine::instance()->obtain_shard_path(
                _clone_req.storage_medium, _clone_req.dest_path_hash, &local_shard_root_path,
-                &store));
+                &store, _clone_req.partition_id));
        auto tablet_dir = fmt::format("{}/{}/{}", local_shard_root_path, _clone_req.tablet_id,
                                       _clone_req.schema_hash);
 
diff --git a/regression-test/suites/partition_p0/test_partition_create_tablet_rr.groovy b/regression-test/suites/partition_p0/test_partition_create_tablet_rr.groovy
new file mode 100644
index 00000000000..f7e77f06f38
--- /dev/null
+++ b/regression-test/suites/partition_p0/test_partition_create_tablet_rr.groovy
@@ -0,0 +1,162 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import org.apache.doris.regression.suite.ClusterOptions
+import org.apache.doris.regression.util.NodeType
+import org.apache.doris.regression.suite.SuiteCluster
+
+suite("test_partition_create_tablet_rr") {
+    def options = new ClusterOptions()
+    options.beNum = 1
+    options.feConfigs.add('disable_balance=true')
+    def partition_disk_index_lru_size = 50
+    options.beConfigs += [
+        'report_tablet_interval_seconds=1',
+        'report_disk_state_interval_seconds=1',
+        "partition_disk_index_lru_size=$partition_disk_index_lru_size"
+    ]
+    options.beDisks = ['HDD=4','SSD=4']
+    options.enableDebugPoints()
+
+    def checkTabletOnDiskTabletNumEq = {tbl ->
+        sleep 5000
+
+        def tablets = sql_return_maparray "SHOW TABLETS FROM $tbl"
+        def pathTabletNum = [:]
+        tablets.each {
+            def num = pathTabletNum.get(it.PathHash)
+            if (num) {
+                pathTabletNum.put(it.PathHash, ++num)
+            } else {
+                pathTabletNum.put(it.PathHash, 1)
+            }
+        }
+
+        log.info("table ${tbl} tablet in path ${pathTabletNum.values()}")
+        def count = pathTabletNum.values().stream().distinct().count()
+        assertEquals(count, 1)
+    }
+
+    docker(options) {
+        sleep 2000
+        def single_hdd_tbl = "single_HDD_tbl"
+        def single_ssd_tbl = "single_SSD_tbl"
+        def single_partition_tbl = "single_partition_tbl"
+        sql """drop table if exists $single_hdd_tbl"""
+        sql """drop table if exists $single_ssd_tbl"""
+        sql """drop table if exists $single_partition_tbl"""
+        for (def j = 0; j < partition_disk_index_lru_size + 10; j++) {
+            def tbl = single_partition_tbl + j.toString()
+            sql """drop table if exists $tbl"""
+        }
+        try {
+            // 1. test single partition table
+            //  a. create 1 table, has 100 buckets
+            //  b. check disk's tablet num
+        
+            sql """
+                CREATE TABLE $single_hdd_tbl (
+                    `k1` int(11) NULL,
+                    `k2` int(11) NULL
+                ) DUPLICATE KEY(`k1`, `k2`)
+                COMMENT 'OLAP'
+                DISTRIBUTED BY HASH(`k1`) BUCKETS 12000
+                PROPERTIES (
+                    "replication_num"="1",
+                    "storage_medium" = "HDD"
+                );
+            """
+
+            checkTabletOnDiskTabletNumEq single_hdd_tbl
+
+            sql """
+                CREATE TABLE $single_ssd_tbl (
+                    `k1` int(11) NULL,
+                    `k2` int(11) NULL
+                ) DUPLICATE KEY(`k1`, `k2`)
+                COMMENT 'OLAP'
+                DISTRIBUTED BY HASH(`k1`) BUCKETS 12000
+                PROPERTIES (
+                    "replication_num"="1",
+                    "storage_medium" = "SSD"
+                );
+            """
+            checkTabletOnDiskTabletNumEq single_ssd_tbl
+
+            sql """
+                CREATE TABLE $single_partition_tbl
+                (
+                    k1 DATE,
+                    k2 DECIMAL(10, 2) DEFAULT "10.5",
+                    k3 CHAR(10) COMMENT "string column",
+                    k4 INT NOT NULL DEFAULT "1" COMMENT "int column"
+                )
+                DUPLICATE KEY(k1, k2)
+                PARTITION BY RANGE(k1)
+                (
+                    PARTITION p1 VALUES LESS THAN ("2020-02-01"),
+                    PARTITION p2 VALUES LESS THAN ("2020-03-01"),
+                    PARTITION p3 VALUES LESS THAN ("2020-04-01")
+                )
+                DISTRIBUTED BY HASH(k1) BUCKETS 320
+                PROPERTIES (
+                    "replication_num" = "1"
+                );
+            """
+            checkTabletOnDiskTabletNumEq single_partition_tbl
+
+            // 2. test multi thread create single partition tables
+            //  a. multi thread create partition_disk_index_lru_size + 10 table
+            //  b. check disk's tablet num
+            def futures = []
+            for (def i = 0; i < partition_disk_index_lru_size + 10; i++) {
+                def tblMulti = single_partition_tbl + i.toString()
+                futures.add(thread {
+                            sql """
+                                CREATE TABLE $tblMulti
+                                (
+                                    k1 DATE,
+                                    k2 DECIMAL(10, 2) DEFAULT "10.5",
+                                    k3 CHAR(10) COMMENT "string column",
+                                    k4 INT NOT NULL DEFAULT "1" COMMENT "int column"
+                                )
+                                DUPLICATE KEY(k1, k2)
+                                PARTITION BY RANGE(k1)
+                                (
+                                    PARTITION p1 VALUES LESS THAN ("2020-02-01"),
+                                    PARTITION p2 VALUES LESS THAN ("2020-03-01"),
+                                    PARTITION p3 VALUES LESS THAN ("2020-04-01")
+                                )
+                                DISTRIBUTED BY HASH(k1) BUCKETS 320
+                                PROPERTIES (
+                                    "replication_num" = "1"
+                                );
+                            """
+                            checkTabletOnDiskTabletNumEq tblMulti
+                })
+            }
+            futures.each { it.get() }
+        } finally {
+                sql """drop table if exists $single_hdd_tbl"""
+                sql """drop table if exists $single_ssd_tbl"""
+                sql """drop table if exists $single_partition_tbl"""
+                for (def j = 0; j < partition_disk_index_lru_size + 10; j++) {
+                    def tbl = single_partition_tbl + j.toString()
+                    sql """drop table if exists $tbl"""
+                } 
+        }
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org
