yujun777 commented on code in PR #29818: URL: https://github.com/apache/doris/pull/29818#discussion_r1447367554
########## be/src/olap/storage_engine.cpp: ########## @@ -448,45 +449,85 @@ Status StorageEngine::set_cluster_id(int32_t cluster_id) { return Status::OK(); } +StorageEngine::Disk_remaining_level get_available_level(double disk_usage_percent) { + assert(disk_usage_percent >= 0 && disk_usage_percent <= 1); + if (disk_usage_percent < 0.7) { + return StorageEngine::Disk_remaining_level::LOW; + } else if (disk_usage_percent < 0.85) { + return StorageEngine::Disk_remaining_level::MID; + } else { + return StorageEngine::Disk_remaining_level::HIGH; + } + return StorageEngine::Disk_remaining_level::LOW; +} + std::vector<DataDir*> StorageEngine::get_stores_for_create_tablet( TStorageMedium::type storage_medium) { + struct DirInfo { + DataDir* data_dir; + + StorageEngine::Disk_remaining_level available_level; + + bool operator<(const DirInfo& other) { + if (available_level != other.available_level) { + return available_level < other.available_level; + } + return data_dir->path_hash() < other.data_dir->path_hash(); + } + }; + + std::vector<DirInfo> dir_infos; + int next_index = 0; + std::vector<DataDir*> stores; { std::lock_guard<std::mutex> l(_store_lock); + if (_store_next_index.find(storage_medium) == _store_next_index.end()) { + _store_next_index[storage_medium] = rand() % 100; + } + + next_index = _store_next_index[storage_medium]++; + if (next_index < 0) { + next_index = 0; + _store_next_index[storage_medium] = next_index + 1; + } + for (auto& it : _store_map) { - if (it.second->is_used()) { + DataDir* data_dir = it.second; + if (data_dir->is_used()) { if ((_available_storage_medium_type_count == 1 || - it.second->storage_medium() == storage_medium) && - !it.second->reach_capacity_limit(0)) { - stores.push_back(it.second); + data_dir->storage_medium() == storage_medium) && + !data_dir->reach_capacity_limit(0)) { + size_t disk_available = data_dir->disk_available(); + DirInfo dir_info; + dir_info.data_dir = data_dir; + dir_info.available_level = 
get_available_level(disk_available); Review Comment: disk_available is the free capacity in bytes, in the range [0, +∞), but get_available_level's input argument is the disk usage ratio in the range [0, 1]. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org