yiguolei commented on code in PR #15624: URL: https://github.com/apache/doris/pull/15624#discussion_r1062178611
########## be/src/runtime/block_spill_manager.cpp: ########## @@ -21,16 +21,28 @@ #include <boost/uuid/uuid_io.hpp> #include <random> +#include "util/file_utils.h" + namespace doris { +static const std::string BLOCK_SPILL_TMP_DIR = "spill"; BlockSpillManager::BlockSpillManager(const std::vector<StorePath>& paths) : _store_paths(paths) {} +Status BlockSpillManager::init() { + for (const auto& path : _store_paths) { + std::string dir = path.path + "/" + BLOCK_SPILL_TMP_DIR; + if (!FileUtils::check_exist(dir)) { + RETURN_IF_ERROR(FileUtils::create_dir(dir)); + } + } + return Status::OK(); +} Status BlockSpillManager::get_writer(int32_t batch_size, vectorized::BlockSpillWriterUPtr& writer) { int64_t id; std::vector<int> indices(_store_paths.size()); std::iota(indices.begin(), indices.end(), 0); std::shuffle(indices.begin(), indices.end(), std::mt19937 {std::random_device {}()}); - std::string path = _store_paths[indices[0]].path; + std::string path = _store_paths[indices[0]].path + "/" + BLOCK_SPILL_TMP_DIR; Review Comment: Some garbage files will be left behind if the BE restarts during a spill to disk, so we could add a timestamp prefix here. Then add GC logic to clear all folders whose timestamp is earlier than the current one. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org