platoneko commented on code in PR #25921: URL: https://github.com/apache/doris/pull/25921#discussion_r1373053955
########## be/src/olap/data_dir.cpp: ########## @@ -627,74 +626,61 @@ Status DataDir::load() { } void DataDir::perform_path_gc() { - std::unique_lock<std::mutex> lck(_check_path_mutex); - _check_path_cv.wait(lck, [this] { - return _stop_bg_worker || !_all_tablet_schemahash_paths.empty() || - !_all_check_paths.empty(); - }); - if (_stop_bg_worker) { - return; - } - - _perform_path_gc_by_tablet(); - _perform_path_gc_by_rowsetid(); + auto tablet_paths = _perform_path_scan(); + _perform_path_gc_by_tablet(tablet_paths); + _perform_path_gc_by_rowset(tablet_paths); } // gc unused tablet schemahash dir -void DataDir::_perform_path_gc_by_tablet() { - if (_all_tablet_schemahash_paths.empty()) { +void DataDir::_perform_path_gc_by_tablet(std::vector<std::string>& tablet_paths) { + if (_stop_bg_worker) return; + if (tablet_paths.empty()) { return; } - LOG(INFO) << "start to path gc by tablet schemahash."; + LOG(INFO) << "start to path gc by tablet, dir=" << _path; + // Use double pointer to avoid move elements after erase a element + auto forward = tablet_paths.begin(); + auto backward = tablet_paths.end(); int counter = 0; - for (const auto& path : _all_tablet_schemahash_paths) { - ++counter; + do { if (config::path_gc_check_step > 0 && counter % config::path_gc_check_step == 0) { std::this_thread::sleep_for( std::chrono::milliseconds(config::path_gc_check_step_interval_ms)); } + auto& path = *forward; TTabletId tablet_id = -1; TSchemaHash schema_hash = -1; bool is_valid = _tablet_manager->get_tablet_id_and_schema_hash_from_path(path, &tablet_id, &schema_hash); - if (!is_valid) { + if (!is_valid || tablet_id < 1 || schema_hash < 1) [[unlikely]] { LOG(WARNING) << "unknown path:" << path; + --backward; + std::swap(*forward, *backward); continue; } - // should not happen, because already check it is a valid tablet schema hash path in previous step - // so that log fatal here - if (tablet_id < 1 || schema_hash < 1) { - LOG(WARNING) << "invalid tablet id " << tablet_id << " or schema hash " << schema_hash - << ", path=" << path; - continue; - } - TabletSharedPtr tablet = _tablet_manager->get_tablet(tablet_id); - if (tablet != nullptr) { - // could find the tablet, then skip check it - continue; - } - std::string data_dir_path = - io::Path(path).parent_path().parent_path().parent_path().parent_path(); - DataDir* data_dir = StorageEngine::instance()->get_store(data_dir_path); - if (data_dir == nullptr) { - LOG(WARNING) << "could not find data dir for tablet path " << path; + auto tablet = _tablet_manager->get_tablet(tablet_id); + if (!tablet) { Review Comment: done -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org