JackDrogon commented on code in PR #25921: URL: https://github.com/apache/doris/pull/25921#discussion_r1372727588
########## be/src/olap/data_dir.cpp: ########## @@ -627,74 +626,61 @@ Status DataDir::load() { } void DataDir::perform_path_gc() { - std::unique_lock<std::mutex> lck(_check_path_mutex); - _check_path_cv.wait(lck, [this] { - return _stop_bg_worker || !_all_tablet_schemahash_paths.empty() || - !_all_check_paths.empty(); - }); - if (_stop_bg_worker) { - return; - } - - _perform_path_gc_by_tablet(); - _perform_path_gc_by_rowsetid(); + auto tablet_paths = _perform_path_scan(); + _perform_path_gc_by_tablet(tablet_paths); + _perform_path_gc_by_rowset(tablet_paths); } // gc unused tablet schemahash dir -void DataDir::_perform_path_gc_by_tablet() { - if (_all_tablet_schemahash_paths.empty()) { +void DataDir::_perform_path_gc_by_tablet(std::vector<std::string>& tablet_paths) { + if (_stop_bg_worker) return; + if (tablet_paths.empty()) { return; } - LOG(INFO) << "start to path gc by tablet schemahash."; + LOG(INFO) << "start to path gc by tablet, dir=" << _path; + // Use double pointer to avoid move elements after erase a element + auto forward = tablet_paths.begin(); + auto backward = tablet_paths.end(); int counter = 0; - for (const auto& path : _all_tablet_schemahash_paths) { - ++counter; + do { if (config::path_gc_check_step > 0 && counter % config::path_gc_check_step == 0) { std::this_thread::sleep_for( std::chrono::milliseconds(config::path_gc_check_step_interval_ms)); } + auto& path = *forward; TTabletId tablet_id = -1; TSchemaHash schema_hash = -1; bool is_valid = _tablet_manager->get_tablet_id_and_schema_hash_from_path(path, &tablet_id, &schema_hash); - if (!is_valid) { + if (!is_valid || tablet_id < 1 || schema_hash < 1) [[unlikely]] { Review Comment: add comment for this decision condition ########## be/src/olap/data_dir.cpp: ########## @@ -627,74 +626,61 @@ Status DataDir::load() { } void DataDir::perform_path_gc() { - std::unique_lock<std::mutex> lck(_check_path_mutex); - _check_path_cv.wait(lck, [this] { - return _stop_bg_worker || !_all_tablet_schemahash_paths.empty() || - !_all_check_paths.empty(); - }); - if (_stop_bg_worker) { - return; - } - - _perform_path_gc_by_tablet(); - _perform_path_gc_by_rowsetid(); + auto tablet_paths = _perform_path_scan(); + _perform_path_gc_by_tablet(tablet_paths); + _perform_path_gc_by_rowset(tablet_paths); } // gc unused tablet schemahash dir -void DataDir::_perform_path_gc_by_tablet() { - if (_all_tablet_schemahash_paths.empty()) { +void DataDir::_perform_path_gc_by_tablet(std::vector<std::string>& tablet_paths) { + if (_stop_bg_worker) return; + if (tablet_paths.empty()) { return; } - LOG(INFO) << "start to path gc by tablet schemahash."; + LOG(INFO) << "start to path gc by tablet, dir=" << _path; + // Use double pointer to avoid move elements after erase a element + auto forward = tablet_paths.begin(); + auto backward = tablet_paths.end(); int counter = 0; - for (const auto& path : _all_tablet_schemahash_paths) { - ++counter; + do { if (config::path_gc_check_step > 0 && counter % config::path_gc_check_step == 0) { std::this_thread::sleep_for( std::chrono::milliseconds(config::path_gc_check_step_interval_ms)); } + auto& path = *forward; TTabletId tablet_id = -1; TSchemaHash schema_hash = -1; bool is_valid = _tablet_manager->get_tablet_id_and_schema_hash_from_path(path, &tablet_id, &schema_hash); - if (!is_valid) { + if (!is_valid || tablet_id < 1 || schema_hash < 1) [[unlikely]] { LOG(WARNING) << "unknown path:" << path; + --backward; + std::swap(*forward, *backward); continue; } - // should not happen, because already check it is a valid tablet schema hash path in previous step - // so that log fatal here - if (tablet_id < 1 || schema_hash < 1) { - LOG(WARNING) << "invalid tablet id " << tablet_id << " or schema hash " << schema_hash - << ", path=" << path; - continue; - } - TabletSharedPtr tablet = _tablet_manager->get_tablet(tablet_id); - if (tablet != nullptr) { - // could find the tablet, then skip check it - continue; - } - std::string data_dir_path = - io::Path(path).parent_path().parent_path().parent_path().parent_path(); - DataDir* data_dir = StorageEngine::instance()->get_store(data_dir_path); - if (data_dir == nullptr) { - LOG(WARNING) << "could not find data dir for tablet path " << path; + auto tablet = _tablet_manager->get_tablet(tablet_id); + if (!tablet) { Review Comment: use if initialize ########## be/src/olap/data_dir.cpp: ########## @@ -704,55 +690,115 @@ void DataDir::_perform_path_gc_by_rowsetid() { TSchemaHash schema_hash = -1; bool is_valid = _tablet_manager->get_tablet_id_and_schema_hash_from_path(path, &tablet_id, &schema_hash); - if (!is_valid) { + if (!is_valid || tablet_id < 1 || schema_hash < 1) [[unlikely]] { LOG(WARNING) << "unknown path:" << path; continue; } - if (tablet_id > 0 && schema_hash > 0) { - // tablet schema hash path or rowset file path - // gc thread should get tablet include deleted tablet - // or it will delete rowset file before tablet is garbage collected + bool exists; Review Comment: add more space for improving readability -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org