This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git

The following commit(s) were added to refs/heads/master by this push:
     new 92577f45d3 [fix] (recover) fix can not recover a BE's tablet after deleting its data directory manual (#20273) (#20274)
92577f45d3 is described below

commit 92577f45d388682a63aa1d56f3134b28745e61f6
Author: yujun <yu.jun.re...@gmail.com>
AuthorDate: Wed Jun 7 22:27:50 2023 +0800

    [fix] (recover) fix can not recover a BE's tablet after deleting its data directory manual (#20273) (#20274)
---
 be/src/common/config.cpp       |  4 +++
 be/src/common/config.h         |  4 +++
 be/src/olap/olap_server.cpp    | 79 ++++++++++++++++++++++++++++++++++++++++++
 be/src/olap/storage_engine.h   |  4 +++
 be/src/olap/tablet.cpp         | 32 ++++++++++++++---
 be/src/olap/tablet.h           |  8 ++++-
 be/src/olap/tablet_manager.cpp |  2 +-
 7 files changed, 126 insertions(+), 7 deletions(-)

diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index fb316ad7c2..20f404efdd 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -998,6 +998,10 @@ DEFINE_Int32(max_depth_of_expr_tree, "600");
 // Report a tablet as bad when io errors occurs more than this value.
 DEFINE_mInt64(max_tablet_io_errors, "-1");
 
+// Report a tablet as bad when its path not found
+DEFINE_mInt32(tablet_path_check_interval_seconds, "60");
+DEFINE_mInt32(tablet_path_check_batch_size, "1000");
+
 // Page size of row column, default 4KB
 DEFINE_mInt64(row_column_page_size, "4096");
 // it must be larger than or equal to 5MB
diff --git a/be/src/common/config.h b/be/src/common/config.h
index ff6003b43e..fde9c69718 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -1014,6 +1014,10 @@ DECLARE_Int32(max_depth_of_expr_tree);
 // Report a tablet as bad when io errors occurs more than this value.
DECLARE_mInt64(max_tablet_io_errors); +// Report a tablet as bad when its path not found +DECLARE_mInt32(tablet_path_check_interval_seconds); +DECLARE_mInt32(tablet_path_check_batch_size); + // Page size of row column, default 4KB DECLARE_mInt64(row_column_page_size); // it must be larger than or equal to 5MB diff --git a/be/src/olap/olap_server.cpp b/be/src/olap/olap_server.cpp index 26947f6261..4e300c5460 100644 --- a/be/src/olap/olap_server.cpp +++ b/be/src/olap/olap_server.cpp @@ -167,6 +167,11 @@ Status StorageEngine::start_bg_threads() { &_tablet_checkpoint_tasks_producer_thread)); LOG(INFO) << "tablet checkpoint tasks producer thread started"; + RETURN_IF_ERROR(Thread::create( + "StorageEngine", "tablet_path_check_thread", + [this]() { this->_tablet_path_check_callback(); }, &_tablet_path_check_thread)); + LOG(INFO) << "tablet path check thread started"; + // fd cache clean thread RETURN_IF_ERROR(Thread::create( "StorageEngine", "fd_cache_clean_thread", @@ -396,6 +401,80 @@ void StorageEngine::_tablet_checkpoint_callback(const std::vector<DataDir*>& dat } while (!_stop_background_threads_latch.wait_for(std::chrono::seconds(interval))); } +void StorageEngine::_tablet_path_check_callback() { + struct TabletIdComparator { + bool operator()(Tablet* a, Tablet* b) { return a->tablet_id() < b->tablet_id(); } + }; + + using TabletQueue = std::priority_queue<Tablet*, std::vector<Tablet*>, TabletIdComparator>; + int64_t last_tablet_id; + + int64_t interval; + do { + interval = config::tablet_path_check_interval_seconds; + int32_t batch_size = config::tablet_path_check_batch_size; + if (batch_size <= 0) { + if (_stop_background_threads_latch.wait_for(std::chrono::seconds(interval))) { + break; + } + continue; + } + + LOG(INFO) << "start to check tablet path"; + + auto all_tablets = _tablet_manager->get_all_tablet( + [](Tablet* t) { return t->is_used() && t->tablet_state() == TABLET_RUNNING; }); + + TabletQueue big_id_tablets; + TabletQueue small_id_tablets; + for (auto 
tablet : all_tablets) { + auto tablet_id = tablet->tablet_id(); + TabletQueue* belong_tablets = nullptr; + if (tablet_id > last_tablet_id) { + if (big_id_tablets.size() < batch_size || + big_id_tablets.top()->tablet_id() > tablet_id) { + belong_tablets = &big_id_tablets; + } + } else if (big_id_tablets.size() < batch_size) { + if (small_id_tablets.size() < batch_size || + small_id_tablets.top()->tablet_id() > tablet_id) { + belong_tablets = &small_id_tablets; + } + } + if (belong_tablets != nullptr) { + belong_tablets->push(tablet.get()); + if (belong_tablets->size() > batch_size) { + belong_tablets->pop(); + } + } + } + + int32_t need_small_id_tablet_size = + batch_size - static_cast<int32_t>(big_id_tablets.size()); + + if (!big_id_tablets.empty()) { + last_tablet_id = big_id_tablets.top()->tablet_id(); + } + while (!big_id_tablets.empty()) { + big_id_tablets.top()->check_tablet_path_exists(); + big_id_tablets.pop(); + } + + if (!small_id_tablets.empty() && need_small_id_tablet_size > 0) { + while (static_cast<int32_t>(small_id_tablets.size()) > need_small_id_tablet_size) { + small_id_tablets.pop(); + } + + last_tablet_id = small_id_tablets.top()->tablet_id(); + while (!small_id_tablets.empty()) { + small_id_tablets.top()->check_tablet_path_exists(); + small_id_tablets.pop(); + } + } + + } while (!_stop_background_threads_latch.wait_for(std::chrono::seconds(interval))); +} + void StorageEngine::_adjust_compaction_thread_num() { if (_base_compaction_thread_pool->max_threads() != config::max_base_compaction_threads) { int old_max_threads = _base_compaction_thread_pool->max_threads(); diff --git a/be/src/olap/storage_engine.h b/be/src/olap/storage_engine.h index 49fec3b5bc..42e7bfdef7 100644 --- a/be/src/olap/storage_engine.h +++ b/be/src/olap/storage_engine.h @@ -261,6 +261,8 @@ private: void _tablet_checkpoint_callback(const std::vector<DataDir*>& data_dirs); + void _tablet_path_check_callback(); + // parse the default rowset type config to RowsetTypePB void 
_parse_default_rowset_type(); @@ -378,6 +380,8 @@ private: std::vector<scoped_refptr<Thread>> _path_scan_threads; // thread to produce tablet checkpoint tasks scoped_refptr<Thread> _tablet_checkpoint_tasks_producer_thread; + // thread to check tablet path + scoped_refptr<Thread> _tablet_path_check_thread; // thread to clean tablet lookup cache scoped_refptr<Thread> _lookup_cache_clean_thread; diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp index 0b88dbf3d0..bd925672d0 100644 --- a/be/src/olap/tablet.cpp +++ b/be/src/olap/tablet.cpp @@ -233,6 +233,7 @@ Tablet::Tablet(TabletMetaSharedPtr tablet_meta, DataDir* data_dir, _last_checkpoint_time(0), _cumulative_compaction_type(cumulative_compaction_type), _is_clone_occurred(false), + _is_tablet_path_exists(true), _last_missed_version(-1), _last_missed_time_s(0) { // construct _timestamped_versioned_tracker from rs and stale rs meta @@ -1175,6 +1176,17 @@ void Tablet::delete_all_files() { } } +void Tablet::check_tablet_path_exists() { + if (!tablet_path().empty()) { + std::error_code ec; + if (std::filesystem::is_directory(tablet_path(), ec)) { + _is_tablet_path_exists.store(true, std::memory_order_relaxed); + } else if (ec.value() == ENOENT || ec.value() == 0) { + _is_tablet_path_exists.store(false, std::memory_order_relaxed); + } + } +} + bool Tablet::check_path(const std::string& path_to_check) const { std::shared_lock rdlock(_meta_lock); if (path_to_check == _tablet_path) { @@ -1548,7 +1560,8 @@ bool Tablet::_contains_rowset(const RowsetId rowset_id) { // need check if consecutive version missing in full report // alter tablet will ignore this check void Tablet::build_tablet_report_info(TTabletInfo* tablet_info, - bool enable_consecutive_missing_check) { + bool enable_consecutive_missing_check, + bool enable_path_check) { std::shared_lock rdlock(_meta_lock); tablet_info->__set_tablet_id(_tablet_meta->tablet_id()); tablet_info->__set_schema_hash(_tablet_meta->schema_hash()); @@ -1598,10 +1611,19 @@ void 
Tablet::build_tablet_report_info(TTabletInfo* tablet_info, // and perform state modification operations. } - if ((has_version_cross || is_io_error_too_times()) && tablet_state() == TABLET_RUNNING) { - LOG(INFO) << "report " << full_name() << " as bad, version_cross=" << has_version_cross - << ", ioe times=" << get_io_error_times(); - tablet_info->__set_used(false); + if (tablet_state() == TABLET_RUNNING) { + if (has_version_cross || is_io_error_too_times()) { + LOG(INFO) << "report " << full_name() << " as bad, version_cross=" << has_version_cross + << ", ioe times=" << get_io_error_times(); + tablet_info->__set_used(false); + } + + if (enable_path_check) { + if (!_is_tablet_path_exists.exchange(true, std::memory_order_relaxed)) { + LOG(INFO) << "report " << full_name() << " as bad, tablet directory not found"; + tablet_info->__set_used(false); + } + } } if (tablet_state() == TABLET_SHUTDOWN) { diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h index 6608f5864d..9d2deeb5cc 100644 --- a/be/src/olap/tablet.h +++ b/be/src/olap/tablet.h @@ -246,6 +246,8 @@ public: void delete_all_files(); + void check_tablet_path_exists(); + bool check_path(const std::string& check_path) const; bool check_rowset_id(const RowsetId& rowset_id); @@ -278,7 +280,8 @@ public: bool rowset_meta_is_useful(RowsetMetaSharedPtr rowset_meta); void build_tablet_report_info(TTabletInfo* tablet_info, - bool enable_consecutive_missing_check = false); + bool enable_consecutive_missing_check = false, + bool enable_path_check = false); void generate_tablet_meta_copy(TabletMetaSharedPtr new_tablet_meta) const; // caller should hold the _meta_lock before calling this method @@ -629,6 +632,9 @@ private: // whether clone task occurred during the tablet is in thread pool queue to wait for compaction std::atomic<bool> _is_clone_occurred; + // use a seperate thread to check all tablets paths existance + std::atomic<bool> _is_tablet_path_exists; + int64_t _last_missed_version; int64_t _last_missed_time_s; 
diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp
index aa66a4c480..085159b7ae 100644
--- a/be/src/olap/tablet_manager.cpp
+++ b/be/src/olap/tablet_manager.cpp
@@ -911,7 +911,7 @@ Status TabletManager::build_all_report_tablets_info(std::map<TTabletId, TTablet>
 for (auto& tablet : tablets) {
 auto& t_tablet = (*tablets_info)[tablet->tablet_id()];
 TTabletInfo& tablet_info = t_tablet.tablet_infos.emplace_back();
- tablet->build_tablet_report_info(&tablet_info, true);
+ tablet->build_tablet_report_info(&tablet_info, true, true);
 // find expired transaction corresponding to this tablet
 TabletInfo tinfo(tablet->tablet_id(), tablet->schema_hash(), tablet->tablet_uid());
 auto find = expire_txn_map.find(tinfo);

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org