commit ce695bedf073127883bbbaab528dd1f2b0e955f1 (HEAD -> master) Author: Frank Ch. Eigler <f...@redhat.com> Date: Tue Sep 14 08:15:23 2021 -0400
PR28339: debuginfod: fix groom/scan race condition on just-emptied queue debuginfod's scan and groom operations (thread_main_scanner, thread_main_fts_source_paths) are intended to be mutually exclusive, as a substitute for more complicated sql transaction batching. (This is because scanning / grooming involves inserting or deleting data from multiple related tables.) The workq class that governs this in debuginfod.cxx has a problem: if the workq just becomes empty, its sole entry pulled by a scanner thread in response to a wait_front(), an 'idler' groomer thread is ALSO permitted to run, because there is no indication as to when the scanner thread operation finishes, only when it starts. Extending the workq with a counter ("fronters") to track any active scanning activity (even if the workq is empty) lets us block idlers groomers a little longer. Signed-off-by: Frank Ch. Eigler <f...@redhat.com> diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog index c54598239c82..07ddad3019d9 100644 --- a/debuginfod/ChangeLog +++ b/debuginfod/ChangeLog @@ -1,3 +1,11 @@ +2021-09-14 Frank Ch. Eigler <f...@redhat.com> + + PRPR28339 + * debuginfod.cxx (waitq::fronters): New field. + (waitq::wait_idle): Respect it. + (waitq::done_front): New function. + (thread_main_scanner): Call it to match wait_front(). + 2021-08-28 Mark Wielaard <m...@redhat.com> * debuginfod.cxx (parse_opt): Turn the -d arg ":memory:" into diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx index 3269f657cc8d..6387a27ff820 100644 --- a/debuginfod/debuginfod.cxx +++ b/debuginfod/debuginfod.cxx @@ -663,10 +663,11 @@ class workq mutex mtx; condition_variable cv; bool dead; - unsigned idlers; + unsigned idlers; // number of threads busy with wait_idle / done_idle + unsigned fronters; // number of threads busy with wait_front / done_front public: - workq() { dead = false; idlers = 0; } + workq() { dead = false; idlers = 0; fronters = 0; } ~workq() {} void push_back(const Payload& p) @@ -690,10 +691,11 @@ class workq unique_lock<mutex> lock(mtx); q.clear(); set_metric("thread_work_pending","role","scan", q.size()); + // NB: there may still be some live fronters cv.notify_all(); // maybe wake up waiting idlers } - // block this scanner thread until there is work to do and no active + // block this scanner thread until there is work to do and no active idler bool wait_front (Payload& p) { unique_lock<mutex> lock(mtx); @@ -705,19 +707,29 @@ class workq { p = * q.begin(); q.erase (q.begin()); + fronters ++; // prevent idlers from starting awhile, even if empty q set_metric("thread_work_pending","role","scan", q.size()); - if (q.size() == 0) - cv.notify_all(); // maybe wake up waiting idlers + // NB: don't wake up idlers yet! The consumer is busy + // processing this element until it calls done_front(). return true; } } + // notify waitq that scanner thread is done with that last item + void done_front () + { + unique_lock<mutex> lock(mtx); + fronters --; + if (q.size() == 0 && fronters == 0) + cv.notify_all(); // maybe wake up waiting idlers + } + // block this idler thread until there is no work to do void wait_idle () { unique_lock<mutex> lock(mtx); cv.notify_all(); // maybe wake up waiting scanners - while (!dead && (q.size() != 0)) + while (!dead && ((q.size() != 0) || fronters > 0)) cv.wait(lock); idlers ++; } @@ -3145,6 +3157,8 @@ thread_main_scanner (void* arg) e.report(cerr); } + scanq.done_front(); // let idlers run + if (fts_cached || fts_executable || fts_debuginfo || fts_sourcefiles || fts_sref || fts_sdef) {} // NB: not just if a successful scan - we might have encountered -ENOSPC & failed (void) statfs_free_enough_p(db_path, "database"); // report sqlite filesystem size