Hi - This one is important for those who operate large debuginfod servers.
commit 8ce18d0f8a5d7c00dc897f99751d575c8deeae82 (HEAD -> master)
Author: Frank Ch. Eigler <f...@redhat.com>
Date:   Thu Nov 4 13:08:35 2021 -0400

    PR28514: debuginfod: limit groom operation times

    For large databases and many stale files, it was possible to starve
    rescan operations by numerous groom "nuke" (database delete) ops.
    Under the theory that including new data is at least as important as
    aging old, we now impose a rough deadline on groom queries.

    In the process, we discovered that we were committing some
    undefined-behaviour sqlite ops (deleting rows while iterating), which
    may explain some previous heisenbug occurrences.  So the groom nuke
    operations are split into decision & action phases, with associated
    progress-tracking metrics.

    Testing the timeout facility requires hand-testing beyond the
    testsuite (since it requires LARGE databases to show measurable query
    times).  So confirmed this part by hand.

    Signed-off-by: Frank Ch. Eigler <f...@redhat.com>

diff --git a/NEWS b/NEWS
index aaef458c9606..2754074a5a22 100644
--- a/NEWS
+++ b/NEWS
@@ -4,6 +4,8 @@ debuginfod-client: Default $DEBUGINFOD_URLS is computed from drop-in
 files /etc/debuginfod/*.urls rather than hardcoded into the
 /etc/profile.d/debuginfod* scripts.
 
+debuginfod: Limit the duration of groom ops roughly to rescan (-t) times.
+
 Version 0.185
 
 debuginfod-client: Simplify curl handle reuse so downloads which

diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog
index e0616b0d576f..15b2ba40fa0f 100644
--- a/debuginfod/ChangeLog
+++ b/debuginfod/ChangeLog
@@ -1,3 +1,10 @@
+2021-11-04  Frank Ch. Eigler  <f...@redhat.com>
+
+	PR28514
+	* debuginfod.cxx (groom): Rework into separate decision/action
+	phases.  Add new metrics to monitor progress.  Limit indefinite
+	operation times to avoid starving rescan.
+
 2021-10-23  Frank Ch. Eigler  <f...@redhat.com>
 
 	PR28240

diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx
index d22571ad3041..45981d8d4279 100644
--- a/debuginfod/debuginfod.cxx
+++ b/debuginfod/debuginfod.cxx
@@ -3431,16 +3431,30 @@ void groom()
   clock_gettime (CLOCK_MONOTONIC, &ts_start);
 
   // scan for files that have disappeared
-  sqlite_ps files (db, "check old files", "select s.mtime, s.file, f.name from "
-                   BUILDIDS "_file_mtime_scanned s, " BUILDIDS "_files f "
-                   "where f.id = s.file");
-  sqlite_ps files_del_f_de (db, "nuke f_de", "delete from " BUILDIDS "_f_de where file = ? and mtime = ?");
-  sqlite_ps files_del_r_de (db, "nuke r_de", "delete from " BUILDIDS "_r_de where file = ? and mtime = ?");
-  sqlite_ps files_del_scan (db, "nuke f_m_s", "delete from " BUILDIDS "_file_mtime_scanned "
-                            "where file = ? and mtime = ?");
+  sqlite_ps files (db, "check old files",
+                   "select distinct s.mtime, s.file, f.name from "
+                   BUILDIDS "_file_mtime_scanned s, " BUILDIDS "_files f "
+                   "where f.id = s.file");
+  // NB: Because _ftime_mtime_scanned can contain both F and
+  // R records for the same file, this query would return duplicates if the
+  // DISTINCT qualifier were not there.
   files.reset();
+
+  // DECISION TIME - we enumerate stale fileids/mtimes
+  deque<pair<int64_t,int64_t> > stale_fileid_mtime;
+
+  time_t time_start = time(NULL);
   while(1)
     {
+      // PR28514: limit grooming iteration to O(rescan time), to avoid
+      // slow filesystem tests over many files locking out rescans for
+      // too long.
+      if (rescan_s > 0 && (long)time(NULL) > (long)(time_start + rescan_s))
+        {
+          inc_metric("groomed_total", "decision", "aborted");
+          break;
+        }
+
       if (interrupted) break;
 
       int rc = files.step();
@@ -3458,19 +3472,67 @@ void groom()
       if ( (regex_groom && reg_exclude && !reg_include) || rc < 0 || (mtime != (int64_t) s.st_mtime) )
         {
           if (verbose > 2)
-            obatched(clog) << "groom: forgetting file=" << filename << " mtime=" << mtime << endl;
-          files_del_f_de.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
-          files_del_r_de.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
-          files_del_scan.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
+            obatched(clog) << "groom: stale file=" << filename << " mtime=" << mtime << endl;
+          stale_fileid_mtime.push_back(make_pair(fileid,mtime));
           inc_metric("groomed_total", "decision", "stale");
+          set_metric("thread_work_pending","role","groom", stale_fileid_mtime.size());
         }
       else
         inc_metric("groomed_total", "decision", "fresh");
+
       if (sigusr1 != forced_rescan_count) // stop early if scan triggered
         break;
     }
   files.reset();
 
+  // ACTION TIME
+
+  // Now that we know which file/mtime tuples are stale, actually do
+  // the deletion from the database.  Doing this during the SELECT
+  // iteration above results in undefined behaviour in sqlite, as per
+  // https://www.sqlite.org/isolation.html
+
+  // We could shuffle stale_fileid_mtime[] here.  It'd let aborted
+  // sequences of nuke operations resume at random locations, instead
+  // of just starting over.  But it doesn't matter much either way,
+  // as long as we make progress.
+
+  sqlite_ps files_del_f_de (db, "nuke f_de", "delete from " BUILDIDS "_f_de where file = ? and mtime = ?");
+  sqlite_ps files_del_r_de (db, "nuke r_de", "delete from " BUILDIDS "_r_de where file = ? and mtime = ?");
+  sqlite_ps files_del_scan (db, "nuke f_m_s", "delete from " BUILDIDS "_file_mtime_scanned "
+                            "where file = ? and mtime = ?");
+
+  while (! stale_fileid_mtime.empty())
+    {
+      auto stale = stale_fileid_mtime.front();
+      stale_fileid_mtime.pop_front();
+      set_metric("thread_work_pending","role","groom", stale_fileid_mtime.size());
+
+      // PR28514: limit grooming iteration to O(rescan time), to avoid
+      // slow nuke_* queries over many files locking out rescans for too
+      // long.  We iterate over the files in random() sequence to avoid
+      // partial checks going over the same set.
+      if (rescan_s > 0 && (long)time(NULL) > (long)(time_start + rescan_s))
+        {
+          inc_metric("groomed_total", "action", "aborted");
+          break;
+        }
+
+      if (interrupted) break;
+
+      int64_t fileid = stale.first;
+      int64_t mtime = stale.second;
+      files_del_f_de.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
+      files_del_r_de.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
+      files_del_scan.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
+      inc_metric("groomed_total", "action", "cleaned");
+
+      if (sigusr1 != forced_rescan_count) // stop early if scan triggered
+        break;
+    }
+  stale_fileid_mtime.clear(); // no need for this any longer
+  set_metric("thread_work_pending","role","groom", stale_fileid_mtime.size());
+
   // delete buildids with no references in _r_de or _f_de tables;
   // cascades to _r_sref & _f_s records
   sqlite_ps buildids_del (db, "nuke orphan buildids",

diff --git a/tests/ChangeLog b/tests/ChangeLog
index 46302b5680cd..b791cd7f0d95 100644
--- a/tests/ChangeLog
+++ b/tests/ChangeLog
@@ -1,3 +1,8 @@
+2021-11-04  Frank Ch. Eigler  <f...@redhat.com>
+
+	PR28514
+	* run-debuginfod-archive-groom.sh: Look for new groom metric.
+
 2021-10-23  Frank Ch. Eigler  <f...@redhat.com>
 
 	PR28240

diff --git a/tests/run-debuginfod-archive-groom.sh b/tests/run-debuginfod-archive-groom.sh
index 7813ee28b8f2..030e0aa62de4 100755
--- a/tests/run-debuginfod-archive-groom.sh
+++ b/tests/run-debuginfod-archive-groom.sh
@@ -150,6 +150,8 @@ kill -USR2 $PID1  # groom cycle
 wait_ready $PORT1 'thread_work_total{role="groom"}' 2
 # Expect 4 rpms containing 2 buildids to be deleted by the groom
 wait_ready $PORT1 'groomed_total{decision="stale"}' 4
+# Expect no more groom actions pending
+wait_ready $PORT1 'thread_work_pending{role="groom"}' 0
 
 rm -rf $DEBUGINFOD_CACHE_PATH # clean it from previous tests
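
For anyone tempted to borrow this pattern elsewhere, the gist is twofold:
(1) never DELETE rows from a table that an open SELECT on the same
connection is still stepping through, and (2) bound each groom pass by a
deadline so rescans can't be starved.  Below is a minimal standalone
sketch of that decide-then-delete loop against the raw sqlite3 C API.
NB: the table name "t", the is_stale() predicate, and the deadline_s
parameter are made up for illustration; this is not debuginfod code.

    // Sketch only: decide-then-delete with a deadline, mirroring the
    // decision/action split in the patch above.  Collecting candidates
    // first avoids sqlite's undefined behaviour for DELETEs issued
    // while a SELECT on the same table is still live.
    #include <sqlite3.h>
    #include <ctime>
    #include <cstdint>
    #include <deque>
    #include <utility>

    // stand-in for the real staleness test (stat(2), regexes, etc.)
    static bool is_stale (int64_t file, int64_t mtime)
    { (void) file; (void) mtime; return false; }

    // returns true if the pass completed, false if the deadline cut it short
    bool groom_pass (sqlite3 *db, time_t deadline_s)
    {
      time_t start = time (NULL);
      std::deque<std::pair<int64_t,int64_t>> stale;

      // DECISION phase: read-only; just collect (file, mtime) candidates
      sqlite3_stmt *sel = NULL;
      sqlite3_prepare_v2 (db, "select file, mtime from t", -1, &sel, NULL);
      while (sqlite3_step (sel) == SQLITE_ROW)
        {
          if (deadline_s > 0 && time (NULL) > start + deadline_s)
            break;                      // aborted; next pass starts over
          int64_t file = sqlite3_column_int64 (sel, 0);
          int64_t mtime = sqlite3_column_int64 (sel, 1);
          if (is_stale (file, mtime))
            stale.push_back (std::make_pair (file, mtime));
        }
      sqlite3_finalize (sel);   // SELECT fully closed before any DELETE

      // ACTION phase: deletion is now safe; still honour the same deadline
      sqlite3_stmt *del = NULL;
      sqlite3_prepare_v2 (db, "delete from t where file = ? and mtime = ?",
                          -1, &del, NULL);
      bool complete = true;
      while (! stale.empty ())
        {
          if (deadline_s > 0 && time (NULL) > start + deadline_s)
            { complete = false; break; } // remainder redone on a later pass
          sqlite3_bind_int64 (del, 1, stale.front ().first);
          sqlite3_bind_int64 (del, 2, stale.front ().second);
          sqlite3_step (del);
          sqlite3_reset (del);
          stale.pop_front ();
        }
      sqlite3_finalize (del);
      return complete;
    }

debuginfod itself keys the deadline off the rescan interval (-t), so a
groom pass never monopolizes the database for much longer than one scan
cycle; an aborted pass simply redoes the decision work next time around,
and the new groomed_total / thread_work_pending metrics let you watch
that progress from the outside.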