Author: Frank Ch. Eigler
Date: Fri Sep 22 15:30:51 2023 -0400
PR30879: intermittent debuginfod crash with unhandled exception
Code inspection identified two places where sqlite_ps objects were
being created/used outside try/catch protection. This patch wraps or
replaces them.
* configure.ac: Look for glibc backtrace headers.
* debuginfod.cxx (scan): New function wrapped by a try/catch loop.
(sqlite_checkpoint_pb): Use non-exception-producing sqlite functions.
(main, my_terminate_handler): New terminate() handler.
diff --git a/configure.ac b/configure.ac
index 4b67c84425fa..29ed32feaee6 100644
--- a/configure.ac
+++ b/configure.ac
@@ -839,6 +839,7 @@ AS_IF([test "x$enable_libdebuginfod" = "xdummy"],
[AC_DEFINE([DUMMY_LIBDEBUGINFOD], [1], [Build dummy libdebuginfod])])
AM_CONDITIONAL([LIBDEBUGINFOD],[test "x$enable_libdebuginfod" = "xyes" || test
"x$enable_libdebuginfod" = "xdummy"])
AM_CONDITIONAL([DUMMY_LIBDEBUGINFOD],[test "x$enable_libdebuginfod" =
"xdummy"])
+AC_CHECK_HEADERS([execinfo.h])
# Look for libmicrohttpd, libarchive, sqlite for debuginfo server
# minimum versions as per rhel7.
diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx
index d72d2ad16960..e53228803bb0 100644
--- a/debuginfod/debuginfod.cxx
+++ b/debuginfod/debuginfod.cxx
@@ -44,6 +44,12 @@ extern "C" {
}
#endif
+#ifdef HAVE_EXECINFO_H
+extern "C" {
+#include
+}
+#endif
+
extern "C" {
#include "printversion.h"
#include "system.h"
@@ -95,6 +101,7 @@ extern "C" {
#include
#include
#include
+#include
#include
// #include // on rhel7 gcc 4.8, not competent
#include
@@ -1152,22 +1159,13 @@ struct sqlite_ps
struct sqlite_checkpoint_pb: public periodic_barrier
{
- sqlite_ps ckpt;
-
+ // NB: don't use sqlite_ps since it can throw exceptions during ctor etc.
sqlite_checkpoint_pb(unsigned t, unsigned p):
-periodic_barrier(t, p), ckpt(db, "periodic wal checkpoint",
- "pragma wal_checkpoint(truncate);") {}
+periodic_barrier(t, p) { }
void periodic_barrier_work() noexcept
{
-try
- {
-ckpt.reset().step_ok_done();
- }
-catch (const reportable_exception& e)
- {
-e.report(clog);
- }
+(void) sqlite3_exec (db, "pragma wal_checkpoint(truncate);", NULL, NULL,
NULL);
}
};
@@ -3714,11 +3712,9 @@ scan_archive_file (const string& rps, const stat_t& st,
// The thread that consumes file names off of the scanq. We hold
// the persistent sqlite_ps's at this level and delegate file/archive
// scanning to other functions.
-static void*
-thread_main_scanner (void* arg)
+static void
+scan ()
{
- (void) arg;
-
// all the prepared statements fit to use, the _f_ set:
sqlite_ps ps_f_upsert_buildids (db, "file-buildids-intern", "insert or
ignore into " BUILDIDS "_buildids VALUES (NULL, ?);");
sqlite_ps ps_f_upsert_fileparts (db, "file-fileparts-intern", "insert or
ignore into " BUILDIDS "_fileparts VALUES (NULL, ?);");
@@ -3845,8 +3841,25 @@ thread_main_scanner (void* arg)
inc_metric("thread_work_total","role","scan");
}
-
add_metric("thread_busy", "role", "scan", -1);
+}
+
+
+// Use this function as the thread entry point, so it can catch our
+// fleet of exceptions (incl. the sqlite_ps ctors) and report.
+static void*
+thread_main_scanner (void* arg)
+{
+ (void) arg;
+ while (! interrupted)
+try
+ {
+scan();
+ }
+catch (const reportable_exception& e)
+ {
+e.report(cerr);
+ }
return 0;
}
@@ -4359,6 +4372,20 @@ default_concurrency() // guaranteed >= 1
}
+// 30879: Something to help out in case of an uncaught exception.
+void my_terminate_handler()
+{
+#if defined(__GLIBC__)
+ void *array[40];
+ int size = backtrace (array, 40);
+ backtrace_symbols_fd (array, size, STDERR_FILENO);
+#endif
+#if defined(__GLIBCXX__) || defined(__GLIBCPP__)
+ __gnu_cxx::__verbose_terminate_handler();
+#endif
+ abort();
+}
+
int
main (int argc, char *argv[])
@@ -4367,6 +4394,8 @@ main (int argc, char *argv[])
(void) bindtextdomain (PACKAGE_TARNAME, LOCALEDIR);
(void) textdomain (PACKAGE_TARNAME);
+ std::set_terminate(& my_terminate_handler);
+
/* Tell the library which version we are expecting. */
elf_version (EV_CURRENT);