Hi - A little extension lets us process arch-linux archives. Awaiting some small test .pkg's from the arch folks for the elfutils testsuite. However, hand-testing on several larger files works!
commit b51ae89befeb81c8b51b15b7168c6e616255b486 (fche/pacman-Z) Author: Frank Ch. Eigler <f...@redhat.com> Date: Wed Feb 5 15:04:18 2020 -0500 debuginfod: generalized archive support Add a '-Z EXT=CMD' option to debuginfod, which lets it scan any given extension and run CMD on it to unwrap distro archives. For example, for arch-linux pacman files, -Z '.tar.zst=zstdcat' lets debuginfod grok debug and source content in split-debuginfo files. diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog index 8c97fdcf7085..d812e6d71ff0 100644 --- a/debuginfod/ChangeLog +++ b/debuginfod/ChangeLog @@ -1,3 +1,9 @@ +2020-02-05 Frank Ch. Eigler <f...@redhat.com> + + * debuginfod.cxx (argp options): Add -Z option. + (canonicalized_archive_entry_pathname): New function for + distro-agnostic file name matching/storage. + 2020-01-22 Frank Ch. Eigler <f...@redhat.com> * debuginfod.cxx (dwarf_extract_source_paths): Don't print diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx index 623dbc593c70..0de6bbaea0ee 100644 --- a/debuginfod/debuginfod.cxx +++ b/debuginfod/debuginfod.cxx @@ -333,9 +333,10 @@ ARGP_PROGRAM_BUG_ADDRESS_DEF = PACKAGE_BUGREPORT; static const struct argp_option options[] = { { NULL, 0, NULL, 0, "Scanners:", 1 }, - { "scan-file-dir", 'F', NULL, 0, "Enable ELF/DWARF file scanning threads.", 0 }, - { "scan-rpm-dir", 'R', NULL, 0, "Enable RPM scanning threads.", 0 }, - { "scan-deb-dir", 'U', NULL, 0, "Enable DEB scanning threads.", 0 }, + { "scan-file-dir", 'F', NULL, 0, "Enable ELF/DWARF file scanning.", 0 }, + { "scan-rpm-dir", 'R', NULL, 0, "Enable RPM scanning.", 0 }, + { "scan-deb-dir", 'U', NULL, 0, "Enable DEB scanning.", 0 }, + { "scan-archive", 'Z', "EXT=CMD", 0, "Enable arbitrary archive scanning.", 0 }, // "source-oci-imageregistry" ... 
{ NULL, 0, NULL, 0, "Options:", 2 }, @@ -428,6 +429,15 @@ parse_opt (int key, char *arg, scan_archives[".deb"]="dpkg-deb --fsys-tarfile"; scan_archives[".ddeb"]="dpkg-deb --fsys-tarfile"; break; + case 'Z': + { + char* extension = strchr(arg, '='); + if (extension) + scan_archives[string(arg, (extension-arg))]=string(extension+1); + else + argp_failure(state, 1, EINVAL, "bad EXT=CMD format"); + } + break; case 'L': traverse_logical = true; break; @@ -1068,6 +1078,25 @@ class libarchive_fdcache static libarchive_fdcache fdcache; +// For security/portability reasons, many distro-package archives have +// a "./" in front of path names; others have nothing, others have +// "/". Canonicalize them all to a single leading "/", with the +// assumption that this matches the dwarf-derived file names too. +string canonicalized_archive_entry_pathname(struct archive_entry *e) +{ + string fn = archive_entry_pathname(e); + if (fn.size() == 0) + return fn; + if (fn[0] == '/') + return fn; + if (fn[0] == '.') + return fn.substr(1); + else + return string("/")+fn; +} + + + static struct MHD_Response* handle_buildid_r_match (int64_t b_mtime, const string& b_source0, @@ -1162,8 +1191,8 @@ handle_buildid_r_match (int64_t b_mtime, if (! S_ISREG(archive_entry_mode (e))) // skip non-files completely continue; - string fn = archive_entry_pathname (e); - if (fn != string(".")+b_source1) + string fn = canonicalized_archive_entry_pathname (e); + if (fn != b_source1) continue; // extract this file to a temporary file @@ -2055,9 +2084,7 @@ archive_classify (const string& rps, string& archive_extension, if (! S_ISREG(archive_entry_mode (e))) // skip non-files completely continue; - string fn = archive_entry_pathname (e); - if (fn.size() > 1 && fn[0] == '.') - fn = fn.substr(1); // trim off the leading '.' 
+ string fn = canonicalized_archive_entry_pathname (e); if (verbose > 3) obatched(clog) << "libarchive checking " << fn << endl; @@ -2764,7 +2791,7 @@ main (int argc, char *argv[]) "unexpected argument: %s", argv[remaining]); if (scan_archives.size()==0 && !scan_files && source_paths.size()>0) - obatched(clog) << "warning: without -F -R -U, ignoring PATHs" << endl; + obatched(clog) << "warning: without -F -R -U -Z, ignoring PATHs" << endl; fdcache.limit(fdcache_fds, fdcache_mbs); @@ -2894,7 +2921,7 @@ main (int argc, char *argv[]) obatched ob(clog); auto& o = ob << "scanning archive types "; for (auto&& arch : scan_archives) - o << arch.first << " "; + o << arch.first << "(" << arch.second << ") "; o << endl; } const char* du = getenv(DEBUGINFOD_URLS_ENV_VAR); diff --git a/doc/ChangeLog b/doc/ChangeLog index 651ea33d4106..36094d002f75 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -1,3 +1,7 @@ +2020-02-05 Frank Ch. Eigler <f...@redhat.com> + + * debuginfod.8: Document new -Z flag and tweak other bits. + 2020-01-10 Mark Wielaard <m...@klomp.org> * debuginfod_find_debuginfo.3 (DEBUGINFOD_PROGRESS): Mention progress diff --git a/doc/debuginfod.8 b/doc/debuginfod.8 index 166c7c4590ed..d6561edf7159 100644 --- a/doc/debuginfod.8 +++ b/doc/debuginfod.8 @@ -61,20 +61,22 @@ or ^C .ESAMPLE -If the \fB\-R\fP and/or \fB-U\fP option is given, each file is scanned -as an archive file that may contain ELF/DWARF/source files. If \-R is -given, the will scan RPMs; and/or if \-U is given, they will scan DEB -/ DDEB files. (The terms RPM and DEB and DDEB are used synonymously -as "archives" in diagnostic messages.) Because of complications such -as DWZ-compressed debuginfo, may require \fItwo\fP traversal passes to -identify all source code. Source files for RPMs are only served from -other RPMs, so the caution for \-F does not apply. Note that due to -Debian/Ubuntu packaging policies & mechanisms, debuginfod cannot -resolve source files for DEB/DDEB at all. 
- -If no PATH is listed, or neither \fB\-F\fP nor \fB\-R\fP nor \fB\-U\fP -option is given, then \fBdebuginfod\fP will simply serve content that -it accumulated into its index in all previous runs. +If any of the \fB\-R\fP, \fB-U\fP, or \fB-Z\fP options is given, each +file is scanned as an archive file that may contain ELF/DWARF/source +files. Archive files are recognized by extension. If \-R is given, +".rpm" files are scanned; if \-U is given, ".deb" and ".ddeb" files +are scanned; if \-Z is given, the listed extensions are scanned. +Because of complications such as DWZ-compressed debuginfo, may require +\fItwo\fP traversal passes to identify all source code. Source files +for RPMs are only served from other RPMs, so the caution for \-F does +not apply. Note that due to Debian/Ubuntu packaging policies & +mechanisms, debuginfod cannot resolve source files for DEB/DDEB at +all. + +If no PATH is listed, or none of the scanning options is given, then +\fBdebuginfod\fP will simply serve content that it accumulated into +its index in all previous runs, and federate to any upstream +debuginfod servers. .SH OPTIONS @@ -91,6 +93,16 @@ Activate RPM patterns in archive scanning. The default is off. .B "\-U" Activate DEB/DDEB patterns in archive scanning. The default is off. +.TP +.B "\-Z EXT=CMD" +Activate an additional pattern in archive scanning. Files with name +extension EXT will be processed with CMD. CMD is invoked with the +file name added to its argument list, and it should produce the +archive on its standard output. debuginfod uses libarchive to consume +the result, so it can accept a wide range of archive formats and +compression. (Include the dot in EXT.) The default is no additional +patterns. This option may be repeated. + .TP .B "\-d FILE" "\-\-database=FILE" Set the path of the sqlite database used to store the index. This @@ -123,7 +135,8 @@ against the full path of each file, based on its \fBrealpath(3)\fP canonicalization. 
By default, all files are included and none are excluded. A file that matches both include and exclude REGEX is excluded. (The \fIcontents\fP of archive files are not subject to -inclusion or exclusion filtering: they are all processed.) +inclusion or exclusion filtering: they are all processed.) Only the +last of each type of regular expression given is used. .TP .B "\-t SECONDS" "\-\-rescan\-time=SECONDS"