* srcfiles.cxx: Introduce new option that places all the source files associated with a specified dwarf/elf file into a zip file and sends it to stdout.
* run-srcfiles-self.sh: Added test-case for the new zip feature. * srcfiles.1, NEWS: Added documentation for the new zip feature Example: % ./src/srcfiles -z -e /bin/ls > output.zip https://sourceware.org/bugzilla/show_bug.cgi?id=30991 Signed-off-by: Housam Alamour <halam...@redhat.com> --- NEWS | 4 + configure.ac | 15 +++ doc/srcfiles.1 | 20 +++- src/Makefile.am | 2 +- src/srcfiles.cxx | 193 ++++++++++++++++++++++++++++++++++--- tests/run-srcfiles-self.sh | 40 +++++++- 6 files changed, 251 insertions(+), 23 deletions(-) diff --git a/NEWS b/NEWS index 53c717eb..388a3b63 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,9 @@ Version 0.190 +srcfiles: added srcfiles tool that lists all the source files of a given + DWARF/ELF file. May also be used to fetch the source files and + place them into a zip. + readelf: Support readelf -Ds, --use-dynamic --symbol. debuginfod: Schema change (reindexing required, sorry!) for a 60% diff --git a/configure.ac b/configure.ac index 29ed32fe..3bfd2097 100644 --- a/configure.ac +++ b/configure.ac @@ -880,6 +880,21 @@ AC_ARG_ENABLE(debuginfod-urls, AC_SUBST(DEBUGINFOD_URLS, $default_debuginfod_urls) AC_CONFIG_FILES([config/profile.sh config/profile.csh]) +dnl Check if libarchive is available to determine if the +dnl srcfiles --zip option should be enabled or disabled +AC_CACHE_CHECK([whether libarchive is available], + ac_cv_have_libarchive, + [AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ + #include <archive.h> + ]])], + ac_cv_have_libarchive=yes, + ac_cv_have_libarchive=no)]) +AS_IF([test "x$ac_cv_have_libarchive" = "xyes"], + [ + AC_DEFINE([HAVE_LIBARCHIVE], [1], [Define to 1 if libarchive is available]) + ]) +AM_CONDITIONAL([HAVE_LIBARCHIVE], [test "x$ac_cv_have_libarchive" = "xyes"]) + AC_OUTPUT AC_MSG_NOTICE([ diff --git a/doc/srcfiles.1 b/doc/srcfiles.1 index 6149c21b..6157045b 100644 --- a/doc/srcfiles.1 +++ b/doc/srcfiles.1 @@ -21,15 +21,18 @@ eu-srcfiles \- Lists the source files of a DWARF/ELF file. 
.SH "SYNOPSIS" -eu-srcfiles [\fB\-0\fR|\fB\-\-null\fR] [\fB\-c\fR|\fB\-\-cu\-only\fR] [\fB\-v\fR|\fB\-\-verbose\fR] INPUT +eu-srcfiles [\fB\-0\fR|\fB\-\-null\fR] [\fB\-c\fR|\fB\-\-cu\-only\fR] [\fB\-v\fR|\fB\-\-verbose\fR] [\fB\-z\fR|\fB\-\-zip\fR] INPUT .SH "DESCRIPTION" -\fBeu-srcfiles\fR lists the source files of a given \s-DWARF/ELF\s0 +\fBeu-srcfiles\fR lists all the source files of a given DWARF/ELF file. This list is based on a search of the DWARF debuginfo, which may be automatically fetched by debuginfod if applicable. The target file may be an executable, a coredump, a process, or even the running kernel. The default is the file 'a.out'. The source file names are -made unique and printed to standard output. +made unique and printed to standard output. The source files can be +placed in a zip file that is sent to stdout. + +Note that all occurrences of '/./' and '/../' in the path name are canonicalized. .SH "INPUT OPTIONS" The long and short forms of options, shown here as alternatives, are @@ -78,7 +81,7 @@ Print program version. .TP \fB\-0, \-\-null\fR -Separate items by a null instead of a newline. +Separate items by a null instead of a newline. Cannot be used with the zip option because it raises errors when unzipping. .TP \fB\-c, \-\-cu\-only\fR @@ -88,6 +91,10 @@ Only list the CU names. \fB\-v, \-\-verbose\fR Increase verbosity of logging messages. +.TP +\fB\-z, \-\-zip\fR +Zip all the source files and send to stdout. Cannot be used with the null option because it raises errors when unzipping. + .SH EXAMPLES @@ -119,6 +126,11 @@ List the source files of a kernel image. eu-srcfiles -e /boot/vmlinuz-`uname -r` .ESAMPLE +Zip all the source files for a binary. +.SAMPLE +eu-srcfiles -z -e /bin/ls > ls.zip +.ESAMPLE + .SH "AUTHOR" Written by Housam Alamour. 
diff --git a/src/Makefile.am b/src/Makefile.am index d3d9d408..3853c152 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -85,7 +85,7 @@ stack_LDADD = $(libebl) $(libelf) $(libdw) $(libeu) $(argp_LDADD) $(demanglelib) elfcompress_LDADD = $(libebl) $(libelf) $(libdw) $(libeu) $(argp_LDADD) elfclassify_LDADD = $(libelf) $(libdw) $(libeu) $(argp_LDADD) srcfiles_SOURCES = srcfiles.cxx -srcfiles_LDADD = $(libdw) $(libelf) $(libeu) $(argp_LDADD) +srcfiles_LDADD = $(libdw) $(libelf) $(libeu) $(argp_LDADD) $(libarchive_LIBS) installcheck-binPROGRAMS: $(bin_PROGRAMS) bad=0; pid=$$$$; list="$(bin_PROGRAMS)"; for p in $$list; do \ diff --git a/src/srcfiles.cxx b/src/srcfiles.cxx index 3c7afdc4..b88ce919 100644 --- a/src/srcfiles.cxx +++ b/src/srcfiles.cxx @@ -15,7 +15,19 @@ You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ + + +/* In case we have a bad fts we include this before config.h because it + can't handle _FILE_OFFSET_BITS. + Everything we need here is fine if its declarations just come first. + Also, include sys/types.h before fts. On some systems fts.h is not self + contained. */ +#ifdef BAD_FTS + #include <sys/types.h> + #include <fts.h> +#endif +#include <config.h> #include "printversion.h" #include <dwarf.h> #include <argp.h> @@ -23,12 +35,31 @@ #include <set> #include <string> #include <cassert> -#include <config.h> #include <libdwfl.h> #include <fcntl.h> #include <iostream> #include <libdw.h> +#include <sstream> +#include <vector> + +/* Libraries for use by the --zip option */ +#ifdef HAVE_LIBARCHIVE + #include <archive.h> + #include <archive_entry.h> +#endif + +/* If fts.h is included before config.h, its indirect inclusions may not + give us the right LFS aliases of these functions, so map them manually. 
*/ +#ifdef BAD_FTS + #ifdef _FILE_OFFSET_BITS + #define open open64 + #define fopen fopen64 + #endif +#else + #include <sys/types.h> + #include <fts.h> +#endif using namespace std; @@ -38,16 +69,23 @@ ARGP_PROGRAM_VERSION_HOOK_DEF = print_version; /* Bug report address. */ ARGP_PROGRAM_BUG_ADDRESS_DEF = PACKAGE_BUGREPORT; +constexpr size_t BUFFER_SIZE = 8192; + /* Definitions of arguments for argp functions. */ -static const struct argp_option options[] = +static const struct argp_option options[] = { { NULL, 0, NULL, OPTION_DOC, N_("Output options:"), 1 }, { "null", '0', NULL, 0, - N_ ("Separate items by a null instead of a newline."), 0 }, + N_ ("Separate items by a null instead of a newline. " + "Cannot be used with the zip option"), 0 }, { "verbose", 'v', NULL, 0, N_ ("Increase verbosity of logging messages."), 0 }, { "cu-only", 'c', NULL, 0, N_ ("Only list the CU names."), 0 }, - { NULL, 0, NULL, 0, NULL, 0 } + #ifdef HAVE_LIBARCHIVE + { "zip", 'z', NULL, 0, N_ ("Zip all the source files and send to stdout. " + "Cannot be used with the null option"), 0 }, + #endif + { NULL, 0, NULL, 0, NULL, 0 } }; /* Short description of program. */ @@ -73,8 +111,13 @@ static bool verbose; static bool null_arg; /* Only print compilation unit names. */ static bool CU_only; +#ifdef HAVE_LIBARCHIVE + /* Zip all the source files and send to stdout. */ + static bool zip; +#endif -/* Handle program arguments. */ +/* Handle program arguments. Note null arg and zip + cannot be combined due to warnings raised when unzipping. */ static error_t parse_opt (int key, char *arg, struct argp_state *state) { @@ -87,6 +130,13 @@ parse_opt (int key, char *arg, struct argp_state *state) break; case '0': + #ifdef HAVE_LIBARCHIVE + if (zip) + { + cerr << "Error: Cannot use both null and zip options simultaneously." 
<< endl; + return ARGP_ERR_UNKNOWN; + } + #endif null_arg = true; break; @@ -97,6 +147,16 @@ parse_opt (int key, char *arg, struct argp_state *state) case 'c': CU_only = true; break; + + #ifdef HAVE_LIBARCHIVE + case 'z': + if (null_arg) { + cerr << "Error: Cannot use both null and zip options simultaneously." << endl; + return ARGP_ERR_UNKNOWN; + } + zip = true; + break; + #endif default: return ARGP_ERR_UNKNOWN; @@ -104,6 +164,34 @@ parse_opt (int key, char *arg, struct argp_state *state) return 0; } + /* Remove the "/./" , "../" and the preceding directory + that some paths include which raise errors during unzip */ + string canonicalize_path(string path) + { + stringstream ss(path); + string token; + vector<string> tokens; + /* Extract each of directory of the path and place into a vector */ + while (getline(ss, token, '/')) { + /* Ignore any empty //, or /./ dirs */ + if (token == "" || token == ".") + continue; + /* When /.. is encountered, remove the most recent directory from the vector */ + else if (token == "..") { + if (!tokens.empty()) + tokens.pop_back(); + } else + tokens.push_back(token); + } + stringstream result; + if (tokens.empty()) + return "/"; + /* Reconstruct the path from the extracted directories */ + for (const string &t : tokens) { + result << '/' << t; + } + return result.str(); + } /* Global list of collected source files. Normally, it'll contain the sources of just one named binary, but the '-K' option can cause @@ -119,13 +207,13 @@ collect_sourcefiles (Dwfl_Module *dwflmod, { Dwarf *dbg; Dwarf_Addr bias; /* ignored - for addressing purposes only */ - + dbg = dwfl_module_getdwarf (dwflmod, &bias); Dwarf_Off offset = 0; Dwarf_Off old_offset; size_t hsize; - + /* Traverse all CUs of this module. 
*/ while (dwarf_nextcu (dbg, old_offset = offset, &offset, &hsize, NULL, NULL, NULL) == 0) { @@ -152,11 +240,20 @@ collect_sourcefiles (Dwfl_Module *dwflmod, comp_dir = ""; if (verbose) - std::clog << "searching for sources for cu=" << cuname + clog << "searching for sources for cu=" << cuname << " comp_dir=" << comp_dir << " #files=" << nfiles << " #dirs=" << ndirs << endl; + + if (comp_dir[0] == '\0' && cuname[0] != '/') + { + // This is a common symptom for dwz-compressed debug files, + // where the altdebug file cannot be resolved. + if (verbose) + clog << "skipping cu=" << cuname << " due to empty comp_dir" << endl; + continue; + } - for (size_t f = 1; f < nfiles; f++) + for (size_t f = 1; f < nfiles; ++f) { const char *hat; if (CU_only) @@ -172,7 +269,7 @@ collect_sourcefiles (Dwfl_Module *dwflmod, continue; if (string(hat).find("<built-in>") - != std::string::npos) /* gcc intrinsics, don't bother record */ + != string::npos) /* gcc intrinsics, don't bother record */ continue; string waldo; @@ -180,6 +277,13 @@ collect_sourcefiles (Dwfl_Module *dwflmod, waldo = (string (hat)); else if (comp_dir[0] != '\0') /* comp_dir relative */ waldo = (string (comp_dir) + string ("/") + string (hat)); + else + { + if (verbose) + clog << "skipping hat=" << hat << " due to empty comp_dir" << endl; + continue; + } + waldo = canonicalize_path (waldo); debug_sourcefiles.insert (waldo); } } @@ -188,29 +292,86 @@ collect_sourcefiles (Dwfl_Module *dwflmod, } +#ifdef HAVE_LIBARCHIVE +void zip_files() +{ + struct archive *a = archive_write_new(); + struct stat st; + char buff[BUFFER_SIZE]; + int len; + int fd = 0; + + archive_write_set_format_zip(a); + archive_write_open_fd(a, STDOUT_FILENO); + + for (const auto &file_path : debug_sourcefiles) + { + /* Create an entry for each file including file information to be placed in the zip */ + stat(file_path.c_str(), &st); + struct archive_entry *entry = archive_entry_new(); + /* Removing first "/"" to make the path "relative" before 
zipping, otherwise warnings are raised when unzipping */ + string entry_name = file_path.substr(file_path.find_first_of('/') + 1); + archive_entry_set_pathname(entry, entry_name.c_str()); + archive_entry_copy_stat(entry, &st); + archive_write_header(a, entry); + + /* Read the file contents and write it to the zip file */ + fd = open(file_path.c_str(), O_RDONLY); + if (!fd) + { + cerr << "Error: Failed to open the file: " << file_path << endl; + continue; + } + len = read(fd, buff, sizeof(buff)); + if (verbose && len > 0) + clog << "Writing to zip: " << file_path << endl; + while (len > 0) + { + archive_write_data(a, buff, len); + len = read(fd, buff, sizeof(buff)); + } + close(fd); + archive_entry_free(entry); + } + archive_write_close(a); + archive_write_free(a); +} +#endif + int main (int argc, char *argv[]) { int remaining; - + /* Parse and process arguments. This includes opening the modules. */ argp_children[0].argp = dwfl_standard_argp (); argp_children[0].group = 1; - + Dwfl *dwfl = NULL; (void) argp_parse (&argp, argc, argv, 0, &remaining, &dwfl); assert (dwfl != NULL); /* Process all loaded modules - probably just one, except if -K or -p is used. */ (void) dwfl_getmodules (dwfl, &collect_sourcefiles, NULL, 0); - if (!debug_sourcefiles.empty ()) + if (verbose) + { + clog <<"Filepaths within the debug_sourcefiles set:" << endl; + for (auto &file_path : debug_sourcefiles) + clog << file_path << "\n"; + clog << endl; + } + #ifdef HAVE_LIBARCHIVE + if (zip) + zip_files(); + #endif + else if (!debug_sourcefiles.empty ()) for (const string &element : debug_sourcefiles) { - std::cout << element; + cout << element; if (null_arg) - std::cout << '\0'; + cout << '\0'; else - std::cout << '\n'; + cout << '\n'; } dwfl_end (dwfl); diff --git a/tests/run-srcfiles-self.sh b/tests/run-srcfiles-self.sh index 0e64dd2b..22045c45 100755 --- a/tests/run-srcfiles-self.sh +++ b/tests/run-srcfiles-self.sh @@ -17,6 +17,9 @@ . 
$srcdir/test-subr.sh +# for test case debugging, uncomment: +set -x + # Test different command line combinations on the srcfiles binary itself. ET_EXEC="${abs_top_builddir}/src/srcfiles" ET_PID=$$ @@ -26,6 +29,9 @@ SRC_NAME="srcfiles.cxx" # Ensure the output contains the expected source file srcfiles.cxx testrun $ET_EXEC -e $ET_EXEC | grep $SRC_NAME > /dev/null +# Check if zip option is available (only available if libarchive is available) +$ET_EXEC --help | grep -q zip && zip=true || zip=false + for null_arg in --null ""; do for verbose_arg in --verbose ""; do testrun $ET_EXEC $null_arg $verbose_arg -p $ET_PID > /dev/null @@ -40,9 +46,39 @@ for null_arg in --null ""; do exit 1 fi - # Ensure that the output with the cu-only option contains less source files + # Ensure that the output with the cu-only option contains fewer source files if [ $(echo "$cu_only" | wc -m) -gt $(echo "$default" | wc -m) ]; then exit 1 fi + + if $zip; then + # Zip option tests + testrun $ET_EXEC $verbose_arg -z -e $ET_EXEC > test.zip + # Check if the 'unzip' command is available before attempting to use it + if command -v unzip >/dev/null 2>&1; then + # Check integrity of the zip + if ! unzip -t test.zip; then + echo "Unzip failed. zip corrupted." + exit 1 + fi + # Ensure that the zip contains srclines.cxx + if ! unzip -l test.zip | grep -q "$SRC_NAME"; then + echo "Unzip failed. srcfiles.cxx not found in compressed files." + exit 1 + fi + # Ensure unzipped srclines.cxx and its contents are the same as the original source file + unzip -j test.zip "*/$SRC_NAME" + if cmp -s ./srcfiles.cxx "$ET_EXEC"; then + echo "Unzip failed. srcfiles.cxx not found in decompressed files." + rm -f test.zip srcfiles.cxx + exit 1 + fi + rm -f test.zip srcfiles.cxx + # Unzip not available. Not checking integrity of the zip file. + else + echo "Unzip unavailable. Integrity of the zip file not checked." + rm -f test.zip + fi + fi done -done +done \ No newline at end of file -- 2.41.0