* srclines.cxx: Introduce new option that places all the
source files associated with a specified dwarf/elf file
into a zip file and sends it to stdout.

* run-srcfiles-self.sh: Added test-case for the new zip feature.

* srcfiles.1, NEWS: Added documentation for the new zip feature

Example:
% ./src/srcfiles -z -e /bin/ls > output.zip

https://sourceware.org/bugzilla/show_bug.cgi?id=30991

Signed-off-by: Housam Alamour <halam...@redhat.com>
---
 NEWS                       |   4 +
 configure.ac               |  15 +++
 doc/srcfiles.1             |  20 +++-
 src/Makefile.am            |   2 +-
 src/srcfiles.cxx           | 193 ++++++++++++++++++++++++++++++++++---
 tests/run-srcfiles-self.sh |  40 +++++++-
 6 files changed, 251 insertions(+), 23 deletions(-)

diff --git a/NEWS b/NEWS
index 53c717eb..388a3b63 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,9 @@
 Version 0.190
 
+srcfiles: added srcfiles tool that lists all the source files of a given 
+          DWARF/ELF file. May also be used to create fetch the source files and
+          place them into a zip.
+
 readelf: Support readelf -Ds, --use-dynamic --symbol.
 
 debuginfod: Schema change (reindexing required, sorry!) for a 60%
diff --git a/configure.ac b/configure.ac
index 29ed32fe..3bfd2097 100644
--- a/configure.ac
+++ b/configure.ac
@@ -880,6 +880,21 @@ AC_ARG_ENABLE(debuginfod-urls,
 AC_SUBST(DEBUGINFOD_URLS, $default_debuginfod_urls)                
 AC_CONFIG_FILES([config/profile.sh config/profile.csh])
 
+dnl Check if libarchive is available to determine if the
+dnl srcfiles --zip option should be enabled or disabled
+AC_CACHE_CHECK([whether libarchive is available],
+    ac_cv_have_libarchive,
+    [AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
+        #include <archive.h>
+    ]])],
+        ac_cv_have_libarchive=yes,
+        ac_cv_have_libarchive=no)])
+AS_IF([test "x$ac_cv_have_libarchive" = "xyes"],
+    [
+        AC_DEFINE([HAVE_LIBARCHIVE], [1], [Define to 1 if libarchive is 
available])
+    ])
+AM_CONDITIONAL([HAVE_LIBARCHIVE], [test "x$ac_cv_have_libarchive" = "xyes"])
+
 AC_OUTPUT
 
 AC_MSG_NOTICE([
diff --git a/doc/srcfiles.1 b/doc/srcfiles.1
index 6149c21b..6157045b 100644
--- a/doc/srcfiles.1
+++ b/doc/srcfiles.1
@@ -21,15 +21,18 @@
 eu-srcfiles \- Lists the source files of a DWARF/ELF file.
 
 .SH "SYNOPSIS"
-eu-srcfiles [\fB\-0\fR|\fB\-\-null\fR] [\fB\-c\fR|\fB\-\-cu\-only\fR] 
[\fB\-v\fR|\fB\-\-verbose\fR] INPUT
+eu-srcfiles [\fB\-0\fR|\fB\-\-null\fR] [\fB\-c\fR|\fB\-\-cu\-only\fR] 
[\fB\-v\fR|\fB\-\-verbose\fR] [\fB\-z\fR|\fB\-\-zip\fR] INPUT
 
 .SH "DESCRIPTION"
-\fBeu-srcfiles\fR lists the source files of a given \s-DWARF/ELF\s0
+\fBeu-srcfiles\fR lists all the source files of a given DWARF/ELF
 file.  This list is based on a search of the DWARF debuginfo, which
 may be automatically fetched by debuginfod if applicable.  The target
 file may be an executable, a coredump, a process, or even the running
 kernel.  The default is the file 'a.out'.  The source file names are
-made unique and printed to standard output.
+made unique and printed to standard output. The source files can be
+placed in a zip file that is sent to stdout.
+
+Note that all occurrences of '/./' and '/../' in the path name are 
canonicalized.
 
 .SH "INPUT OPTIONS"
 The long and short forms of options, shown here as alternatives, are
@@ -78,7 +81,7 @@ Print program version.
 
 .TP
 \fB\-0, \-\-null\fR
-Separate items by a null instead of a newline.
+Separate items by a null instead of a newline. Cannot be used with the zip 
option becuase it raises errors when unzipping.
 
 .TP
 \fB\-c, \-\-cu\-only\fR
@@ -88,6 +91,10 @@ Only list the CU names.
 \fB\-v, \-\-verbose\fR
 Increase verbosity of logging messages.
 
+.TP
+\fB\-z, \-\-zip\fR
+Zip all the source files and send to stdout. Cannot be used with the null 
option becuase it raises errors when unzipping.
+
 
 .SH EXAMPLES
 
@@ -119,6 +126,11 @@ List the source files of a kernel image.
 eu-srcfiles -e /boot/vmlinuz-`uname -r`
 .ESAMPLE
 
+Zip all the source files for a binary.
+.SAMPLE
+eu-srcfiles -z -e /bin/ls > ls.zip
+.ESAMPLE
+
 
 .SH "AUTHOR"
 Written by Housam Alamour.
diff --git a/src/Makefile.am b/src/Makefile.am
index d3d9d408..3853c152 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -85,7 +85,7 @@ stack_LDADD = $(libebl) $(libelf) $(libdw) $(libeu) 
$(argp_LDADD) $(demanglelib)
 elfcompress_LDADD = $(libebl) $(libelf) $(libdw) $(libeu) $(argp_LDADD)
 elfclassify_LDADD = $(libelf) $(libdw) $(libeu) $(argp_LDADD)
 srcfiles_SOURCES = srcfiles.cxx
-srcfiles_LDADD = $(libdw) $(libelf) $(libeu)  $(argp_LDADD)
+srcfiles_LDADD = $(libdw) $(libelf) $(libeu)  $(argp_LDADD) $(libarchive_LIBS)
 
 installcheck-binPROGRAMS: $(bin_PROGRAMS)
        bad=0; pid=$$$$; list="$(bin_PROGRAMS)"; for p in $$list; do \
diff --git a/src/srcfiles.cxx b/src/srcfiles.cxx
index 3c7afdc4..b88ce919 100644
--- a/src/srcfiles.cxx
+++ b/src/srcfiles.cxx
@@ -15,7 +15,19 @@
 
    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+  
+
+/* In case we have a bad fts we include this before config.h because it
+   can't handle _FILE_OFFSET_BITS.
+   Everything we need here is fine if its declarations just come first.
+   Also, include sys/types.h before fts. On some systems fts.h is not self
+   contained. */
+#ifdef BAD_FTS
+  #include <sys/types.h>
+  #include <fts.h>
+#endif
 
+#include <config.h>
 #include "printversion.h"
 #include <dwarf.h>
 #include <argp.h>
@@ -23,12 +35,31 @@
 #include <set>
 #include <string>
 #include <cassert>
-#include <config.h>
 
 #include <libdwfl.h>
 #include <fcntl.h>
 #include <iostream>
 #include <libdw.h>
+#include <sstream>
+#include <vector>
+
+/*  Libraries for use by the --zip option */
+#ifdef HAVE_LIBARCHIVE
+  #include <archive.h>
+  #include <archive_entry.h>
+#endif
+
+/* If fts.h is included before config.h, its indirect inclusions may not
+   give us the right LFS aliases of these functions, so map them manually.  */
+#ifdef BAD_FTS
+  #ifdef _FILE_OFFSET_BITS
+    #define open open64
+    #define fopen fopen64
+  #endif
+#else
+  #include <sys/types.h>
+  #include <fts.h>
+#endif
 
 using namespace std;
 
@@ -38,16 +69,23 @@ ARGP_PROGRAM_VERSION_HOOK_DEF = print_version;
 /* Bug report address.  */
 ARGP_PROGRAM_BUG_ADDRESS_DEF = PACKAGE_BUGREPORT;
 
+constexpr size_t BUFFER_SIZE = 8192;
+
 /* Definitions of arguments for argp functions.  */
-static const struct argp_option options[] =
+static const struct argp_option options[] = 
 {
   { NULL, 0, NULL, OPTION_DOC, N_("Output options:"), 1 },
   { "null", '0', NULL, 0,
-    N_ ("Separate items by a null instead of a newline."), 0 },
+    N_ ("Separate items by a null instead of a newline. "
+    "Cannot be used with the zip option"), 0 },
   { "verbose", 'v', NULL, 0,
     N_ ("Increase verbosity of logging messages."), 0 },
   { "cu-only", 'c', NULL, 0, N_ ("Only list the CU names."), 0 },
-  { NULL, 0, NULL, 0, NULL, 0 }
+  #ifdef HAVE_LIBARCHIVE
+  { "zip", 'z', NULL, 0, N_ ("Zip all the source files and send to stdout. "
+    "Cannot be used with the null option"), 0 },
+  #endif
+  { NULL, 0, NULL, 0, NULL, 0 } 
 };
 
 /* Short description of program.  */
@@ -73,8 +111,13 @@ static bool verbose;
 static bool null_arg;
 /* Only print compilation unit names. */
 static bool CU_only;
+#ifdef HAVE_LIBARCHIVE
+  /* Zip all the source files and send to stdout. */
+  static bool zip;
+#endif
 
-/* Handle program arguments. */
+/*  Handle program arguments. Note null arg and zip 
+    cannot be combined due to warnings raised when unzipping. */
 static error_t
 parse_opt (int key, char *arg, struct argp_state *state)
 {
@@ -87,6 +130,13 @@ parse_opt (int key, char *arg, struct argp_state *state)
       break;
 
     case '0':
+      #ifdef HAVE_LIBARCHIVE
+        if (zip) 
+        {
+            cerr << "Error: Cannot use both null and zip options 
simultaneously." << endl;
+            return ARGP_ERR_UNKNOWN;
+        }
+      #endif
       null_arg = true;
       break;
 
@@ -97,6 +147,16 @@ parse_opt (int key, char *arg, struct argp_state *state)
     case 'c':
       CU_only = true;
       break;
+    
+    #ifdef HAVE_LIBARCHIVE
+      case 'z':
+      if (null_arg) {
+          cerr << "Error: Cannot use both null and zip options 
simultaneously." << endl;
+          return ARGP_ERR_UNKNOWN;
+      }
+      zip = true;
+      break;
+    #endif
 
     default:
       return ARGP_ERR_UNKNOWN;
@@ -104,6 +164,34 @@ parse_opt (int key, char *arg, struct argp_state *state)
   return 0;
 }
 
+  /*  Remove the "/./" , "../" and the preceding directory 
+      that some paths include which raise errors during unzip */
+  string canonicalize_path(string path) 
+  {
+      stringstream ss(path);
+      string token;
+      vector<string> tokens;
+      /*  Extract each of directory of the path and place into a vector */
+      while (getline(ss, token, '/')) {
+        /*  Ignore any empty //, or /./ dirs  */
+          if (token == "" || token == ".")
+              continue;
+        /*  When /.. is encountered, remove the most recent directory from the 
vector  */
+          else if (token == "..") {
+              if (!tokens.empty())
+                  tokens.pop_back();
+          } else
+              tokens.push_back(token);
+      }
+      stringstream result;
+      if (tokens.empty())
+          return "/";
+      /* Reconstruct the path from the extracted directories */
+      for (const string &t : tokens) {
+          result << '/' << t;
+      }
+      return result.str();
+  }
 
 /* Global list of collected source files.  Normally, it'll contain
    the sources of just one named binary, but the '-K' option can cause
@@ -119,13 +207,13 @@ collect_sourcefiles (Dwfl_Module *dwflmod,
 {
   Dwarf *dbg;
   Dwarf_Addr bias; /* ignored - for addressing purposes only  */
-
+  
   dbg = dwfl_module_getdwarf (dwflmod, &bias);
 
   Dwarf_Off offset = 0;
   Dwarf_Off old_offset;
   size_t hsize;
-
+  
   /* Traverse all CUs of this module.  */
   while (dwarf_nextcu (dbg, old_offset = offset, &offset, &hsize, NULL, NULL, 
NULL) == 0)
     {
@@ -152,11 +240,20 @@ collect_sourcefiles (Dwfl_Module *dwflmod,
         comp_dir = "";
 
       if (verbose)
-        std::clog << "searching for sources for cu=" << cuname
+        clog << "searching for sources for cu=" << cuname
                   << " comp_dir=" << comp_dir << " #files=" << nfiles
                   << " #dirs=" << ndirs << endl;
+      
+      if (comp_dir[0] == '\0' && cuname[0] != '/')
+        {
+          // This is a common symptom for dwz-compressed debug files,
+          // where the altdebug file cannot be resolved.
+          if (verbose)
+            clog << "skipping cu=" << cuname << " due to empty comp_dir" << 
endl;
+          continue;
+        }
 
-      for (size_t f = 1; f < nfiles; f++)
+      for (size_t f = 1; f < nfiles; ++f)
         {
           const char *hat;
           if (CU_only)
@@ -172,7 +269,7 @@ collect_sourcefiles (Dwfl_Module *dwflmod,
             continue;
 
           if (string(hat).find("<built-in>")
-              != std::string::npos) /* gcc intrinsics, don't bother record  */
+              != string::npos) /* gcc intrinsics, don't bother record  */
             continue;
 
           string waldo;
@@ -180,6 +277,13 @@ collect_sourcefiles (Dwfl_Module *dwflmod,
             waldo = (string (hat));
           else if (comp_dir[0] != '\0') /* comp_dir relative */
             waldo = (string (comp_dir) + string ("/") + string (hat));
+          else
+           {
+             if (verbose)
+              clog << "skipping hat=" << hat << " due to empty comp_dir" << 
endl;
+             continue;
+           }
+          waldo = canonicalize_path (waldo);
           debug_sourcefiles.insert (waldo);
         }
     }
@@ -188,29 +292,86 @@ collect_sourcefiles (Dwfl_Module *dwflmod,
 }
 
 
+#ifdef HAVE_LIBARCHIVE
+void zip_files()
+{
+  struct archive *a = archive_write_new();
+  struct stat st;
+  char buff[BUFFER_SIZE];
+  int len;
+  int fd = 0;
+
+  archive_write_set_format_zip(a);
+  archive_write_open_fd(a, STDOUT_FILENO);
+
+  for (const auto &file_path : debug_sourcefiles)
+  {
+    /* Create an entry for each file including file information to be placed 
in the zip */
+    stat(file_path.c_str(), &st);
+    struct archive_entry *entry = archive_entry_new();
+    /* Removing first "/"" to make the path "relative" before zipping, 
otherwise warnings are raised when unzipping */
+    string entry_name = file_path.substr(file_path.find_first_of('/') + 1);
+    archive_entry_set_pathname(entry, entry_name.c_str());
+    archive_entry_copy_stat(entry, &st);
+    archive_write_header(a, entry);
+
+    /* Read the file contents and write it to the zip file */
+    fd = open(file_path.c_str(), O_RDONLY);
+    if (!fd)
+    {
+      cerr << "Error: Failed to open the file: " << file_path << endl;
+      continue;
+    }
+    len = read(fd, buff, sizeof(buff));
+    if (verbose && len > 0)
+      clog << "Writing to zip: " << file_path << endl;
+    while (len > 0)
+    {
+      archive_write_data(a, buff, len);
+      len = read(fd, buff, sizeof(buff));
+    }
+    close(fd);
+    archive_entry_free(entry);
+  }
+  archive_write_close(a);
+  archive_write_free(a);
+}
+#endif
+
 int
 main (int argc, char *argv[])
 {
   int remaining;
-
+  
   /* Parse and process arguments.  This includes opening the modules.  */
   argp_children[0].argp = dwfl_standard_argp ();
   argp_children[0].group = 1;
-
+  
   Dwfl *dwfl = NULL;
   (void) argp_parse (&argp, argc, argv, 0, &remaining, &dwfl);
   assert (dwfl != NULL);
   /* Process all loaded modules - probably just one, except if -K or -p is 
used. */
   (void) dwfl_getmodules (dwfl, &collect_sourcefiles, NULL, 0);
 
-  if (!debug_sourcefiles.empty ())
+  if (verbose)
+  {
+    clog <<"Filepaths within the debug_sourcefiles set:" << endl;
+    for (auto &file_path : debug_sourcefiles)
+      clog << file_path << "\n";
+    clog << endl;
+  }
+  #ifdef HAVE_LIBARCHIVE
+    if (zip)
+      zip_files();
+  #endif
+  else if (!debug_sourcefiles.empty ())
     for (const string &element : debug_sourcefiles)
       {
-        std::cout << element;
+        cout << element;
         if (null_arg)
-          std::cout << '\0';
+          cout << '\0';
         else
-          std::cout << '\n';
+          cout << '\n';
       }
 
   dwfl_end (dwfl);
diff --git a/tests/run-srcfiles-self.sh b/tests/run-srcfiles-self.sh
index 0e64dd2b..22045c45 100755
--- a/tests/run-srcfiles-self.sh
+++ b/tests/run-srcfiles-self.sh
@@ -17,6 +17,9 @@
 
 . $srcdir/test-subr.sh
 
+# for test case debugging, uncomment:
+set -x
+
 # Test different command line combinations on the srcfiles binary itself.
 ET_EXEC="${abs_top_builddir}/src/srcfiles"
 ET_PID=$$
@@ -26,6 +29,9 @@ SRC_NAME="srcfiles.cxx"
 # Ensure the output contains the expected source file srcfiles.cxx
 testrun $ET_EXEC -e $ET_EXEC | grep $SRC_NAME > /dev/null
 
+# Check if zip option is available (only available if libarchive is available)
+$ET_EXEC --help | grep -q zip && zip=true || zip=false
+
 for null_arg in --null ""; do
   for verbose_arg in --verbose ""; do
     testrun $ET_EXEC $null_arg $verbose_arg -p $ET_PID > /dev/null
@@ -40,9 +46,39 @@ for null_arg in --null ""; do
       exit 1
     fi
 
-    # Ensure that the output with the cu-only option contains less source files
+    # Ensure that the output with the cu-only option contains fewer source 
files
     if [ $(echo "$cu_only" | wc -m) -gt $(echo "$default" | wc -m) ]; then
       exit 1
     fi
+
+    if $zip; then
+      # Zip option tests
+      testrun $ET_EXEC $verbose_arg -z -e $ET_EXEC > test.zip
+      # Check if the 'unzip' command is available before attempting to use it
+      if command -v unzip >/dev/null 2>&1; then
+        # Check integrity of the zip
+        if ! unzip -t test.zip; then
+          echo "Unzip failed. zip corrupted."
+          exit 1
+        fi
+        # Ensure that the zip contains srclines.cxx
+        if ! unzip -l test.zip | grep -q "$SRC_NAME"; then
+          echo "Unzip failed. srcfiles.cxx not found in compressed files."
+          exit 1
+        fi
+        # Ensure unzipped srclines.cxx and its contents are the same as the 
original source file
+        unzip -j test.zip "*/$SRC_NAME"
+        if cmp -s ./srcfiles.cxx "$ET_EXEC"; then
+          echo "Unzip failed. srcfiles.cxx not found in decompressed files."
+          rm -f test.zip srcfiles.cxx
+          exit 1
+        fi
+        rm -f test.zip srcfiles.cxx
+      # Unzip not available. Not checking integrity of the zip file.
+      else
+        echo "Unzip unavailable. Integrity of the zip file not checked."
+        rm -f test.zip
+      fi
+    fi
   done
-done
+done
\ No newline at end of file
-- 
2.41.0

Reply via email to