[PATCH] libdw: Handle .debug_loclists in dwarf_getlocation.

2018-05-28 Thread Mark Wielaard
Handle all new DW_LLE opcodes in .debug_loclists in dwarf_getlocation.
__libdw_read_begin_end_pair_inc now also handles a default location
(which is simply the range [0,-1]). Since expression blocks can now
also come from the .debug_loclists section add a new fake_loclists_cu
necessary for checking bounds while parsing expression blocks. Adapt
varlocs test to handle .debug only files. Run it on testfileranges5.debug
and testfilesplitranges5.debug.

testfilesplitranges5.debug had to be regenerated with a newer GCC because
of a bug in the generation of DW_LLE_startx_length:
https://gcc.gnu.org/ml/gcc-patches/2018-05/msg01562.html

Signed-off-by: Mark Wielaard 
---
 libdw/ChangeLog  |  17 
 libdw/dwarf_begin_elf.c  |  27 +++
 libdw/dwarf_end.c|   5 ++
 libdw/dwarf_getlocation.c|  99 ---
 libdw/dwarf_getlocation_attr.c   |   6 +-
 libdw/dwarf_ranges.c | 114 +++
 libdw/libdwP.h   |  87 +++-
 libdw/libdw_findcu.c |   1 +
 src/ChangeLog|   5 ++
 src/readelf.c|   4 +-
 tests/ChangeLog  |  11 +++
 tests/run-varlocs.sh | 148 +++
 tests/testfile-ranges-hello5.dwo.bz2 | Bin 1296 -> 1261 bytes
 tests/testfile-ranges-world5.dwo.bz2 | Bin 1466 -> 1514 bytes
 tests/testfilesplitranges5.debug.bz2 | Bin 2235 -> 2246 bytes
 tests/varlocs.c  |  39 +++--
 16 files changed, 538 insertions(+), 25 deletions(-)

diff --git a/libdw/ChangeLog b/libdw/ChangeLog
index 0db49bf..22712f1 100644
--- a/libdw/ChangeLog
+++ b/libdw/ChangeLog
@@ -1,3 +1,20 @@
+2018-04-07  Mark Wielaard  
+
+   * libdwP.h (struct Dwarf_CU): Add locs_base.
+   (__libdw_cu_locs_base): New static inline function.
+   * libdw_findcu.c (__libdw_intern_next_unit): Initialize locs_base.
+   * dwarf_begin_elf.c (valid_p): Create fake_loclists_cu if necessary.
+   * dwarf_end.c (dwarf_end): Clean up fake_loclists_cu.
+   * dwarf_getlocation.c (initial_offset): Handle .debug_loclists.
+   (getlocations_addr): Likewise.
+   (dwarf_getlocation_addr): Likewise.
+   * dwarf_getlocation_attr.c (attr_form_cu): Use fake_loclists_cu for
+   DWARF5.
+   (initial_offset): Handle DW_FORM_loclistx.
+   * dwarf_ranges.c (__libdw_read_begin_end_pair_inc): Handle
+   .debug_loclists.
+   * libdwP.h (struct Dwarf): Add fake_loclists_cu.
+
 2018-04-12  Mark Wielaard  
 
* dwarf.h: Add DWARF5 location list entry DW_LLE encodings.
diff --git a/libdw/dwarf_begin_elf.c b/libdw/dwarf_begin_elf.c
index af5096f..513af2b 100644
--- a/libdw/dwarf_begin_elf.c
+++ b/libdw/dwarf_begin_elf.c
@@ -226,6 +226,9 @@ valid_p (Dwarf *result)
   result = NULL;
 }
 
+  /* For dwarf_location_attr () we need a "fake" CU to indicate
+ where the "fake" attribute data comes from.  This is a block
+ inside the .debug_loc or .debug_loclists section.  */
   if (result != NULL && result->sectiondata[IDX_debug_loc] != NULL)
 {
   result->fake_loc_cu = (Dwarf_CU *) calloc (1, sizeof (Dwarf_CU));
@@ -248,6 +251,29 @@ valid_p (Dwarf *result)
}
 }
 
+  if (result != NULL && result->sectiondata[IDX_debug_loclists] != NULL)
+{
+  result->fake_loclists_cu = (Dwarf_CU *) calloc (1, sizeof (Dwarf_CU));
+  if (unlikely (result->fake_loclists_cu == NULL))
+   {
+ Dwarf_Sig8_Hash_free (&result->sig8_hash);
+ __libdw_seterrno (DWARF_E_NOMEM);
+ free (result->fake_loc_cu);
+ free (result);
+ result = NULL;
+   }
+  else
+   {
+ result->fake_loclists_cu->sec_idx = IDX_debug_loclists;
+ result->fake_loclists_cu->dbg = result;
+ result->fake_loclists_cu->startp
+   = result->sectiondata[IDX_debug_loclists]->d_buf;
+ result->fake_loclists_cu->endp
+   = (result->sectiondata[IDX_debug_loclists]->d_buf
+  + result->sectiondata[IDX_debug_loclists]->d_size);
+   }
+}
+
   /* For DW_OP_constx/GNU_const_index and DW_OP_addrx/GNU_addr_index
  the dwarf_location_attr () will need a "fake" address CU to
  indicate where the attribute data comes from.  This is a just
@@ -260,6 +286,7 @@ valid_p (Dwarf *result)
  Dwarf_Sig8_Hash_free (&result->sig8_hash);
  __libdw_seterrno (DWARF_E_NOMEM);
  free (result->fake_loc_cu);
+ free (result->fake_loclists_cu);
  free (result);
  result = NULL;
}
diff --git a/libdw/dwarf_end.c b/libdw/dwarf_end.c
index 1954674..23a50a0 100644
--- a/libdw/dwarf_end.c
+++ b/libdw/dwarf_end.c
@@ -113,6 +113,11 @@ dwarf_end (Dwarf *dwarf)
  cu_free (dwarf->fake_loc_cu);
  free (dwarf->fake_loc_cu);
}
+  if (dwarf->fake_loclists_cu != NULL)
+   {
+ cu_free (dwarf->fake_

Re: [PATCH] libdw: Handle .debug_rnglists in dwarf_ranges.

2018-05-28 Thread Mark Wielaard
On Thu, 2018-05-24 at 21:08 +0200, Mark Wielaard wrote:
> Handle all new DW_RLE opcodes in .debug_rnglists in dwarf_ranges. Extract
> code for reading .debug_addr indexes from dwarf_formaddr as __libdw_addrx
> to reuse in __libdw_read_begin_end_pair_inc. And add new testcase.

Pushed this to master.

But I first regenerated the new test files with a newer GCC that fixes
the location expressions as promised for the just posted "libdw: Handle
.debug_loclists in dwarf_getlocation" patch.

It seemed better to only have one set in the tree.
Both testfileranges5.debug and testfilesplitranges5.debug got
regenerated (and the test results slightly adjusted) to show that the
ranges are identical for both, even though they are using completely
different DWARF encodings.

Cheers,

Mark


[PATCH] readelf handle .debug_addr section.

2018-05-28 Thread Mark Wielaard
Add debug-dump=addr which will show the .debug_addr section tables.
The only tricky bit is the fact that GNU DebugFission, a DWARF4
extension, didn't produce unit table headers. So if we see a mixed
DWARF4/5 .debug_addr table we have to reconstruct the table length
from the CU DIE DW_AT_[GNU_]addr_base offsets.

Signed-off-by: Mark Wielaard 
---
 src/ChangeLog   |  12 ++
 src/readelf.c   | 267 +++-
 tests/ChangeLog |   7 ++
 tests/Makefile.am   |   2 +
 tests/run-readelf-addr.sh   | 143 
 tests/run-readelf-ranges.sh |   4 +-
 6 files changed, 431 insertions(+), 4 deletions(-)
 create mode 100755 tests/run-readelf-addr.sh

diff --git a/src/ChangeLog b/src/ChangeLog
index b6c2743..01ecc61 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,15 @@
+2018-04-27  Mark Wielaard  
+
+   * readelf.c (options): Add addr.
+   (enum section_e): Add section_addr.
+   (section_all): Add section_addr.
+   (parse_opt): Parse "addr".
+   (known_addrbases): New static variable.
+   (get_listptr): New function.
+   (print_debug_addr_section): Likewise.
+   (attr_callback): Handle DW_AT_addr_base and DW_AT_GNU_addr_base.
+   (print_debug): Add NEW_SECTION (addr). Reset known_addrbases.
+
 2018-04-07  Mark Wielaard  
 
* readelf.c (attr_callback): Handle DW_FORM_loclistx and
diff --git a/src/readelf.c b/src/readelf.c
index 0171673..de38ac6 100644
--- a/src/readelf.c
+++ b/src/readelf.c
@@ -121,7 +121,7 @@ static const struct argp_option options[] =
 
   { NULL, 0, NULL, 0, N_("Additional output selection:"), 0 },
   { "debug-dump", 'w', "SECTION", OPTION_ARG_OPTIONAL,
-N_("Display DWARF section content.  SECTION can be one of abbrev, "
+N_("Display DWARF section content.  SECTION can be one of abbrev, addr, "
"aranges, decodedaranges, frame, gdb_index, info, info+, loc, line, "
"decodedline, ranges, pubnames, str, macinfo, macro or exception"), 0 },
   { "hex-dump", 'x', "SECTION", 0,
@@ -248,11 +248,12 @@ static enum section_e
   section_exception = 1024,/* .eh_frame & al.  */
   section_gdb_index = 2048,/* .gdb_index  */
   section_macro = 4096,/* .debug_macro  */
+  section_addr = 8192,
   section_all = (section_abbrev | section_aranges | section_frame
 | section_info | section_line | section_loc
 | section_pubnames | section_str | section_macinfo
 | section_ranges | section_exception | section_gdb_index
-| section_macro)
+| section_macro | section_addr)
 } print_debug_sections, implicit_debug_sections;
 
 /* Select hex dumping of sections.  */
@@ -442,6 +443,11 @@ parse_opt (int key, char *arg,
}
   else if (strcmp (arg, "abbrev") == 0)
print_debug_sections |= section_abbrev;
+  else if (strcmp (arg, "addr") == 0)
+   {
+ print_debug_sections |= section_addr;
+ implicit_debug_sections |= section_info;
+   }
   else if (strcmp (arg, "aranges") == 0)
print_debug_sections |= section_aranges;
   else if (strcmp (arg, "decodedaranges") == 0)
@@ -4817,6 +4823,7 @@ static struct listptr_table known_locsptr;
 static struct listptr_table known_loclistsptr;
 static struct listptr_table known_rangelistptr;
 static struct listptr_table known_rnglistptr;
+static struct listptr_table known_addrbases;
 
 static void
 reset_listptr (struct listptr_table *table)
@@ -4934,6 +4941,15 @@ next_listptr_offset (struct listptr_table *table, size_t 
idx)
   return 0;
 }
 
+/* Returns the listptr associated with the given index, or NULL.  */
+static struct listptr *
+get_listptr (struct listptr_table *table, size_t idx)
+{
+  if (idx >= table->n)
+return NULL;
+  return &table->table[idx];
+}
+
 /* Returns the next index, base address and CU associated with the
list unit offsets.  If there is none false is returned, otherwise
true.  Assumes the table has been sorted.  */
@@ -5033,6 +5049,235 @@ print_debug_abbrev_section (Dwfl_Module *dwflmod 
__attribute__ ((unused)),
 }
 
 
+static void
+print_debug_addr_section (Dwfl_Module *dwflmod __attribute__ ((unused)),
+ Ebl *ebl, GElf_Ehdr *ehdr,
+ Elf_Scn *scn, GElf_Shdr *shdr, Dwarf *dbg)
+{
+  printf (gettext ("\
+\nDWARF section [%2zu] '%s' at offset %#" PRIx64 ":\n"),
+ elf_ndxscn (scn), section_name (ebl, ehdr, shdr),
+ (uint64_t) shdr->sh_offset);
+
+  if (shdr->sh_size == 0)
+return;
+
+  /* We like to get the section from libdw to make sure they are relocated.  */
+  Elf_Data *data = (dbg->sectiondata[IDX_debug_addr]
+   ?: elf_rawdata (scn, NULL));
+  if (unlikely (data == NULL))
+{
+  error (0, 0, gettext ("cannot get .debug_addr section data: %s"),
+elf_errmsg (-1));
+  return;
+}
+
+  size_t idx = 0;
+  sort_listptr (&known_add

[PATCH] readelf: Handle .debug_str_offsets.

2018-05-28 Thread Mark Wielaard
The .debug_str_offsets tables are indirect string offsets into the
.debug_str section. For DWARF5 they can be in any of the main, skeleton
and split DWARF (.dwo) files.

For DWARF4 with the GNU DebugFission extension the tables will not have
a header and they will only be in the split DWARF (.dwo) file, never in
the main (skeleton) file.

For DWARF5 the (non-split) unit DIE will have a DW_AT_str_offsets_base
attribute pointing at the actual index (after the header). The split
unit will never have this attribute (and use the table at offset zero).

Signed-off-by: Mark Wielaard 
---
 src/ChangeLog|   9 ++
 src/readelf.c| 213 ++-
 tests/ChangeLog  |   6 ++
 tests/Makefile.am|   4 +-
 tests/run-readelf-str.sh | 211 ++
 5 files changed, 440 insertions(+), 3 deletions(-)
 create mode 100755 tests/run-readelf-str.sh

diff --git a/src/ChangeLog b/src/ChangeLog
index 01ecc61..545fb50 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,12 @@
+2018-04-29  Mark Wielaard  
+
+   * readelf.c (parse_opt): Request implicit section_info for "str".
+   (known_stroffbases): New static variable.
+   (attr_callback): Handle DW_AT_str_offsets_base.
+   (print_debug_str_offsets_section): New function.
+   (print_debug): Handle .debug_str_offsets as section_str. Reset
+   known_stroffbases.
+
 2018-04-27  Mark Wielaard  
 
* readelf.c (options): Add addr.
diff --git a/src/readelf.c b/src/readelf.c
index de38ac6..89960b3 100644
--- a/src/readelf.c
+++ b/src/readelf.c
@@ -484,7 +484,11 @@ parse_opt (int key, char *arg,
   else if (strcmp (arg, "pubnames") == 0)
print_debug_sections |= section_pubnames;
   else if (strcmp (arg, "str") == 0)
-   print_debug_sections |= section_str;
+   {
+ print_debug_sections |= section_str;
+ /* For mapping string offset tables to CUs.  */
+ implicit_debug_sections |= section_info;
+   }
   else if (strcmp (arg, "macinfo") == 0)
print_debug_sections |= section_macinfo;
   else if (strcmp (arg, "macro") == 0)
@@ -4824,6 +4828,7 @@ static struct listptr_table known_loclistsptr;
 static struct listptr_table known_rangelistptr;
 static struct listptr_table known_rnglistptr;
 static struct listptr_table known_addrbases;
+static struct listptr_table known_stroffbases;
 
 static void
 reset_listptr (struct listptr_table *table)
@@ -7192,6 +7197,21 @@ attr_callback (Dwarf_Attribute *attrp, void *arg)
  }
  return DWARF_CB_OK;
 
+   case DW_AT_str_offsets_base:
+ {
+   bool stroffbase = notice_listptr (section_str, &known_stroffbases,
+ cbargs->addrsize,
+ cbargs->offset_size,
+ cbargs->cu, num, attr);
+   if (!cbargs->silent)
+ printf ("   %*s%-20s (%s) str offsets base [%6"
+ PRIxMAX "]%s\n",
+ (int) (level * 2), "", dwarf_attr_name (attr),
+ dwarf_form_name (form), (uintmax_t) num,
+ stroffbase ? "" : " ");
+ }
+ return DWARF_CB_OK;
+
case DW_AT_language:
  valuestr = dwarf_lang_name (num);
  break;
@@ -9936,6 +9956,193 @@ print_debug_str_section (Dwfl_Module *dwflmod 
__attribute__ ((unused)),
 }
 }
 
+static void
+print_debug_str_offsets_section (Dwfl_Module *dwflmod __attribute__ ((unused)),
+Ebl *ebl, GElf_Ehdr *ehdr,
+Elf_Scn *scn, GElf_Shdr *shdr, Dwarf *dbg)
+{
+  printf (gettext ("\
+\nDWARF section [%2zu] '%s' at offset %#" PRIx64 ":\n"),
+ elf_ndxscn (scn), section_name (ebl, ehdr, shdr),
+ (uint64_t) shdr->sh_offset);
+
+  if (shdr->sh_size == 0)
+return;
+
+  /* We like to get the section from libdw to make sure they are relocated.  */
+  Elf_Data *data = (dbg->sectiondata[IDX_debug_str_offsets]
+   ?: elf_rawdata (scn, NULL));
+  if (unlikely (data == NULL))
+{
+  error (0, 0, gettext ("cannot get .debug_str_offsets section data: %s"),
+elf_errmsg (-1));
+  return;
+}
+
+  size_t idx = 0;
+  sort_listptr (&known_stroffbases, "str_offsets");
+
+  const unsigned char *start = (const unsigned char *) data->d_buf;
+  const unsigned char *readp = start;
+  const unsigned char *readendp = ((const unsigned char *) data->d_buf
+  + data->d_size);
+
+  while (readp < readendp)
+{
+  /* Most string offset tables will have a header.  For split
+dwarf unit GNU DebugFission didn't add one.  But they were
+also only defined for split units (main or skeleton units
+didn't have indirect strings).  So if we don't have a
+DW_AT_str_offsets_base at all and thi

Re: [PATCH] readelf: Find skeleton units when inspecting split .dwo (--dwarf-skeleton).

2018-05-28 Thread Mark Wielaard
On Fri, 2018-05-25 at 14:43 +0200, Mark Wielaard wrote:
> To get the right context (especially addresses) when looking at a .dwo file
> we really need the skeleton file.  If we can find it (simply replace .dwo
> with .o) then use that to get to the split DWARF units so that libdw sets
> up all relevant information to resolve.
> 
> Also adds a --dwarf-skeleton option so the user can explicitly give a
> skeleton file to use (for example when all .o files are linked and removed
> already).  Unfortunately this might not work if libdw cannot get from the
> skeleton file to the .dwo file (because they have been moved around).
> In that case eu-readelf "cheats", it will link up the libdw datastructures
> so that the skeleton and split DWARF units are setup correctly anyway.
> This does introduce a problem when trying to cleanup the Dwarf handle
> ownership graph. So we will deliberately leak memory by not closing the
> underlying Dwfl in that case.

Pushed to master.


[PATCH] libdw: Fix memory corruption in libdw_find_split_unit.

2018-05-28 Thread Mark Wielaard
Found by valgrind when trying to match a split unit from a .dwo file
that doesn't contain the matching split unit as its first unit. We would
close the split Dwarf too early, before we had inspected all units in it.

Add a testcase that simulates this. It failed (at least under valgrind
as run by make distcheck) before the fix.

Signed-off-by: Mark Wielaard 
---
 libdw/ChangeLog|  5 +
 libdw/libdw_find_split_unit.c  | 11 +--
 src/ChangeLog  |  5 +
 src/readelf.c  |  2 +-
 tests/ChangeLog|  6 ++
 tests/Makefile.am  |  5 +++--
 tests/run-readelf-info-plus.sh | 31 +++
 7 files changed, 56 insertions(+), 9 deletions(-)
 create mode 100755 tests/run-readelf-info-plus.sh

diff --git a/libdw/ChangeLog b/libdw/ChangeLog
index 22712f1..d187930 100644
--- a/libdw/ChangeLog
+++ b/libdw/ChangeLog
@@ -1,3 +1,8 @@
+2018-05-28  Mark Wielaard  
+
+   * libdw_find_split_unit.c (__libdw_find_split_unit): End split_dwarf
+   only after we tried every unit id in it.
+
 2018-04-07  Mark Wielaard  
 
* libdwP.h (struct Dwarf_CU): Add locs_base.
diff --git a/libdw/libdw_find_split_unit.c b/libdw/libdw_find_split_unit.c
index fcfc46e..d6527e0 100644
--- a/libdw/libdw_find_split_unit.c
+++ b/libdw/libdw_find_split_unit.c
@@ -94,14 +94,13 @@ __libdw_find_split_unit (Dwarf_CU *cu)
  elf_cntl (split_dwarf->elf, ELF_C_FDDONE);
  break;
}
-
- if (cu->split == (Dwarf_CU *) -1)
-   dwarf_end (split_dwarf);
}
- /* Always close, because we don't want to run
-out of file descriptors.  See also the
-elf_fcntl ELF_C_FDDONE call above.  */
+ if (cu->split == (Dwarf_CU *) -1)
+   dwarf_end (split_dwarf);
}
+ /* Always close, because we don't want to run
+out of file descriptors.  See also the
+elf_fcntl ELF_C_FDDONE call above.  */
  close (split_fd);
}
  free (dwo_path);
diff --git a/src/ChangeLog b/src/ChangeLog
index 545fb50..b6f66bd 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,8 @@
+2018-05-28  Mark Wielaard  
+
+   * readelf.c (print_debug_units): Turn "Could not find split compile
+   unit" into a warning instead of an error.
+
 2018-04-29  Mark Wielaard  
 
* readelf.c (parse_opt): Request implicit section_info for "str".
diff --git a/src/readelf.c b/src/readelf.c
index be9fe88..bfa1d16 100644
--- a/src/readelf.c
+++ b/src/readelf.c
@@ -7668,7 +7668,7 @@ print_debug_units (Dwfl_Module *dwflmod,
  || dwarf_tag (&subdie) == DW_TAG_invalid)
{
  if (!silent)
-   error (0, 0, gettext ("Could not find split compile unit"));
+   fprintf (stderr, gettext ("Could not find split compile unit"));
}
   else
{
diff --git a/tests/ChangeLog b/tests/ChangeLog
index 7ac6bd3..4d69515 100644
--- a/tests/ChangeLog
+++ b/tests/ChangeLog
@@ -1,3 +1,9 @@
+2018-05-28  Mark Wielaard  
+
+   * run-readelf-info-plus.sh: New test.
+   * Makefile.am (TESTS): Add run-readelf-info-plus.sh.
+   (EXTRA_DIST): Likewise.
+
 2018-04-29  Mark Wielaard  
 
* run-readelf-addr.sh: New test.
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 4cd0665..e935410 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -93,7 +93,7 @@ TESTS = run-arextract.sh run-arsymtest.sh run-ar.sh newfile 
test-nlist \
run-ranlib-test2.sh run-ranlib-test3.sh run-ranlib-test4.sh \
run-addrscopes.sh run-strings-test.sh run-funcscopes.sh \
run-find-prologues.sh run-allregs.sh run-addrcfi.sh \
-   run-nm-self.sh run-readelf-self.sh \
+   run-nm-self.sh run-readelf-self.sh run-readelf-info-plus.sh \
run-varlocs-self.sh run-exprlocs-self.sh \
run-readelf-test1.sh run-readelf-test2.sh run-readelf-test3.sh \
run-readelf-test4.sh run-readelf-twofiles.sh \
@@ -197,7 +197,8 @@ EXTRA_DIST = run-arextract.sh run-arsymtest.sh run-ar.sh \
 run-elflint-self.sh run-ranlib-test.sh run-ranlib-test2.sh \
 run-ranlib-test3.sh run-ranlib-test4.sh \
 run-addrscopes.sh run-strings-test.sh run-funcscopes.sh \
-run-nm-self.sh run-readelf-self.sh run-addrcfi.sh \
+run-nm-self.sh run-readelf-self.sh run-readelf-info-plus.sh \
+run-addrcfi.sh \
 run-varlocs-self.sh run-exprlocs-self.sh \
 run-find-prologues.sh run-allregs.sh run-native-test.sh \
 run-addrname-test.sh run-dwfl-bug-offline-rel.sh \
diff --git a/tests/run-readelf-info-plus.sh b/tests/run-readelf-info-plus.sh
new file mode 100755
index 000..ee1db02
--- /dev/null
+++ b/tests/