Hi Aaron, On Tue, 2023-10-31 at 16:33 -0400, Aaron Merey wrote: > I'd like to merge this patch before the next release. Unless anyone objects > I'll merge it by Friday Nov 3.
Looks good to me. Some small comments below. > Commit message: BTW if you add comments like the above after the first "scissors" --- line, they wont show up in the actually commit message when applying the email message (that is why the stat output is put there). > Version 9 adds a "shortcut table" to the index. The shortcut table contains > the name and language of the main function, if it exists. > > A testcase added in this patch uses an executable written with Fortran. > This is because gdb does not currently populate the shortcut table of > C/C++ programs (see sourceware PR30996). > > Signed-off-by: Aaron Merey <ame...@redhat.com> > --- > src/readelf.c | 66 +++++++++++++++- > tests/Makefile.am | 3 +- > tests/run-readelf-gdb_index.sh | 95 +++++++++++++++++++++++- > tests/testfilegdbindex9-no-maininfo.bz2 | Bin 0 -> 3502 bytes > tests/testfilegdbindex9.bz2 | Bin 0 -> 4266 bytes > 5 files changed, 159 insertions(+), 5 deletions(-) > create mode 100755 tests/testfilegdbindex9-no-maininfo.bz2 > create mode 100755 tests/testfilegdbindex9.bz2 > > diff --git a/src/readelf.c b/src/readelf.c > index db31ad09..a28f6236 100644 > --- a/src/readelf.c > +++ b/src/readelf.c > @@ -11539,8 +11539,9 @@ print_gdb_index_section (Dwfl_Module *dwflmod, Ebl > *ebl, > // hash used for generating the table. Version 6 contains symbols > // for inlined functions, older versions didn't. Version 7 adds > // symbol kinds. Version 8 just indicates that it correctly includes > - // TUs for symbols. > - if (vers < 4 || vers > 8) > + // TUs for symbols. Version 9 adds shortcut table for information > + // regarding the main function. > + if (vers < 4 || vers > 9) > { > printf (_(" unknown version, cannot parse section\n")); > return; OK. BTW the description of the gdb_index at the top https://sourceware.org/gdb/current/onlinedocs/gdb/Index-Section-Format.html doesn't resolve anymore. It is now https://sourceware.org/gdb/current/onlinedocs/gdb.html/Index-Section-Format.html Maybe we should report that to the gdb project. > @@ -11578,6 +11579,17 @@ print_gdb_index_section (Dwfl_Module *dwflmod, Ebl > *ebl, > if (unlikely (readp + 4 > dataend)) > goto invalid_data; > > + uint32_t shortcut_off = 0; > + if (vers >= 9) > + { > + shortcut_off = read_4ubyte_unaligned (dbg, readp); > + printf (_(" shortcut offset: %#" PRIx32 "\n"), shortcut_off); > + > + readp += 4; > + if (unlikely (readp + 4 > dataend)) > + goto invalid_data; > + } > + > uint32_t const_off = read_4ubyte_unaligned (dbg, readp); > printf (_(" constant offset: %#" PRIx32 "\n"), const_off); OK > @@ -11675,8 +11687,19 @@ print_gdb_index_section (Dwfl_Module *dwflmod, Ebl > *ebl, > if (const_off >= data->d_size) > goto invalid_data; > > + const unsigned char *shortcut_start = NULL; > + if (vers >= 9) > + { > + if (shortcut_off >= data->d_size) > + goto invalid_data; > + > + shortcut_start = data->d_buf + shortcut_off; > + nextp = shortcut_start; > + } > + else > + nextp = const_start; > + > readp = data->d_buf + sym_off; > - nextp = const_start; > size_t sym_nr = (nextp - readp) / 8; > > printf (_("\n Symbol table at offset %#" PRIx32 OK > @@ -11750,6 +11773,43 @@ print_gdb_index_section (Dwfl_Module *dwflmod, Ebl > *ebl, > } > n++; > } > + > + if (vers < 9) > + return; > + > + if (unlikely (shortcut_start == NULL)) > + goto invalid_data; > + > + readp = shortcut_start; > + nextp = const_start; > + size_t shortcut_nr = (nextp - readp) / 4; > + > + if (unlikely (shortcut_nr != 2)) > + goto invalid_data; OK. So you just expect two entries. > + printf (_("\nShortcut table at offset %#" PRIx32 " contains %zu slots:\n"), > + shortcut_off, shortcut_nr); That is a fancy way to print "contains 2 slots" :) > + uint32_t lang = read_4ubyte_unaligned (dbg, readp); > + readp += 4; > + > + printf (_("Language of main: %s\n"), dwarf_lang_name (lang)); Note that dwarf_lang_name calls string_or_unknown with false for print_unknown_num so it might make sense to either flip that to true (but there is probably a reason it doesn't print the hex num) or to always add the hex num after the name so the user doesn't just get ??? on an unknown DWARF_LANG constant. > + printf (_("Name of main: ")); > + > + if (lang != 0) > + { > + uint32_t name = read_4ubyte_unaligned (dbg, readp); > + readp += 4; > + const unsigned char *sym = const_start + name; > + > + if (unlikely ((size_t) (dataend - const_start) < name > + || memchr (sym, '\0', dataend - sym) == NULL)) > + goto invalid_data; Good end of string check. BTW. DW_LANG constants are going away with DWARF6: https://dwarfstd.org/languages-v6.html > + printf ("%s\n", sym); > + } > + else > + printf ("<unknown>\n"); > } > > /* Returns true and sets split DWARF CU id if there is a split compile OK > diff --git a/tests/Makefile.am b/tests/Makefile.am > index ef5b6bb5..e8bc9058 100644 > --- a/tests/Makefile.am > +++ b/tests/Makefile.am > @@ -421,7 +421,8 @@ EXTRA_DIST = run-arextract.sh run-arsymtest.sh run-ar.sh \ > run-readelf-Dd.sh \ > testfile-s390x-hash-both.bz2 \ > run-readelf-gdb_index.sh testfilegdbindex5.bz2 \ > - testfilegdbindex7.bz2 \ > + testfilegdbindex7.bz2 testfilegdbindex9.bz2 \ > + testfilegdbindex9-no-maininfo.bz2 \ > run-readelf-s.sh testfilebazdbg.bz2 testfilebazdyn.bz2 \ > testfilebazmin.bz2 testfilebazdbg.debug.bz2 testfilebazmdb.bz2 \ > testfilebaztab.bz2 testfilebasmin.bz2 testfilebaxmin.bz2 \ > diff --git a/tests/run-readelf-gdb_index.sh b/tests/run-readelf-gdb_index.sh > index fcbc3c57..95367ef8 100755 > --- a/tests/run-readelf-gdb_index.sh > +++ b/tests/run-readelf-gdb_index.sh > @@ -63,7 +63,7 @@ > # (gdb) save gdb-index . > # objcopy --add-section .gdb_index=testfilegdbindex7.gdb-index > --set-section-flags .gdb_index=readonly testfilegdbindex7 testfilegdbindex7 > > -testfiles testfilegdbindex5 testfilegdbindex7 > +testfiles testfilegdbindex5 testfilegdbindex7 testfilegdbindex9 > testfilegdbindex9-no-maininfo > > testrun_compare ${abs_top_builddir}/src/readelf --debug-dump=gdb_index > testfilegdbindex5 <<\EOF > > @@ -127,4 +127,97 @@ GDB section [33] '.gdb_index' at offset 0xe76 contains > 8399 bytes : > [ 754] symbol: int, CUs: 0 (type:S) > EOF > > +# testfilegdbindex9-no-maininfo is built the same way as testfilegdbindex7. > +testrun_compare ${abs_top_builddir}/src/readelf --debug-dump=gdb_index > testfilegdbindex9-no-maininfo <<\EOF > + > +GDB section [33] '.gdb_index' at offset 0x38e1 contains 8415 bytes : > + Version: 9 > + CU offset: 0x1c > + TU offset: 0x3c > + address offset: 0x54 > + symbol offset: 0x7c > + shortcut offset: 0x207c > + constant offset: 0x2084 > + > + CU list at offset 0x1c contains 2 entries: > + [ 0] start: 0x00004c, length: 220 > + [ 1] start: 0x000128, length: 214 > + > + TU list at offset 0x3c contains 1 entries: > + [ 0] CU offset: 0, type offset: 30, signature: 0x87e03f92cc37cdf0 > + > + Address list at offset 0x54 contains 2 entries: > + [ 0] 0x0000000000401106 <main>..0x000000000040113b <main+0x35>, CU index: > 1 > + [ 1] 0x000000000040113c <hello>..0x0000000000401173 <say+0x1c>, CU index: > 2 > + > + Symbol table at offset 0x54 contains 1024 slots: > + [ 123] symbol: global, CUs: 1 (var:G), 0T (var:G) > + [ 489] symbol: main, CUs: 1 (func:G) > + [ 518] symbol: char, CUs: 0 (type:S) > + [ 661] symbol: foo, CUs: 0 (type:S) > + [ 741] symbol: hello, CUs: 1 (var:S), 0T (func:S) > + [ 746] symbol: say, CUs: 0T (func:G) > + [ 754] symbol: int, CUs: 1 (type:S) > + > +Shortcut table at offset 0x207c contains 2 slots: > +Language of main: ??? > +Name of main: <unknown> > +EOF This seems an unfortunate example. Why is the language unknown? But maybe it is a nice example to show why you should at least print the hex num of the language? > +# testfilegdbindex9.f90 > +# > +# program repro > +# type small_stride > +# character*40 long_string > +# integer small_pad > +# end type small_stride > +# type(small_stride), dimension (20), target :: unpleasant > +# character*40, pointer, dimension(:):: c40pt > +# integer i > +# do i = 0,19 > +# unpleasant(i+1)%small_pad = i+1 > +# unpleasant(i+1)%long_string = char (ichar('0') + i) > +# end do > +# c40pt => unpleasant%long_string > +# print *, c40pt > +#end program repro > + > +# gfortran -g -o testfilegdbindex9 testfilegdbindex9.f90 > +# gdb-add-index testfilegdbindex9 > + > +testrun_compare ${abs_top_builddir}/src/readelf --debug-dump=gdb_index > testfilegdbindex9 <<\EOF > + > +GDB section [35] '.gdb_index' at offset 0x37d9 contains 8395 bytes : > + Version: 9 > + CU offset: 0x1c > + TU offset: 0x2c > + address offset: 0x2c > + symbol offset: 0x40 > + shortcut offset: 0x2040 > + constant offset: 0x2048 > + > + CU list at offset 0x1c contains 1 entries: > + [ 0] start: 00000000, length: 307 > + > + TU list at offset 0x2c contains 0 entries: > + > + Address list at offset 0x2c contains 1 entries: > + [ 0] 0x0000000000401166 <MAIN__>..0x00000000004013f0 <main+0x3a>, CU > index: 0 > + > + Symbol table at offset 0x2c contains 1024 slots: > + [ 61] symbol: small_stride, CUs: 0 (type:S) > + [ 71] symbol: integer(kind=8), CUs: 0 (type:S) > + [ 161] symbol: character(kind=1), CUs: 0 (type:S) > + [ 397] symbol: unpleasant, CUs: 0 (var:S) > + [ 489] symbol: main, CUs: 0 (func:G) > + [ 827] symbol: integer(kind=4), CUs: 0 (type:S) > + [ 858] symbol: c40pt, CUs: 0 (var:S) > + [ 965] symbol: repro, CUs: 0 (func:S) > + [1016] symbol: i, CUs: 0 (var:S) > + > +Shortcut table at offset 0x2040 contains 2 slots: > +Language of main: Fortran08 > +Name of main: repro > +EOF Nice.