Hi Omar,

On Wed, 2023-12-06 at 01:22 -0800, Omar Sandoval wrote:
> The .debug_cu_index and .debug_tu_index sections in DWARF package files
> are basically hash tables mapping a unit's 8 byte signature to an offset
> and size in each section used by that unit [1].  Add support for parsing
> and doing lookups in the index sections.
> 
> We look up a unit in the index when we intern it and cache its hash
> table row in Dwarf_CU.  Then, a new function, dwarf_cu_dwp_section_info,
> can be used to look up the section offsets and sizes for a unit.  This
> will mostly be used internally in libdw, but it will also be needed in
> static inline functions shared with eu-readelf.  Additionally, making it
> public it makes dwp support much easier for external tools that do their
> own low-level parsing of DWARF information, like drgn [2].

You convinced me that dwarf_cu_dwp_section_info should be a public
function. And speaking of eu-readelf, we should also make it parse the
.debug_tu_index and .debug_cu_index sections. But that is for another
time.

Pushed with one merge conflict resolved.

Please look over the review comments below to see if I interpreted
anything wrongly.

> 1: 
> https://gcc.gnu.org/wiki/DebugFissionDWP#Format_of_the_CU_and_TU_Index_Sections
> 2: https://github.com/osandov/drgn
> 
>       * libdw/dwarf.h: Add DW_SECT_TYPES.
>       * libdw/libdwP.h (Dwarf): Add cu_index and tu_index.
>       (Dwarf_CU): Add dwp_row.
>       (Dwarf_Package_Index): New type.
>       (__libdw_dwp_find_unit): New declaration.
>       (dwarf_cu_dwp_section_info): New INTDECL.
>       Add DWARF_E_UNKNOWN_SECTION.
>       * libdw/Makefile.am (libdw_a_SOURCES): Add
>       dwarf_cu_dwp_section_info.c.
>       * libdw/dwarf_end.c (dwarf_end): Free dwarf->cu_index and
>       dwarf->tu_index.
>       * libdw/dwarf_error.c (errmsgs): Add DWARF_E_UNKNOWN_SECTION.
>       * libdw/libdw.h (dwarf_cu_dwp_section_info): New declaration.
>       * libdw/libdw.map (ELFUTILS_0.190): Add
>       dwarf_cu_dwp_section_info.
>       * libdw/libdw_findcu.c (__libdw_intern_next_unit): Call
>       __libdw_dwp_find_unit, and use it to adjust abbrev_offset and
>       assign newp->dwp_row.
>       * libdw/dwarf_cu_dwp_section_info.c: New file.
>       * tests/Makefile.am (check_PROGRAMS): Add cu-dwp-section-info.
>       (TESTS): Add run-cu-dwp-section-info.sh
>       (EXTRA_DIST): Add run-cu-dwp-section-info.sh and new test files.
>       (cu_dwp_section_info_LDADD): New variable.
>       * tests/cu-dwp-section-info.c: New test.
>       * tests/run-cu-dwp-section-info.sh: New test.
>       * tests/testfile-dwp-4-strict.bz2: New test file.
>       * tests/testfile-dwp-4-strict.dwp.bz2: New test file.
>       * tests/testfile-dwp-4.bz2: New test file.
>       * tests/testfile-dwp-4.dwp.bz2: New test file.
>       * tests/testfile-dwp-5.bz2: New test file.
>       * tests/testfile-dwp-5.dwp.bz2: New test file.
>       * tests/testfile-dwp.source: New file.
> 
> Signed-off-by: Omar Sandoval <osan...@fb.com>
> ---
>  libdw/Makefile.am                   |   2 +-
>  libdw/dwarf.h                       |   2 +-
>  libdw/dwarf_cu_dwp_section_info.c   | 371 ++++++++++++++++++++++++++++
>  libdw/dwarf_end.c                   |   3 +
>  libdw/dwarf_error.c                 |   1 +
>  libdw/libdw.h                       |  23 ++
>  libdw/libdw.map                     |   5 +
>  libdw/libdwP.h                      |  33 +++
>  libdw/libdw_findcu.c                |   8 +
>  tests/.gitignore                    |   1 +
>  tests/Makefile.am                   |  11 +-
>  tests/cu-dwp-section-info.c         |  73 ++++++
>  tests/run-cu-dwp-section-info.sh    | 168 +++++++++++++
>  tests/testfile-dwp-4-strict.bz2     | Bin 0 -> 4169 bytes
>  tests/testfile-dwp-4-strict.dwp.bz2 | Bin 0 -> 6871 bytes
>  tests/testfile-dwp-4.bz2            | Bin 0 -> 4194 bytes
>  tests/testfile-dwp-4.dwp.bz2        | Bin 0 -> 10098 bytes
>  tests/testfile-dwp-5.bz2            | Bin 0 -> 4223 bytes
>  tests/testfile-dwp-5.dwp.bz2        | Bin 0 -> 10313 bytes
>  tests/testfile-dwp.source           | 102 ++++++++
>  20 files changed, 798 insertions(+), 5 deletions(-)
>  create mode 100644 libdw/dwarf_cu_dwp_section_info.c
>  create mode 100644 tests/cu-dwp-section-info.c
>  create mode 100755 tests/run-cu-dwp-section-info.sh
>  create mode 100755 tests/testfile-dwp-4-strict.bz2
>  create mode 100644 tests/testfile-dwp-4-strict.dwp.bz2
>  create mode 100755 tests/testfile-dwp-4.bz2
>  create mode 100644 tests/testfile-dwp-4.dwp.bz2
>  create mode 100755 tests/testfile-dwp-5.bz2
>  create mode 100644 tests/testfile-dwp-5.dwp.bz2
>  create mode 100644 tests/testfile-dwp.source
> 
> diff --git a/libdw/Makefile.am b/libdw/Makefile.am
> index e548f38c..5363c02a 100644
> --- a/libdw/Makefile.am
> +++ b/libdw/Makefile.am
> @@ -93,7 +93,7 @@ libdw_a_SOURCES = dwarf_begin.c dwarf_begin_elf.c 
> dwarf_end.c dwarf_getelf.c \
>                 dwarf_cu_die.c dwarf_peel_type.c dwarf_default_lower_bound.c \
>                 dwarf_die_addr_die.c dwarf_get_units.c \
>                 libdw_find_split_unit.c dwarf_cu_info.c \
> -               dwarf_next_lines.c
> +               dwarf_next_lines.c dwarf_cu_dwp_section_info.c
>  
>  if MAINTAINER_MODE
>  BUILT_SOURCES = $(srcdir)/known-dwarf.h

OK.

> diff --git a/libdw/dwarf.h b/libdw/dwarf.h
> index b2e49db2..4be32de5 100644
> --- a/libdw/dwarf.h
> +++ b/libdw/dwarf.h
> @@ -942,7 +942,7 @@ enum
>  enum
>    {
>      DW_SECT_INFO = 1,
> -    /* Reserved = 2, */
> +    DW_SECT_TYPES = 2, /* Only DWARF4 GNU DebugFission. Reserved in DWARF5.  
> */
>      DW_SECT_ABBREV = 3,
>      DW_SECT_LINE = 4,
>      DW_SECT_LOCLISTS = 5,

OK.

> diff --git a/libdw/dwarf_cu_dwp_section_info.c 
> b/libdw/dwarf_cu_dwp_section_info.c
> new file mode 100644
> index 00000000..4a4eac8c
> --- /dev/null
> +++ b/libdw/dwarf_cu_dwp_section_info.c
> @@ -0,0 +1,371 @@
> +/* Read DWARF package file index sections.
> +   Copyright (c) 2023 Meta Platforms, Inc. and affiliates.
> +   This file is part of elfutils.
> +
> +   This file is free software; you can redistribute it and/or modify
> +   it under the terms of either
> +
> +     * the GNU Lesser General Public License as published by the Free
> +       Software Foundation; either version 3 of the License, or (at
> +       your option) any later version
> +
> +   or
> +
> +     * the GNU General Public License as published by the Free
> +       Software Foundation; either version 2 of the License, or (at
> +       your option) any later version
> +
> +   or both in parallel, as here.
> +
> +   elfutils is distributed in the hope that it will be useful, but
> +   WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   General Public License for more details.
> +
> +   You should have received copies of the GNU General Public License and
> +   the GNU Lesser General Public License along with this program.  If
> +   not, see <http://www.gnu.org/licenses/>.  */
> +
> +#ifdef HAVE_CONFIG_H
> +# include <config.h>
> +#endif
> +
> +#include "libdwP.h"
> +
> +static Dwarf_Package_Index *
> +__libdw_read_package_index (Dwarf *dbg, bool tu)
> +{
> +  Elf_Data *data;
> +  if (tu)
> +    data = dbg->sectiondata[IDX_debug_tu_index];
> +  else
> +    data = dbg->sectiondata[IDX_debug_cu_index];
> +
> +  /* We need at least 16 bytes for the header.  */
> +  if (data == NULL || data->d_size < 16)
> +    {
> +    invalid:
> +      __libdw_seterrno (DWARF_E_INVALID_DWARF);
> +      return NULL;
> +    }
> +
> +  const unsigned char *datap = data->d_buf;
> +  const unsigned char *endp = datap + data->d_size;
> +  uint16_t version;
> +  /* In GNU DebugFission for DWARF 4, the version is 2 as a uword.  In the
> +     standardized DWARF 5 format, it is a uhalf followed by a padding uhalf.
> +     Check for both.  */
> +  if (read_4ubyte_unaligned (dbg, datap) == 2)
> +    version = 2;
> +  else
> +    {
> +      version = read_2ubyte_unaligned (dbg, datap);
> +      if (version != 5)
> +     {
> +       __libdw_seterrno (DWARF_E_VERSION);
> +       return NULL;
> +     }
> +    }
> +  datap += 4;
> +  uint32_t section_count = read_4ubyte_unaligned_inc (dbg, datap);
> +  uint32_t unit_count = read_4ubyte_unaligned_inc (dbg, datap);
> +  uint32_t slot_count = read_4ubyte_unaligned_inc (dbg, datap);
> +
> +  /* The specification has a stricter requirement that
> +     slot_count > 3 * unit_count / 2, but this is enough for us.  */
> +  if (slot_count < unit_count)
> +    goto invalid;
> +
> +  /* After the header, the section must contain:
> +
> +       8 byte signature per hash table slot
> +     + 4 byte index per hash table slot
> +     + Section offset table with 1 header row, 1 row per unit, 1 column per
> +       section, 4 bytes per field
> +     + Section size table with 1 row per unit, 1 column per section, 4 bytes
> +       per field
> +
> +     We have to be careful about overflow when checking this.  */
> +  const unsigned char *hash_table = datap;
> +  if ((size_t) (endp - hash_table) < (uint64_t) slot_count * 12)
> +    goto invalid;
> +  const unsigned char *indices = hash_table + (size_t) slot_count * 8;
> +  const unsigned char *sections = indices + (size_t) slot_count * 4;
> +  if ((size_t) (endp - sections) < (uint64_t) section_count * 4)
> +    goto invalid;
> +  const unsigned char *section_offsets = sections + (size_t) section_count * 
> 4;
> +  if ((uint64_t) unit_count * section_count > UINT64_MAX / 8
> +      || ((size_t) (endp - section_offsets)
> +       < (uint64_t) unit_count * section_count * 8))
> +    goto invalid;
> +  const unsigned char *section_sizes
> +    = section_offsets + (uint64_t) unit_count * section_count * 4;
> +
> +  Dwarf_Package_Index *index = malloc (sizeof (*index));
> +  if (index == NULL)
> +    {
> +      __libdw_seterrno (DWARF_E_NOMEM);
> +      return NULL;
> +    }
> +
> +  index->dbg = dbg;
> +  /* Set absent sections to UINT32_MAX.  */
> +  memset (index->sections, 0xff, sizeof (index->sections));
> +  for (size_t i = 0; i < section_count; i++)
> +    {
> +      uint32_t section = read_4ubyte_unaligned (dbg, sections + i * 4);
> +      /* 2 is DW_SECT_TYPES in version 2 and reserved in version 5.  We 
> ignore
> +         it for version 5.
> +      5 is DW_SECT_LOC in version 2 and DW_SECT_LOCLISTS in version 5.  We
> +      use the same index for both.
> +      7 is DW_SECT_MACINFO in version 2 and DW_SECT_MACRO in version 5.  We
> +      use the same index for both.
> +      8 is DW_SECT_MACRO in version 2 and DW_SECT_RNGLISTS in version 5.  We
> +      use the same index for version 2's DW_SECT_MACRO as version 2's
> +      DW_SECT_MACINFO/version 5's DW_SECT_MACRO.
> +      We ignore unknown sections.  */
> +      if (section == 0)
> +     continue;
> +      if (version == 2)
> +     {
> +       if (section > 8)
> +         continue;
> +       else if (section == 8)
> +         section = DW_SECT_MACRO;
> +     }
> +      else if (section == 2
> +            || (section
> +                > sizeof (index->sections) / sizeof (index->sections[0])))
> +     continue;
> +      index->sections[section - 1] = i;
> +    }
> +
> +  /* DW_SECT_INFO (or DW_SECT_TYPES for DWARF 4 type units) and 
> DW_SECT_ABBREV
> +     are required.  */
> +  if (((!tu || dbg->sectiondata[IDX_debug_types] == NULL)
> +       && index->sections[DW_SECT_INFO - 1] == UINT32_MAX)
> +      || (tu && dbg->sectiondata[IDX_debug_types] != NULL
> +       && index->sections[DW_SECT_TYPES - 1] == UINT32_MAX)
> +      || index->sections[DW_SECT_ABBREV - 1] == UINT32_MAX)
> +    {
> +      free (index);
> +      __libdw_seterrno (DWARF_E_INVALID_DWARF);
> +      return NULL;
> +    }
> +
> +  index->section_count = section_count;
> +  index->unit_count = unit_count;
> +  index->slot_count = slot_count;
> +  index->last_unit_found = 0;
> +  index->hash_table = hash_table;
> +  index->indices = indices;
> +  index->section_offsets = section_offsets;
> +  index->section_sizes = section_sizes;
> +
> +  return index;
> +}

Sets up Dwarf_Package_Index structure and associates it with a Dwarf.
Looks good. Thanks for the diligent overflow checks. 

> +static Dwarf_Package_Index *
> +__libdw_package_index (Dwarf *dbg, bool tu)
> +{
> +  if (tu && dbg->tu_index != NULL)
> +    return dbg->tu_index;
> +  else if (!tu && dbg->cu_index != NULL)
> +    return dbg->cu_index;
> +
> +  Dwarf_Package_Index *index = __libdw_read_package_index (dbg, tu);
> +  if (index == NULL)
> +    return NULL;
> +
> +  if (tu)
> +    dbg->tu_index = index;
> +  else
> +    dbg->cu_index = index;
> +  return index;
> +}

Gets the Dwarf_Package_Index associated with a Dwarf. OK.
(Will need locking in the future for thread-safety.)

> +static int
> +__libdw_dwp_unit_row (Dwarf_Package_Index *index, uint64_t unit_id,
> +                   uint32_t *unit_rowp)
> +{
> +  if (index == NULL)
> +    return -1;
> +
> +  uint32_t hash = unit_id;
> +  uint32_t hash2 = (unit_id >> 32) | 1;
> +  /* Only check each slot once.  */
> +  for (uint32_t n = index->slot_count; n-- > 0; )
> +    {
> +      size_t slot = hash & (index->slot_count - 1);
> +      uint64_t sig = read_8ubyte_unaligned (index->dbg,
> +                                         index->hash_table + slot * 8);
> +      if (sig == unit_id)
> +     {
> +       uint32_t row = read_4ubyte_unaligned (index->dbg,
> +                                             index->indices + slot * 4);
> +       if (row > index->unit_count)
> +         {
> +           __libdw_seterrno (DWARF_E_INVALID_DWARF);
> +           return -1;
> +         }
> +       *unit_rowp = row;
> +       return 0;
> +     }
> +      else if (sig == 0
> +            && read_4ubyte_unaligned (index->dbg,
> +                                      index->indices + slot * 4) == 0)
> +     break;
> +      hash += hash2;
> +    }
> +  *unit_rowp = 0;
> +  return 0;
> +}

Given a unit_id, provides the row in the Dwarf_Package_Index if it
exists. Rows start at 1, so returning 0 with *unit_rowp also 0
indicates lookup failure (returning -1 indicates the index is malformed
in some way). OK.

> +static int
> +__libdw_dwp_section_info (Dwarf_Package_Index *index, uint32_t unit_row,
> +                       unsigned int section, Dwarf_Off *offsetp,
> +                       Dwarf_Off *sizep)
> +{
> +  if (index == NULL)
> +    return -1;
> +  if (unit_row == 0)
> +    {
> +      __libdw_seterrno (DWARF_E_INVALID_DWARF);
> +      return -1;
> +    }
> +  if (index->sections[section - 1] == UINT32_MAX)
> +    {
> +      if (offsetp != NULL)
> +     *offsetp = 0;
> +      if (sizep != NULL)
> +     *sizep = 0;
> +      return 0;
> +    }
> +  size_t i = (size_t)(unit_row - 1) * index->section_count
> +          + index->sections[section - 1];
> +  if (offsetp != NULL)
> +    *offsetp = read_4ubyte_unaligned (index->dbg,
> +                                   index->section_offsets + i * 4);
> +  if (sizep != NULL)
> +    *sizep = read_4ubyte_unaligned (index->dbg,
> +                                 index->section_sizes + i * 4);
> +  return 0;
> +}

Given a row and section, returns the offset and size recorded in the
index. A row of zero is an error. Doesn't need additional out-of-bounds
checks because those should have been checked in
__libdw_read_package_index. OK.

> +int
> +internal_function
> +__libdw_dwp_find_unit (Dwarf *dbg, bool debug_types, Dwarf_Off off,
> +                    uint16_t version, uint8_t unit_type, uint64_t unit_id8,
> +                    uint32_t *unit_rowp, Dwarf_Off *abbrev_offsetp)
> +{
> +  if (version >= 5
> +      && unit_type != DW_UT_split_compile && unit_type != DW_UT_split_type)
> +    {
> +    not_dwp:
> +      *unit_rowp = 0;
> +      *abbrev_offsetp = 0;
> +      return 0;
> +    }
> +  bool tu = unit_type == DW_UT_split_type || debug_types;
> +  if (dbg->sectiondata[tu ? IDX_debug_tu_index : IDX_debug_cu_index] == NULL)
> +    goto not_dwp;
> +  Dwarf_Package_Index *index = __libdw_package_index (dbg, tu);
> +  if (index == NULL)
> +    return -1;
> +
> +  /* This is always called for ascending offsets.  The most obvious way for a
> +     producer to generate the section offset table is sorted by offset; both
> +     GNU dwp and llvm-dwp do this.  In this common case, we can avoid the 
> full
> +     lookup.  */
> +  if (index->last_unit_found < index->unit_count)
> +    {
> +      Dwarf_Off offset, size;
> +      if (__libdw_dwp_section_info (index, index->last_unit_found + 1,
> +                                 debug_types ? DW_SECT_TYPES : DW_SECT_INFO,
> +                                 &offset, &size) != 0)
> +     return -1;
> +      if (offset <= off && off - offset < size)
> +     {
> +       *unit_rowp = ++index->last_unit_found;
> +       goto done;
> +     }
> +      else
> +     /* The units are not sorted. Don't try again.  */
> +     index->last_unit_found = index->unit_count;
> +    }
> +
> +  if (version >= 5 || debug_types)
> +    {
> +      /* In DWARF 5 and in type units, the unit signature is available in the
> +         unit header.  */
> +      if (__libdw_dwp_unit_row (index, unit_id8, unit_rowp) != 0)
> +     return -1;
> +    }
> +  else
> +    {
> +      /* In DWARF 4 compilation units, the unit signature is an attribute.  
> We
> +      can't parse attributes in the split unit until we get the abbreviation
> +      table offset from the package index, which is a chicken-and-egg
> +      problem.  We could get the signature from the skeleton unit, but that
> +      may not be available.
> +
> +      Instead, we resort to a linear scan through the section offset table.
> +      Finding all units is therefore quadratic in the number of units.
> +      However, this will likely never be needed in practice because of the
> +      sorted fast path above.  If this ceases to be the case, we can try to
> +      plumb through the skeleton unit's signature when it is available, or
> +      build a sorted lookup table for binary search.  */
> +      if (index->sections[DW_SECT_INFO - 1] == UINT32_MAX)
> +     {
> +       __libdw_seterrno (DWARF_E_INVALID_DWARF);
> +       return -1;
> +     }
> +      for (uint32_t i = 0; i < index->unit_count; i++)
> +     {
> +       Dwarf_Off offset, size;
> +       __libdw_dwp_section_info (index, i + 1, DW_SECT_INFO, &offset,
> +                                 &size);
> +       if (offset <= off && off - offset < size)
> +         {
> +           *unit_rowp = i + 1;
> +           goto done;
> +         }
> +     }
> +      __libdw_seterrno (DWARF_E_INVALID_DWARF);
> +      return -1;
> +    }
> +
> + done:
> +  return __libdw_dwp_section_info (index, *unit_rowp, DW_SECT_ABBREV,
> +                                abbrev_offsetp, NULL);
> +}

As used by __libdw_intern_next_unit. OK.

> +int
> +dwarf_cu_dwp_section_info (Dwarf_CU *cu, unsigned int section,
> +                        Dwarf_Off *offsetp, Dwarf_Off *sizep)
> +{
> +  if (cu == NULL)
> +    return -1;
> +  if (section < DW_SECT_INFO || section > DW_SECT_RNGLISTS)
> +    {
> +      __libdw_seterrno (DWARF_E_UNKNOWN_SECTION);
> +      return -1;
> +    }
> +  if (cu->dwp_row == 0)
> +    {
> +      if (offsetp != NULL)
> +     *offsetp = 0;
> +      if (sizep != NULL)
> +     *sizep = 0;
> +      return 0;
> +    }
> +  else
> +    {
> +      Dwarf_Package_Index *index
> +     = cu->unit_type == DW_UT_split_compile
> +     ? cu->dbg->cu_index : cu->dbg->tu_index;
> +      return __libdw_dwp_section_info (index, cu->dwp_row, section, offsetp,
> +                                    sizep);
> +    }
> +}
> +INTDEF(dwarf_cu_dwp_section_info)

To wrap it all up. Probably a good idea to limit the sections to those
known (although the code could in principle recognize others, it
currently ignores unknown section types). OK.

> diff --git a/libdw/dwarf_end.c b/libdw/dwarf_end.c
> index e51d5dd7..b7f817d9 100644
> --- a/libdw/dwarf_end.c
> +++ b/libdw/dwarf_end.c
> @@ -77,6 +77,9 @@ dwarf_end (Dwarf *dwarf)
>  {
>    if (dwarf != NULL)
>      {
> +      free (dwarf->tu_index);
> +      free (dwarf->cu_index);
> +
>        if (dwarf->cfi != NULL)
>       /* Clean up the CFI cache.  */
>       __libdw_destroy_frame_cache (dwarf->cfi);

OK.

> diff --git a/libdw/dwarf_error.c b/libdw/dwarf_error.c
> index 46ea16b3..0123cfa2 100644
> --- a/libdw/dwarf_error.c
> +++ b/libdw/dwarf_error.c
> @@ -102,6 +102,7 @@ static const char *errmsgs[] =
>      [DWARF_E_NOT_CUDIE] = N_("not a CU (unit) DIE"),
>      [DWARF_E_UNKNOWN_LANGUAGE] = N_("unknown language code"),
>      [DWARF_E_NO_DEBUG_ADDR] = N_(".debug_addr section missing"),
> +    [DWARF_E_UNKNOWN_SECTION] = N_("unknown section"),
>    };
>  #define nerrmsgs (sizeof (errmsgs) / sizeof (errmsgs[0]))
> 

OK.
 
> diff --git a/libdw/libdw.h b/libdw/libdw.h
> index 64d1689a..545ad043 100644
> --- a/libdw/libdw.h
> +++ b/libdw/libdw.h
> @@ -1081,6 +1081,29 @@ extern int dwarf_frame_register (Dwarf_Frame *frame, 
> int regno,
>    __nonnull_attribute__ (3, 4, 5);
>  
>  
> +/* Return offset and/or size of CU's contribution to SECTION in a DWARF 
> package
> +   file.
> +
> +   If CU is not from a DWARF package file, the file does not have SECTION, 
> or CU
> +   does not contribute to SECTION, then *SIZEP is set to 0.
> +
> +   SECTION is a DW_SECT section identifier.  Note that the original GNU DWARF
> +   package file extension for DWARF 4 used slightly different section
> +   identifiers.  This function uses the standardized section identifiers and
> +   maps the GNU DWARF 4 identifiers to their standard DWARF 5 analogues:
> +   DW_SECT_LOCLISTS (5) refers to .debug_locs.dwo for DWARF 4.
> +   DW_SECT_MACRO (7) refers to .debug_macinfo.dwo for DWARF 4 or
> +   .debug_macro.dwo for the GNU .debug_macro extension for DWARF 4 (section
> +   identifier 8 is DW_SECT_RNGLISTS in DWARF 5, NOT DW_SECT_MACRO like in the
> +   GNU extension.)
> +   .debug_types.dwo does not have a DWARF 5 equivalent, so this function 
> accepts
> +   the original DW_SECT_TYPES (2).
> +
> +   Returns 0 for success or -1 for errors.  OFFSETP and SIZEP may be NULL.  
> */
> +extern int dwarf_cu_dwp_section_info (Dwarf_CU *cu, unsigned int section,
> +                                   Dwarf_Off *offsetp, Dwarf_Off *sizep);
> +
> +
>  /* Return error code of last failing function call.  This value is kept
>     separately for each thread.  */

OK. Line length is at most 80 characters.

>  extern int dwarf_errno (void);
> diff --git a/libdw/libdw.map b/libdw/libdw.map
> index 5331ad45..3c5ce8dc 100644
> --- a/libdw/libdw.map
> +++ b/libdw/libdw.map
> @@ -373,3 +373,8 @@ ELFUTILS_0.188 {
>      dwfl_frame_reg;
>      dwfl_report_offline_memory;
>  } ELFUTILS_0.186;
> +
> +ELFUTILS_0.191 {
> +  global:
> +    dwarf_cu_dwp_section_info;
> +} ELFUTILS_0.188;

OK. We shouldn't forget to add a NEWS entry for this.

> diff --git a/libdw/libdwP.h b/libdw/libdwP.h
> index aef42267..7f8d69b5 100644
> --- a/libdw/libdwP.h
> +++ b/libdw/libdwP.h
> @@ -147,6 +147,7 @@ enum
>    DWARF_E_NOT_CUDIE,
>    DWARF_E_UNKNOWN_LANGUAGE,
>    DWARF_E_NO_DEBUG_ADDR,
> +  DWARF_E_UNKNOWN_SECTION,
>  };
> 

OK.

>  
> @@ -231,6 +232,11 @@ struct Dwarf
>    /* Cached info from the CFI section.  */
>    struct Dwarf_CFI_s *cfi;
>  
> +  /* DWARF package file CU index section.  */
> +  struct Dwarf_Package_Index_s *cu_index;
> +  /* DWARF package file TU index section.  */
> +  struct Dwarf_Package_Index_s *tu_index;
> +
>    /* Fake loc CU.  Used when synthesizing attributes for Dwarf_Ops that
>       came from a location list entry in dwarf_getlocation_attr.
>       Depending on version this is the .debug_loc or .debug_loclists

OK.

> @@ -343,6 +349,23 @@ struct Dwarf_Aranges_s
>    } info[0];
>  };
>  
> +/* DWARF package file unit index.  */
> +typedef struct Dwarf_Package_Index_s
> +{
> +  Dwarf *dbg;
> +  uint32_t section_count;
> +  uint32_t unit_count;
> +  uint32_t slot_count;
> +  /* Mapping from DW_SECT_* - 1 to column number in the section tables, or
> +     UINT32_MAX if not present.  */
> +  uint32_t sections[DW_SECT_RNGLISTS];
> +  /* Row number of last unit found in the index.  */
> +  uint32_t last_unit_found;
> +  const unsigned char *hash_table;
> +  const unsigned char *indices;
> +  const unsigned char *section_offsets;
> +  const unsigned char *section_sizes;
> +} Dwarf_Package_Index;
>  
>  /* CU representation.  */
>  struct Dwarf_CU

OK.

> @@ -350,6 +373,8 @@ struct Dwarf_CU
>    Dwarf *dbg;
>    Dwarf_Off start;
>    Dwarf_Off end;
> +  /* Row number of this unit in DWARF package file index.  */
> +  uint32_t dwp_row;
>    uint8_t address_size;
>    uint8_t offset_size;
>    uint16_t version;

OK.

> @@ -684,6 +709,13 @@ extern struct Dwarf *__libdw_find_split_dbg_addr (Dwarf 
> *dbg, void *addr)
>  extern struct Dwarf_CU *__libdw_find_split_unit (Dwarf_CU *cu)
>       internal_function;
>  
> +/* Find a unit in a DWARF package file for __libdw_intern_next_unit.  */
> +extern int __libdw_dwp_find_unit (Dwarf *dbg, bool debug_types, Dwarf_Off 
> off,
> +                               uint16_t version, uint8_t unit_type,
> +                               uint64_t unit_id8, uint32_t *unit_rowp,
> +                               Dwarf_Off *abbrev_offsetp)
> +     __nonnull_attribute__ (1, 7, 8) internal_function;
> +
>  /* Get abbreviation with given code.  */
>  extern Dwarf_Abbrev *__libdw_findabbrev (struct Dwarf_CU *cu,
>                                        unsigned int code)

OK.

> @@ -1388,6 +1420,7 @@ INTDECL (dwarf_attr_integrate)
>  INTDECL (dwarf_begin)
>  INTDECL (dwarf_begin_elf)
>  INTDECL (dwarf_child)
> +INTDECL (dwarf_cu_dwp_section_info)
>  INTDECL (dwarf_default_lower_bound)
>  INTDECL (dwarf_dieoffset)
>  INTDECL (dwarf_diename)

OK.

> diff --git a/libdw/libdw_findcu.c b/libdw/libdw_findcu.c
> index ed744231..6c7dcfb5 100644
> --- a/libdw/libdw_findcu.c
> +++ b/libdw/libdw_findcu.c
> @@ -143,6 +143,13 @@ __libdw_intern_next_unit (Dwarf *dbg, bool debug_types)
>    if (unlikely (*offsetp > data->d_size))
>      *offsetp = data->d_size;
>  
> +  uint32_t dwp_row;
> +  Dwarf_Off dwp_abbrev_offset;
> +  if (__libdw_dwp_find_unit (dbg, debug_types, oldoff, version, unit_type,
> +                          unit_id8, &dwp_row, &dwp_abbrev_offset) != 0)
> +    return NULL;
> +  abbrev_offset += dwp_abbrev_offset;
> +
>    /* Create an entry for this CU.  */
>    struct Dwarf_CU *newp = libdw_typed_alloc (dbg, struct Dwarf_CU);

__libdw_dwp_find_unit will return zero (and set row and abbrev offset
to zero) if the dbg isn't a dwp or the unit_type isn't a split unit
type. OK.
 
> @@ -150,6 +157,7 @@ __libdw_intern_next_unit (Dwarf *dbg, bool debug_types)
>    newp->sec_idx = sec_idx;
>    newp->start = oldoff;
>    newp->end = *offsetp;
> +  newp->dwp_row = dwp_row;
>    newp->address_size = address_size;
>    newp->offset_size = offset_size;
>    newp->version = version;

OK.

> diff --git a/tests/.gitignore b/tests/.gitignore
> index 5bebb2c4..0caabf25 100644
> --- a/tests/.gitignore
> +++ b/tests/.gitignore
> @@ -28,6 +28,7 @@
>  /backtrace-dwarf
>  /buildid
>  /core-dump-backtrace.lock
> +/cu-dwp-section-info
>  /debugaltlink
>  /debuginfod_build_id_find
>  /debuglink

OK.

> diff --git a/tests/Makefile.am b/tests/Makefile.am
> index 2373c980..34014570 100644
> --- a/tests/Makefile.am
> +++ b/tests/Makefile.am
> @@ -62,7 +62,7 @@ check_PROGRAMS = arextract arsymtest newfile saridx 
> scnnames sectiondump \
>                 dwelf_elf_e_machine_string \
>                 getphdrnum leb128 read_unaligned \
>                 msg_tst system-elf-libelf-test system-elf-gelf-test \
> -               nvidia_extended_linemap_libdw \
> +               nvidia_extended_linemap_libdw cu-dwp-section-info \
>                 $(asm_TESTS)
>  
>  asm_TESTS = asm-tst1 asm-tst2 asm-tst3 asm-tst4 asm-tst5 \
> @@ -212,7 +212,7 @@ TESTS = run-arextract.sh run-arsymtest.sh run-ar.sh 
> newfile test-nlist \
>       $(asm_TESTS) run-disasm-bpf.sh run-low_high_pc-dw-form-indirect.sh \
>       run-nvidia-extended-linemap-libdw.sh 
> run-nvidia-extended-linemap-readelf.sh \
>       run-readelf-dw-form-indirect.sh run-strip-largealign.sh \
> -     run-readelf-Dd.sh run-dwfl-core-noncontig.sh
> +     run-readelf-Dd.sh run-dwfl-core-noncontig.sh run-cu-dwp-section-info.sh
>  
>  if !BIARCH
>  export ELFUTILS_DISABLE_BIARCH = 1
> @@ -634,7 +634,11 @@ EXTRA_DIST = run-arextract.sh run-arsymtest.sh run-ar.sh 
> \
>            testfile-largealign.o.bz2 run-strip-largealign.sh \
>            run-funcretval++11.sh \
>            test-ar-duplicates.a.bz2 \
> -          run-dwfl-core-noncontig.sh testcore-noncontig.bz2
> +          run-dwfl-core-noncontig.sh testcore-noncontig.bz2 \
> +          testfile-dwp-4.bz2 testfile-dwp-4.dwp.bz2 \
> +          testfile-dwp-4-strict.bz2 testfile-dwp-4-strict.dwp.bz2 \
> +          testfile-dwp-5.bz2 testfile-dwp-5.dwp.bz2 testfile-dwp.source \
> +          run-cu-dwp-section-info.sh
>  
>  
>  if USE_VALGRIND

OK. This hunk doesn't apply anymore, but easily fixed.

> @@ -810,6 +814,7 @@ getphdrnum_LDADD = $(libelf) $(libdw)
>  leb128_LDADD = $(libelf) $(libdw)
>  read_unaligned_LDADD = $(libelf) $(libdw)
>  nvidia_extended_linemap_libdw_LDADD = $(libelf) $(libdw)
> +cu_dwp_section_info_LDADD = $(libdw)
>  
>  # We want to test the libelf headers against the system elf.h header.
>  # Don't include any -I CPPFLAGS. Except when we install our own elf.h.

Likewise.

> diff --git a/tests/cu-dwp-section-info.c b/tests/cu-dwp-section-info.c
> new file mode 100644
> index 00000000..f1756979
> --- /dev/null
> +++ b/tests/cu-dwp-section-info.c
> @@ -0,0 +1,73 @@
> +/* Test program for dwarf_cu_dwp_section_info
> +   Copyright (c) 2023 Meta Platforms, Inc. and affiliates.
> +   This file is part of elfutils.
> +
> +   This file is free software; you can redistribute it and/or modify
> +   it under the terms of the GNU General Public License as published by
> +   the Free Software Foundation; either version 3 of the License, or
> +   (at your option) any later version.
> +
> +   elfutils is distributed in the hope that it will be useful, but
> +   WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +   GNU General Public License for more details.
> +
> +   You should have received a copy of the GNU General Public License
> +   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
> +
> +#ifdef HAVE_CONFIG_H
> +# include <config.h>
> +#endif
> +#include <stdio.h>
> +#include <inttypes.h>
> +#include <sys/types.h>
> +#include <sys/stat.h>
> +#include <fcntl.h>
> +#include <unistd.h>
> +
> +#include <dwarf.h>
> +#include ELFUTILS_HEADER(dw)
> +
> +int
> +main (int argc, char *argv[])
> +{
> +  for (int i = 1; i < argc; i++)
> +    {
> +      printf ("file: %s\n", argv[i]);
> +      int fd = open (argv[i], O_RDONLY);
> +      Dwarf *dbg = dwarf_begin (fd, DWARF_C_READ);
> +      if (dbg == NULL)
> +     {
> +       printf ("%s not usable: %s\n", argv[i], dwarf_errmsg (-1));
> +       return -1;
> +     }
> +
> +      Dwarf_CU *cu = NULL;
> +      while (dwarf_get_units (dbg, cu, &cu, NULL, NULL, NULL, NULL) == 0)
> +     {
> +#define SECTION_INFO(section) do {                                   \
> +         printf (#section ": ");                                     \
> +         Dwarf_Off offset, size;                                     \
> +         if (dwarf_cu_dwp_section_info (cu, DW_SECT_##section,       \
> +                                        &offset, &size) == 0)        \
> +           printf ("0x%" PRIx64 " 0x%" PRIx64 "\n", offset, size);   \
> +         else                                                        \
> +           printf ("%s\n", dwarf_errmsg (-1));                       \
> +       } while (0)
> +       SECTION_INFO (INFO);
> +       SECTION_INFO (TYPES);
> +       SECTION_INFO (ABBREV);
> +       SECTION_INFO (LINE);
> +       SECTION_INFO (LOCLISTS);
> +       SECTION_INFO (STR_OFFSETS);
> +       SECTION_INFO (MACRO);
> +       SECTION_INFO (RNGLISTS);
> +       printf ("\n");
> +     }
> +
> +      dwarf_end (dbg);
> +      close (fd);
> +    }
> +
> +  return 0;
> +}

Nice test.

> diff --git a/tests/run-cu-dwp-section-info.sh b/tests/run-cu-dwp-section-info.sh
> new file mode 100755
> index 00000000..202319c6
> --- /dev/null
> +++ b/tests/run-cu-dwp-section-info.sh
> @@ -0,0 +1,168 @@
> +#! /bin/sh
> +# Copyright (c) 2023 Meta Platforms, Inc. and affiliates.
> +# This file is part of elfutils.
> +#
> +# This file is free software; you can redistribute it and/or modify
> +# it under the terms of the GNU General Public License as published by
> +# the Free Software Foundation; either version 3 of the License, or
> +# (at your option) any later version.
> +#
> +# elfutils is distributed in the hope that it will be useful, but
> +# WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with this program.  If not, see <http://www.gnu.org/licenses/>.
> +
> +. $srcdir/test-subr.sh
> +
> +# See testfile-dwp.source.
> +testfiles testfile-dwp-5.dwp testfile-dwp-4.dwp testfile-dwp-4-strict.dwp
> +
> +testrun_compare ${abs_builddir}/cu-dwp-section-info testfile-dwp-5.dwp << EOF
> +file: testfile-dwp-5.dwp
> +INFO: 0x0 0x70
> +TYPES: 0x0 0x0
> +ABBREV: 0x0 0x160
> +LINE: 0x0 0x7f
> +LOCLISTS: 0x0 0xdb
> +STR_OFFSETS: 0x0 0x75c
> +MACRO: 0x0 0x6c6
> +RNGLISTS: 0x0 0x22
> +
> +INFO: 0x70 0x108
> +TYPES: 0x0 0x0
> +ABBREV: 0x0 0x160
> +LINE: 0x0 0x7f
> +LOCLISTS: 0x0 0xdb
> +STR_OFFSETS: 0x0 0x75c
> +MACRO: 0x0 0x6c6
> +RNGLISTS: 0x0 0x22
> +
> +INFO: 0x178 0x6e
> +TYPES: 0x0 0x0
> +ABBREV: 0x160 0xca
> +LINE: 0x7f 0x7f
> +LOCLISTS: 0x0 0x0
> +STR_OFFSETS: 0x75c 0x758
> +MACRO: 0x6c6 0x6c5
> +RNGLISTS: 0x0 0x0
> +
> +INFO: 0x1e6 0x78
> +TYPES: 0x0 0x0
> +ABBREV: 0x160 0xca
> +LINE: 0x7f 0x7f
> +LOCLISTS: 0x0 0x0
> +STR_OFFSETS: 0x75c 0x758
> +MACRO: 0x6c6 0x6c5
> +RNGLISTS: 0x0 0x0
> +
> +INFO: 0x25e 0x193
> +TYPES: 0x0 0x0
> +ABBREV: 0x22a 0x18a
> +LINE: 0xfe 0x81
> +LOCLISTS: 0xdb 0xc9
> +STR_OFFSETS: 0xeb4 0x77c
> +MACRO: 0xd8b 0x6c6
> +RNGLISTS: 0x22 0x43
> +
> +EOF
> +
> +testrun_compare ${abs_builddir}/cu-dwp-section-info testfile-dwp-4.dwp << EOF
> +file: testfile-dwp-4.dwp
> +INFO: 0x0 0x11e
> +TYPES: 0x0 0x0
> +ABBREV: 0x0 0x172
> +LINE: 0x0 0x52
> +LOCLISTS: 0x0 0x11b
> +STR_OFFSETS: 0x0 0x754
> +MACRO: 0x0 0x6c7
> +RNGLISTS: 0x0 0x0
> +
> +INFO: 0x11e 0x76
> +TYPES: 0x0 0x0
> +ABBREV: 0x172 0xd7
> +LINE: 0x52 0x52
> +LOCLISTS: 0x0 0x0
> +STR_OFFSETS: 0x754 0x750
> +MACRO: 0x6c7 0x6c6
> +RNGLISTS: 0x0 0x0
> +
> +INFO: 0x194 0x1c5
> +TYPES: 0x0 0x0
> +ABBREV: 0x249 0x19e
> +LINE: 0xa4 0x53
> +LOCLISTS: 0x11b 0xf1
> +STR_OFFSETS: 0xea4 0x774
> +MACRO: 0xd8d 0x6c7
> +RNGLISTS: 0x0 0x0
> +
> +INFO: 0x0 0x0
> +TYPES: 0x0 0x6f
> +ABBREV: 0x0 0x172
> +LINE: 0x0 0x52
> +LOCLISTS: 0x0 0x11b
> +STR_OFFSETS: 0x0 0x754
> +MACRO: 0x0 0x6c7
> +RNGLISTS: 0x0 0x0
> +
> +INFO: 0x0 0x0
> +TYPES: 0x6f 0x6d
> +ABBREV: 0x172 0xd7
> +LINE: 0x52 0x52
> +LOCLISTS: 0x0 0x0
> +STR_OFFSETS: 0x754 0x750
> +MACRO: 0x6c7 0x6c6
> +RNGLISTS: 0x0 0x0
> +
> +EOF
> +
> +testrun_compare ${abs_builddir}/cu-dwp-section-info testfile-dwp-4-strict.dwp << EOF
> +file: testfile-dwp-4-strict.dwp
> +INFO: 0x0 0x105
> +TYPES: 0x0 0x0
> +ABBREV: 0x0 0x15f
> +LINE: 0x0 0x52
> +LOCLISTS: 0x0 0xe2
> +STR_OFFSETS: 0x0 0x24
> +MACRO: 0x0 0x38e4
> +RNGLISTS: 0x0 0x0
> +
> +INFO: 0x105 0x72
> +TYPES: 0x0 0x0
> +ABBREV: 0x15f 0xd3
> +LINE: 0x52 0x52
> +LOCLISTS: 0x0 0x0
> +STR_OFFSETS: 0x24 0x20
> +MACRO: 0x38e4 0x38db
> +RNGLISTS: 0x0 0x0
> +
> +INFO: 0x177 0x17b
> +TYPES: 0x0 0x0
> +ABBREV: 0x232 0x157
> +LINE: 0xa4 0x53
> +LOCLISTS: 0xe2 0xb1
> +STR_OFFSETS: 0x44 0x44
> +MACRO: 0x71bf 0x38f5
> +RNGLISTS: 0x0 0x0
> +
> +INFO: 0x0 0x0
> +TYPES: 0x0 0x6e
> +ABBREV: 0x0 0x15f
> +LINE: 0x0 0x52
> +LOCLISTS: 0x0 0xe2
> +STR_OFFSETS: 0x0 0x24
> +MACRO: 0x0 0x38e4
> +RNGLISTS: 0x0 0x0
> +
> +INFO: 0x0 0x0
> +TYPES: 0x6e 0x6b
> +ABBREV: 0x15f 0xd3
> +LINE: 0x52 0x52
> +LOCLISTS: 0x0 0x0
> +STR_OFFSETS: 0x24 0x20
> +MACRO: 0x38e4 0x38db
> +RNGLISTS: 0x0 0x0
> +
> +EOF

OK.

> diff --git a/tests/testfile-dwp.source b/tests/testfile-dwp.source
> new file mode 100644
> index 00000000..b0b0a97c
> --- /dev/null
> +++ b/tests/testfile-dwp.source
> @@ -0,0 +1,102 @@
> +# Nonsensical program used to generate example DWARF package files with type
> +# units, location lists, range lists, and macros.
> +
> +# = foobar.h =
> +
> +struct Foo
> +{
> +  int a, b;
> +  int foo ();
> +};
> +
> +struct Bar
> +{
> +  long a, b;
> +  long bar ();
> +};
> +
> +#define FROB(x) ((x) ^ 0x2a2a2a2a)
> +#define FRY(x) ((x) * 0x100000001b3)
> +
> +inline long
> +fibonacci (unsigned int n)
> +{
> +  if (n == 0)
> +    return 0;
> +  else
> +    {
> +      long a = 0;
> +      long b = 1;
> +      for (unsigned int i = 2; i < n; i++)
> +     {
> +       long tmp = a + b;
> +       a = b;
> +       b = tmp;
> +     }
> +      return b;
> +    }
> +}
> +
> +# = foo.cc =
> +
> +#include "foobar.h"
> +
> +#define ZERO() (1 - 1)
> +
> +int
> +x_x (int x)
> +{
> +  for (int i = x; i > ZERO(); i--)
> +    x *= x;
> +  return x;
> +}
> +
> +int
> +Foo::foo ()
> +{
> +  int x = a;
> +  if (a > b)
> +    x -= b;
> +  return FROB (x_x (x));
> +}
> +
> +# = bar.cc =
> +
> +#include "foobar.h"
> +
> +#define ONE 1
> +
> +long
> +Bar::bar ()
> +{
> +  if (a == b)
> +    return ONE;
> +  else
> +    return a > b ? b : a;
> +}
> +
> +# = main.cc =
> +
> +#include "foobar.h"
> +
> +#define MAIN_ARGS int argc, char **argv
> +
> +int
> +main(MAIN_ARGS)
> +{
> +  struct Foo myfoo { argc, FROB (argc) };
> +  struct Bar mybar { fibonacci (argc), FRY (argc) };
> +  return myfoo.foo() + mybar.bar();
> +}
> +
> +# Built with GCC at commit 80048aa13a6b ("debug/111409 - don't generate COMDAT
> +# macro sections for split DWARF").
> +$ g++ -gdwarf-5 -gsplit-dwarf -fdebug-types-section -g3 -O2 foo.cc bar.cc main.cc -o testfile-dwp-5
> +# GNU dwp as of binutils 2.41 only supports DWARF 4.
> +$ llvm-dwp -e testfile-dwp-5 -o testfile-dwp-5.dwp
> +
> +$ g++ -gdwarf-4 -gsplit-dwarf -fdebug-types-section -g3 -O2 foo.cc bar.cc main.cc -o testfile-dwp-4
> +$ dwp -e testfile-dwp-4
> +
> +$ g++ -gdwarf-4 -gstrict-dwarf -gsplit-dwarf -fdebug-types-section -g3 -O2 foo.cc bar.cc main.cc -o testfile-dwp-4-strict
> +$ dwp -e testfile-dwp-4-strict

Fun.

Reply via email to