Hi Aaron,
On Sun, 2025-06-22 at 20:00 -0400, Aaron Merey wrote:
> Improve __libdw_dieabbrev performance by removing abbrev_lock. This
> lock protects the Dwarf_Die abbrev member due to lazy loading.
>
> Instead, eagerly load abbrev during Dwarf_Die initialization so that
> the member is readonly and requires no locking.
Could you explain a bit more how this works?
As far as I understand you made the following changes:
- Made the CUDIE and SUBDIE macros into static inline functions.
- This made it so that the usage in dwarf_decl_file and getfiles
needed to introduce a temporary Dwarf_Die variable (fine).
- The new functions return a Dwarf_Die which have all fields
initialized, not just .cu and .addr.
- Specifically they call dwarf_tag on the to be returned Dwarf_Die.
This seems to be done to trigger a call to __libdw_dieabbrev
(die, NULL). If so, why not do that directly?
- __libdw_dieabbrev has the abbrev_lock removed.
And no longer sets die->abbrev on failure (why?)
- __libdw_dieabbrev now calls __libdw_findabbrev (die->cu, code)
without any lock.
- __libdw_findabbrev calls Dwarf_Abbrev_Hash_find on cu->abbrev_hash,
which should be fine, since it is a concurrent hash table.
- But then if not found it checks cu->last_abbrev_offset which
I don't understand how that is fine without any locking. It
is later modified in this function. And it seems there can be
concurrent calls for the same cu?
- __libdw_findabbrev then calls __libdw_getabbrev which uses
Dwarf_Abbrev_Hash_find and Dwarf_Abbrev_Hash_insert on the
cu->abbrev_hash which should be fine because that is the
concurrent hash table also used above.
So I think this is almost, but not entirely thread-safe, with the
locking. We need some solution for the concurrent last_abbrev_offset
manipulation.
> Signed-off-by: Aaron Merey <[email protected]>
> ---
> libdw/dwarf_decl_file.c | 3 +-
> libdw/dwarf_end.c | 1 -
> libdw/dwarf_getscopevar.c | 3 +-
> libdw/libdwP.h | 65 +++++++++++++++++----------------------
> libdw/libdw_findcu.c | 1 -
> 5 files changed, 32 insertions(+), 41 deletions(-)
>
> diff --git a/libdw/dwarf_decl_file.c b/libdw/dwarf_decl_file.c
> index 1b91a4e9..03040e12 100644
> --- a/libdw/dwarf_decl_file.c
> +++ b/libdw/dwarf_decl_file.c
> @@ -50,7 +50,8 @@ dwarf_decl_file (Dwarf_Die *die)
>
> Dwarf_Files *files;
> size_t nfiles;
> - if (INTUSE(dwarf_getsrcfiles) (&CUDIE (attr_mem.cu), &files, &nfiles) != 0)
> + Dwarf_Die cudie = CUDIE (attr_mem.cu);
> + if (INTUSE(dwarf_getsrcfiles) (&cudie, &files, &nfiles) != 0)
> return NULL;
>
> if (idx >= nfiles)
> diff --git a/libdw/dwarf_end.c b/libdw/dwarf_end.c
> index 1628e448..51021d64 100644
> --- a/libdw/dwarf_end.c
> +++ b/libdw/dwarf_end.c
> @@ -68,7 +68,6 @@ cu_free (void *arg)
> && p != p->dbg->fake_addr_cu)
> {
> Dwarf_Abbrev_Hash_free (&p->abbrev_hash);
> - rwlock_fini (p->abbrev_lock);
> rwlock_fini (p->split_lock);
> mutex_fini (p->src_lock);
> mutex_fini (p->str_off_base_lock);
> diff --git a/libdw/dwarf_getscopevar.c b/libdw/dwarf_getscopevar.c
> index 7b1416f3..306ccae0 100644
> --- a/libdw/dwarf_getscopevar.c
> +++ b/libdw/dwarf_getscopevar.c
> @@ -40,7 +40,8 @@
> static int
> getfiles (Dwarf_Die *die, Dwarf_Files **files)
> {
> - return INTUSE(dwarf_getsrcfiles) (&CUDIE (die->cu), files, NULL);
> + Dwarf_Die cudie = CUDIE (die->cu);
> + return INTUSE(dwarf_getsrcfiles) (&cudie, files, NULL);
> }
>
> /* Fetch an attribute that should have a constant integer form. */
> diff --git a/libdw/libdwP.h b/libdw/libdwP.h
> index de80cd4e..4a528c9c 100644
> --- a/libdw/libdwP.h
> +++ b/libdw/libdwP.h
> @@ -455,10 +455,6 @@ struct Dwarf_CU
> Don't access directly, call __libdw_cu_locs_base. */
> Dwarf_Off locs_base;
>
> - /* Synchronize access to the abbrev member of a Dwarf_Die that
> - refers to this Dwarf_CU. Covers __libdw_die_abbrev. */
> - rwlock_define(, abbrev_lock);
> -
> /* Synchronize access to the split member of this Dwarf_CU.
> Covers __libdw_find_split_unit. */
> rwlock_define(, split_lock);
> @@ -595,23 +591,6 @@ __libdw_first_die_off_from_cu (struct Dwarf_CU *cu)
> cu->unit_type);
> }
>
> -#define CUDIE(fromcu)
> \
> - ((Dwarf_Die)
> \
> - { \
> - .cu = (fromcu), \
> - .addr = ((char *) (fromcu)->dbg->sectiondata[cu_sec_idx
> (fromcu)]->d_buf \
> - + __libdw_first_die_off_from_cu (fromcu)) \
> - })
> -
> -#define SUBDIE(fromcu)
> \
> - ((Dwarf_Die)
> \
> - { \
> - .cu = (fromcu), \
> - .addr = ((char *) (fromcu)->dbg->sectiondata[cu_sec_idx
> (fromcu)]->d_buf \
> - + (fromcu)->start + (fromcu)->subdie_offset) \
> - })
> -
> -
> /* Prototype of a single .debug_macro operator. */
> typedef struct
> {
> @@ -820,12 +799,7 @@ __nonnull_attribute__ (1)
> __libdw_dieabbrev (Dwarf_Die *die, const unsigned char **readp)
> {
> if (unlikely (die->cu == NULL))
> - {
> - die->abbrev = DWARF_END_ABBREV;
> - return DWARF_END_ABBREV;
> - }
> -
> - rwlock_wrlock (die->cu->abbrev_lock);
> + return DWARF_END_ABBREV;
>
> /* Do we need to get the abbreviation, or need to read after the code? */
> if (die->abbrev == NULL || readp != NULL)
> @@ -835,25 +809,20 @@ __libdw_dieabbrev (Dwarf_Die *die, const unsigned char
> **readp)
> const unsigned char *addr = die->addr;
>
> if (addr >= (const unsigned char *) die->cu->endp)
> - {
> - die->abbrev = DWARF_END_ABBREV;
> - rwlock_unlock (die->cu->abbrev_lock);
> - return DWARF_END_ABBREV;
> - }
> + return DWARF_END_ABBREV;
>
> get_uleb128 (code, addr, die->cu->endp);
> if (readp != NULL)
> *readp = addr;
>
> - /* Find the abbreviation. */
> + /* Find the abbreviation. To improve performance, this write is not
> + protected by a lock. It should only be reachable by a single thread
> + when initializing the DIE. */
> if (die->abbrev == NULL)
> die->abbrev = __libdw_findabbrev (die->cu, code);
> }
>
> - Dwarf_Abbrev *result = die->abbrev;
> - rwlock_unlock (die->cu->abbrev_lock);
> -
> - return result;
> + return die->abbrev;
> }
>
> /* Helper functions for form handling. */
> @@ -1112,6 +1081,28 @@ cu_sec_idx (struct Dwarf_CU *cu)
> return cu->sec_idx;
> }
>
> +static inline Dwarf_Die
> +CUDIE (Dwarf_CU *fromcu)
> +{
> + Dwarf_Die res = {0};
> + res.cu = fromcu;
> + res.addr = ((char *) (fromcu)->dbg->sectiondata[cu_sec_idx (fromcu)]->d_buf
> + + __libdw_first_die_off_from_cu (fromcu));
> + (void) dwarf_tag (&res); /* Set .abbrev */
> + return res;
> +}
> +
> +static inline Dwarf_Die
> +SUBDIE (Dwarf_CU *fromcu)
> +{
> + Dwarf_Die res = {0};
> + res.cu = fromcu;
> + res.addr = ((char *) (fromcu)->dbg->sectiondata[cu_sec_idx (fromcu)]->d_buf
> + + (fromcu)->start + (fromcu)->subdie_offset);
> + (void) dwarf_tag (&res); /* Set .abbrev */
> + return res;
> +}
> +
> static inline bool
> is_cudie (Dwarf_Die *cudie)
> {
> diff --git a/libdw/libdw_findcu.c b/libdw/libdw_findcu.c
> index 0e4dcc37..857ebd38 100644
> --- a/libdw/libdw_findcu.c
> +++ b/libdw/libdw_findcu.c
> @@ -177,7 +177,6 @@ __libdw_intern_next_unit (Dwarf *dbg, bool debug_types)
> newp->startp = data->d_buf + newp->start;
> newp->endp = data->d_buf + newp->end;
> eu_search_tree_init (&newp->locs_tree);
> - rwlock_init (newp->abbrev_lock);
> rwlock_init (newp->split_lock);
> mutex_init (newp->src_lock);
> mutex_init (newp->str_off_base_lock);