Hi Heather, On Tue, Oct 10, 2023 at 03:42:56PM +0200, Mark Wielaard wrote: > From: Heather McIntyre <h...@rice.edu> > > * (try_split_file): Use eu_tsearch. > Add lock for cu->split. > > Signed-off-by: Heather S. McIntyre <h...@rice.edu> > Signed-off-by: Mark Wielaard <m...@klomp.org> > --- > libdw/libdw_find_split_unit.c | 43 +++++++++++++++++++++++++++++------ > 1 file changed, 36 insertions(+), 7 deletions(-) > > diff --git a/libdw/libdw_find_split_unit.c b/libdw/libdw_find_split_unit.c > index 533f8072..a288e9bd 100644 > --- a/libdw/libdw_find_split_unit.c > +++ b/libdw/libdw_find_split_unit.c > @@ -34,13 +34,19 @@ > #include "libelfP.h" > > #include <limits.h> > -#include <search.h> > +#include <eu-search.h> > #include <stdlib.h> > #include <string.h> > #include <sys/types.h> > #include <sys/stat.h> > #include <fcntl.h> > > +/* __libdw_link_skel_split() modifies "skel->split = split" > + and "split->split = skel". > + "cu->split" is read at multiple locations and conditionally updated. > + Mutual exclusion is enforced to prevent a race. */ > +rwlock_define(static, cu_split_lock);
__libdw_link_skel_split is defined in libdw/libdwP.h and is called in try_split_file. (It is also called in src/readelf.c but that is a terrible hack, so ignore that for now.) A Dwarf_CU is created (see libdw/libdw_findcu.c) with split set to (Dwarf_CU *) -1 to indicate the split dwarf is not yet set. If cu->split is NULL it means the split dwarf was searched for, but not found. > static void > try_split_file (Dwarf_CU *cu, const char *dwo_path) > { > @@ -57,7 +63,7 @@ try_split_file (Dwarf_CU *cu, const char *dwo_path) > if (split->unit_type == DW_UT_split_compile > && cu->unit_id8 == split->unit_id8) > { > - if (tsearch (split->dbg, &cu->dbg->split_tree, > + if (eu_tsearch (split->dbg, &cu->dbg->split_tree, > __libdw_finddbg_cb) == NULL) > { > /* Something went wrong. Don't link. */ > @@ -65,9 +71,13 @@ try_split_file (Dwarf_CU *cu, const char *dwo_path) > break; > } > > + rwlock_wrlock(cu_split_lock); > + > /* Link skeleton and split compile units. */ > __libdw_link_skel_split (cu, split); > > + rwlock_unlock(cu_split_lock); > + Is this locking wide enough? It seems like multiple threads can race to this code and set different Dwarfs (which means they'll leak). See below. > /* We have everything we need from this ELF > file. And we are going to close the fd to > not run out of file descriptors. */ > @@ -75,8 +85,13 @@ try_split_file (Dwarf_CU *cu, const char *dwo_path) > break; > } > } > + > + rwlock_rdlock(cu_split_lock); > + > if (cu->split == (Dwarf_CU *) -1) > dwarf_end (split_dwarf); > + > + rwlock_unlock(cu_split_lock); This means __libdw_link_skel_split wasn't called above (so the split_dwarf created by dwarf_begin didn't contain the expected CU). > } > /* Always close, because we don't want to run out of file > descriptors. See also the elf_fcntl ELF_C_FDDONE call > @@ -89,9 +104,13 @@ Dwarf_CU * > internal_function > __libdw_find_split_unit (Dwarf_CU *cu) > { > + rwlock_rdlock(cu_split_lock); > + Dwarf_CU *cu_split_local = cu->split; > + rwlock_unlock(cu_split_lock); > + > /* Only try once. */ > - if (cu->split != (Dwarf_CU *) -1) > - return cu->split; > + if (cu_split_local != (Dwarf_CU *) -1) > + return cu_split_local; > > /* We need a skeleton unit with a comp_dir and [GNU_]dwo_name attributes. > The split unit will be the first in the dwo file and should have the > @@ -116,7 +135,11 @@ __libdw_find_split_unit (Dwarf_CU *cu) > free (dwo_path); > } > > - if (cu->split == (Dwarf_CU *) -1) > + rwlock_rdlock(cu_split_lock); > + cu_split_local = cu->split; > + rwlock_unlock(cu_split_lock); > + > + if (cu_split_local == (Dwarf_CU *) -1) > { > /* Try compdir plus dwo_name. */ > Dwarf_Attribute compdir; It looks like multiple threads can end up here after having seen cu->split/cu_split_local == (Dwarf_CU *) -1) Which means multiple threads will enter try_split_file and might all call __libdw_link_skel_split as mentioned above. > @@ -138,9 +161,15 @@ __libdw_find_split_unit (Dwarf_CU *cu) > } > } > > + rwlock_wrlock(cu_split_lock); > + > /* If we found nothing, make sure we don't try again. */ > if (cu->split == (Dwarf_CU *) -1) > - cu->split = NULL; > + { > + cu->split = NULL; > + cu_split_local = cu->split; > + } > > - return cu->split; > + rwlock_unlock(cu_split_lock); > + return cu_split_local; > } I think it would be better to keep the lock during the whole __libdw_find_split_unit call. Cheers, Mark