Hi Heather,

On Tue, Oct 10, 2023 at 03:42:56PM +0200, Mark Wielaard wrote:
> From: Heather McIntyre <h...@rice.edu>
> 
>       * (try_split_file): Use eu_tsearch.
>       Add lock for cu->split.
> 
> Signed-off-by: Heather S. McIntyre <h...@rice.edu>
> Signed-off-by: Mark Wielaard <m...@klomp.org>
> ---
>  libdw/libdw_find_split_unit.c | 43 +++++++++++++++++++++++++++++------
>  1 file changed, 36 insertions(+), 7 deletions(-)
> 
> diff --git a/libdw/libdw_find_split_unit.c b/libdw/libdw_find_split_unit.c
> index 533f8072..a288e9bd 100644
> --- a/libdw/libdw_find_split_unit.c
> +++ b/libdw/libdw_find_split_unit.c
> @@ -34,13 +34,19 @@
>  #include "libelfP.h"
>  
>  #include <limits.h>
> -#include <search.h>
> +#include <eu-search.h>
>  #include <stdlib.h>
>  #include <string.h>
>  #include <sys/types.h>
>  #include <sys/stat.h>
>  #include <fcntl.h>
>  
> +/* __libdw_link_skel_split() modifies "skel->split = split"
> +   and "split->split = skel".
> +   "cu->split" is read at multiple locations and conditionally updated.
> +   Mutual exclusion is enforced to prevent a race. */
> +rwlock_define(static, cu_split_lock);

__libdw_link_skel_split is defined in libdw/libdwP.h and is called in
try_split_file. (It is also called in src/readelf.c but that is a
terrible hack, so ignore that for now.)

A Dwarf_CU is created (see libdw/libdw_findcu.c) with split set to
(Dwarf_CU *) -1 to indicate the split dwarf is not yet set. If
cu->split is NULL it means the split dwarf was searched for, but not
found.

>  static void
>  try_split_file (Dwarf_CU *cu, const char *dwo_path)
>  {
> @@ -57,7 +63,7 @@ try_split_file (Dwarf_CU *cu, const char *dwo_path)
>             if (split->unit_type == DW_UT_split_compile
>                 && cu->unit_id8 == split->unit_id8)
>               {
> -               if (tsearch (split->dbg, &cu->dbg->split_tree,
> +               if (eu_tsearch (split->dbg, &cu->dbg->split_tree,
>                              __libdw_finddbg_cb) == NULL)
>                   {
>                     /* Something went wrong.  Don't link.  */
> @@ -65,9 +71,13 @@ try_split_file (Dwarf_CU *cu, const char *dwo_path)
>                     break;
>                   }
>  
> +               rwlock_wrlock(cu_split_lock);
> +
>                 /* Link skeleton and split compile units.  */
>                 __libdw_link_skel_split (cu, split);
>  
> +               rwlock_unlock(cu_split_lock);
> +

Is this locking wide enough? It seems like multiple threads can race
to this code and set different Dwarfs (which means they'll leak). See
below.

>                 /* We have everything we need from this ELF
>                    file.  And we are going to close the fd to
>                    not run out of file descriptors.  */
> @@ -75,8 +85,13 @@ try_split_file (Dwarf_CU *cu, const char *dwo_path)
>                 break;
>               }
>           }
> +
> +       rwlock_rdlock(cu_split_lock);
> +
>         if (cu->split == (Dwarf_CU *) -1)
>           dwarf_end (split_dwarf);
> +
> +       rwlock_unlock(cu_split_lock);

This means __libdw_link_skel_split wasn't called above (so the
split_dwarf created by dwarf_begin didn't contain the expected CU).

>       }
>        /* Always close, because we don't want to run out of file
>        descriptors.  See also the elf_fcntl ELF_C_FDDONE call
> @@ -89,9 +104,13 @@ Dwarf_CU *
>  internal_function
>  __libdw_find_split_unit (Dwarf_CU *cu)
>  {
> +  rwlock_rdlock(cu_split_lock);
> +  Dwarf_CU *cu_split_local = cu->split;
> +  rwlock_unlock(cu_split_lock);
> +
>    /* Only try once.  */
> -  if (cu->split != (Dwarf_CU *) -1)
> -    return cu->split;
> +  if (cu_split_local != (Dwarf_CU *) -1)
> +    return cu_split_local;
>  
>    /* We need a skeleton unit with a comp_dir and [GNU_]dwo_name attributes.
>       The split unit will be the first in the dwo file and should have the
> @@ -116,7 +135,11 @@ __libdw_find_split_unit (Dwarf_CU *cu)
>             free (dwo_path);
>           }
>  
> -       if (cu->split == (Dwarf_CU *) -1)
> +       rwlock_rdlock(cu_split_lock);
> +       cu_split_local = cu->split;
> +       rwlock_unlock(cu_split_lock);
> +
> +       if (cu_split_local == (Dwarf_CU *) -1)
>           {
>             /* Try compdir plus dwo_name.  */
>             Dwarf_Attribute compdir;

It looks like multiple threads can end up here after having seen
cu->split/cu_split_local == (Dwarf_CU *) -1) Which means multiple
threads will enter try_split_file and might all call
__libdw_link_skel_split as mentioned above.

> @@ -138,9 +161,15 @@ __libdw_find_split_unit (Dwarf_CU *cu)
>       }
>      }
>  
> +  rwlock_wrlock(cu_split_lock);
> +
>    /* If we found nothing, make sure we don't try again.  */
>    if (cu->split == (Dwarf_CU *) -1)
> -    cu->split = NULL;
> +    {
> +      cu->split = NULL;
> +      cu_split_local = cu->split;
> +    }
>  
> -  return cu->split;
> +  rwlock_unlock(cu_split_lock);
> +  return cu_split_local;
>  }

I think it would be better to keep the lock during the whole
__libdw_find_split_unit call.

Cheers,

Mark

Reply via email to