Hi Aaron,

On Fri, 2025-10-17 at 19:55 -0400, Aaron Merey wrote:
> If elf_getarhdr is called on a descriptor that refers to an archive
> which is itself a member of another archive, it may return the Elf_Arhdr
> of the current member (i.e., the member selected by elf_next or elf_rand)
> of the inner archive instead of Elf_Arhdr of the inner archive itself.
> 
> This also causes a memory leak: elf_end only attempts to free
> Elf_Arhdr fields ar_name and ar_rawname for descriptors that are not
> ELF_K_AR.
> 
> To fix this, replace the state.elf[32|64] field elf_ar_hdr with new
> struct Elf field elf_ar_hdr.  This field stores the Elf_Arhdr for all
> descriptors of archive members, including those with kind ELF_K_AR.
> 
> Also rename the state.ar field elf_ar_hdr to cur_ar_hdr to clarify that
> this is the header of an archive's current member.

This setup also feels cleaner. Looks good. I only have some nitpicks
about making the testcase output a little clearer. But it looks like
the testcase just works as intended. So feel free to ignore those
nitpicks (they only really matter if the test might for some reason
fail, at which time you can always hack the output yourself to
clarify).

> Signed-off-by: Aaron Merey <[email protected]>
> ---
> 
> v2 changes: Use new field elf_ar_hdr to store any descriptor's archive
> header.  In v1, the header for an ELF_K_ELF file was stored in
> state.elf[32|64].elf_ar_hdr and the header for an ELF_K_AR file was
> stored in state.ar.ar_ar_hdr.
> 
> v2 also adds nested archive tests to run-ar.sh.
> 
>  libdwfl/open.c        |   6 +-
>  libelf/elf_begin.c    |  16 +++---
>  libelf/elf_end.c      |  11 ++--
>  libelf/elf_getarhdr.c |   2 +-
>  libelf/elf_next.c     |   4 +-
>  libelf/elf_rand.c     |   2 +-
>  libelf/libelfP.h      |   8 +--
>  tests/Makefile.am     |   2 +
>  tests/ar-extract-ar.c | 125 ++++++++++++++++++++++++++++++++++++++++++
>  tests/run-ar.sh       |  80 +++++++++++++++++++++++++++
>  10 files changed, 229 insertions(+), 27 deletions(-)
>  create mode 100644 tests/ar-extract-ar.c
> 
> diff --git a/libdwfl/open.c b/libdwfl/open.c
> index 03e66dfa..e66595fb 100644
> --- a/libdwfl/open.c
> +++ b/libdwfl/open.c
> @@ -148,12 +148,12 @@ libdw_open_elf (int *fdp, Elf **elfp, bool 
> close_on_fail, bool archive_ok,
>       {
>         /* Pure evil.  libelf needs some better interfaces.  */
>         elf->kind = ELF_K_AR;
> -       elf->state.ar.elf_ar_hdr.ar_name = "libdwfl is faking you out";
> -       elf->state.ar.elf_ar_hdr.ar_size = elf->maximum_size - offset;
> +       elf->state.ar.cur_ar_hdr.ar_name = "libdwfl is faking you out";
> +       elf->state.ar.cur_ar_hdr.ar_size = elf->maximum_size - offset;
>         elf->state.ar.offset = offset - sizeof (struct ar_hdr);
>         Elf *subelf = elf_begin (-1, elf->cmd, elf);
>         elf->kind = ELF_K_NONE;
> -       elf->state.ar.elf_ar_hdr.ar_name = NULL;
> +       elf->state.ar.cur_ar_hdr.ar_name = NULL;
>         if (unlikely (subelf == NULL))
>           error = DWFL_E_LIBELF;
>         else

Pure evil indeed, but OK.

> diff --git a/libelf/elf_begin.c b/libelf/elf_begin.c
> index a824893f..d22f107d 100644
> --- a/libelf/elf_begin.c
> +++ b/libelf/elf_begin.c
> @@ -62,7 +62,7 @@ file_read_ar (int fildes, void *map_address, off_t offset, 
> size_t maxsize,
>        happen on demand.  */
>        elf->state.ar.offset = offset + SARMAG;
>  
> -      elf->state.ar.elf_ar_hdr.ar_rawname = elf->state.ar.raw_name;
> +      elf->state.ar.cur_ar_hdr.ar_rawname = elf->state.ar.raw_name;
>      }
>  
>    return elf;

OK.

> @@ -821,10 +821,7 @@ copy_arhdr (Elf_Arhdr *dest, Elf *ref)
>  {
>    Elf_Arhdr *hdr;
>  
> -  if (ref->kind == ELF_K_AR)
> -    hdr = &ref->state.ar.elf_ar_hdr;
> -  else
> -    hdr = &ref->state.elf.elf_ar_hdr;
> +  hdr = &ref->state.ar.cur_ar_hdr;
>  
>    char *ar_name = hdr->ar_name;
>    char *ar_rawname = hdr->ar_rawname;

OK.

> @@ -909,7 +906,7 @@ __libelf_next_arhdr_wrlock (Elf *elf)
>    /* Copy the raw name over to a NUL terminated buffer.  */
>    *((char *) mempcpy (elf->state.ar.raw_name, ar_hdr->ar_name, 16)) = '\0';
>  
> -  elf_ar_hdr = &elf->state.ar.elf_ar_hdr;
> +  elf_ar_hdr = &elf->state.ar.cur_ar_hdr;
>  
>    /* Now convert the `struct ar_hdr' into `Elf_Arhdr'.
>       Determine whether this is a special entry.  */

OK.

> @@ -1102,7 +1099,7 @@ dup_elf (int fildes, Elf_Cmd cmd, Elf *ref)
>       member the internal pointer of the archive file descriptor is
>       pointing to.  First read the header of the next member if this
>       has not happened already.  */
> -  if (ref->state.ar.elf_ar_hdr.ar_name == NULL
> +  if (ref->state.ar.cur_ar_hdr.ar_name == NULL
>        && __libelf_next_arhdr_wrlock (ref) != 0)
>      /* Something went wrong.  Maybe there is no member left.  */
>      return NULL;

OK.

> @@ -1114,7 +1111,7 @@ dup_elf (int fildes, Elf_Cmd cmd, Elf *ref)
>    size_t max_size = ref->maximum_size;
>    size_t offset = (size_t) (ref->state.ar.offset - ref->start_offset);
>    size_t hdr_size = sizeof (struct ar_hdr);
> -  size_t ar_size = (size_t) ref->state.ar.elf_ar_hdr.ar_size;
> +  size_t ar_size = (size_t) ref->state.ar.cur_ar_hdr.ar_size;
>    if (max_size < hdr_size || max_size - hdr_size < offset)
>      return NULL;

OK.

> @@ -1130,8 +1127,9 @@ dup_elf (int fildes, Elf_Cmd cmd, Elf *ref)
>      {
>        /* Enlist this new descriptor in the list of children.  */
>        result->next = ref->state.ar.children;
> -      result->state.elf.elf_ar_hdr = ar_hdr;
>        ref->state.ar.children = result;
> +
> +      result->elf_ar_hdr = ar_hdr;
>      }
>    else
>      {

OK.

> diff --git a/libelf/elf_end.c b/libelf/elf_end.c
> index 1d366127..9df2e165 100644
> --- a/libelf/elf_end.c
> +++ b/libelf/elf_end.c
> @@ -116,14 +116,11 @@ elf_end (Elf *elf)
>        rwlock_unlock (parent->lock);
>      }
>  
> -  if (elf->kind != ELF_K_AR)
> -    {
> -      if (elf->state.elf.elf_ar_hdr.ar_name != NULL)
> -     free (elf->state.elf.elf_ar_hdr.ar_name);
> +  if (elf->elf_ar_hdr.ar_name != NULL)
> +    free (elf->elf_ar_hdr.ar_name);
>  
> -      if (elf->state.elf.elf_ar_hdr.ar_rawname != NULL)
> -     free (elf->state.elf.elf_ar_hdr.ar_rawname);
> -    }
> +  if (elf->elf_ar_hdr.ar_rawname != NULL)
> +    free (elf->elf_ar_hdr.ar_rawname);
>  
>    /* This was the last activation.  Free all resources.  */
>    switch (elf->kind)

OK. Nice and simple.

> diff --git a/libelf/elf_getarhdr.c b/libelf/elf_getarhdr.c
> index 9211fc2e..dcd5718e 100644
> --- a/libelf/elf_getarhdr.c
> +++ b/libelf/elf_getarhdr.c
> @@ -51,5 +51,5 @@ elf_getarhdr (Elf *elf)
>        return NULL;
>      }
>  
> -  return &elf->state.elf.elf_ar_hdr;
> +  return &elf->elf_ar_hdr;
>  }

OK.

> diff --git a/libelf/elf_next.c b/libelf/elf_next.c
> index 6edafd2e..32d91b51 100644
> --- a/libelf/elf_next.c
> +++ b/libelf/elf_next.c
> @@ -56,7 +56,7 @@ elf_next (Elf *elf)
>  
>    /* Now advance the offset.  */
>    parent->state.ar.offset += (sizeof (struct ar_hdr)
> -                           + ((parent->state.ar.elf_ar_hdr.ar_size + 1)
> +                           + ((parent->state.ar.cur_ar_hdr.ar_size + 1)
>                                & ~1l));
>  
>    /* Get the next archive header.  */

OK.

> @@ -64,7 +64,7 @@ elf_next (Elf *elf)
>  
>    /* If necessary, mark the archive header as unusable.  */
>    if (ret == ELF_C_NULL)
> -    parent->state.ar.elf_ar_hdr.ar_name = NULL;
> +    parent->state.ar.cur_ar_hdr.ar_name = NULL;
>  
>    rwlock_unlock (parent->lock);

OK.

> diff --git a/libelf/elf_rand.c b/libelf/elf_rand.c
> index f1850e7b..c08eadc6 100644
> --- a/libelf/elf_rand.c
> +++ b/libelf/elf_rand.c
> @@ -53,7 +53,7 @@ elf_rand (Elf *elf, size_t offset)
>    if (__libelf_next_arhdr_wrlock (elf) != 0)
>      {
>        /* Mark the archive header as unusable.  */
> -      elf->state.ar.elf_ar_hdr.ar_name = NULL;
> +      elf->elf_ar_hdr.ar_name = NULL;
>        return 0;
>      }

OK.

> diff --git a/libelf/libelfP.h b/libelf/libelfP.h
> index 1b93da88..11ef5989 100644
> --- a/libelf/libelfP.h
> +++ b/libelf/libelfP.h
> @@ -306,6 +306,9 @@ struct Elf
>    /* Reference counting for the descriptor.  */
>    int ref_count;
>  
> +  /* Structure returned by 'elf_getarhdr'.  */
> +  Elf_Arhdr elf_ar_hdr;
> +
>    /* Lock to handle multithreaded programs.  */
>    rwlock_define (,lock);

OK, replaces the following 3...

> @@ -323,7 +326,6 @@ struct Elf
>                                  read from the file.  */
>        search_tree rawchunk_tree;  /* Tree and lock for elf_getdata_rawchunk
>                                    results.  */
> -      Elf_Arhdr elf_ar_hdr;     /* Structure returned by 'elf_getarhdr'.  */
>        unsigned int scnincr;  /* Number of sections allocate the last
>                                  time.  */
>        int ehdr_flags;                /* Flags (dirty) for ELF header.  */
> @@ -344,7 +346,6 @@ struct Elf
>                                  read from the file.  */
>        search_tree rawchunk_tree;  /* Tree and lock for
>                                    elf_getdata_rawchunk results.  */
> -      Elf_Arhdr elf_ar_hdr;     /* Structure returned by 'elf_getarhdr'.  */
>        unsigned int scnincr;  /* Number of sections allocate the last
>                                  time.  */
>        int ehdr_flags;                /* Flags (dirty) for ELF header.  */
> @@ -371,7 +372,6 @@ struct Elf
>                                  read from the file.  */
>        search_tree rawchunk_tree;  /* Tree and lock for
>                                    elf_getdata_rawchunk results.  */
> -      Elf_Arhdr elf_ar_hdr;     /* Structure returned by 'elf_getarhdr'.  */
>        unsigned int scnincr;  /* Number of sections allocate the last
>                                  time.  */
>        int ehdr_flags;                /* Flags (dirty) for ELF header.  */

OK.

> @@ -397,7 +397,7 @@ struct Elf
>        int64_t offset;                /* Offset in file we are currently at.
>                                  elf_next() advances this to the next
>                                  member of the archive.  */
> -      Elf_Arhdr elf_ar_hdr;     /* Copy of current archive member's structure
> +      Elf_Arhdr cur_ar_hdr;     /* Copy of current archive member's structure
>                                  returned by 'elf_getarhdr'.  */
>        struct ar_hdr ar_hdr;  /* Header read from file.  */
>        char ar_name[16];              /* NUL terminated ar_name of 
> elf_ar_hdr.  */

OK, better name (distinct from the union members).

> diff --git a/tests/Makefile.am b/tests/Makefile.am
> index 00ba754d..15c32a85 100644
> --- a/tests/Makefile.am
> +++ b/tests/Makefile.am
> @@ -68,6 +68,7 @@ check_PROGRAMS = arextract arsymtest newfile saridx 
> scnnames sectiondump \
>                 cu-dwp-section-info declfiles test-manyfuncs \
>                 eu_search_cfi eu_search_macros \
>                 eu_search_lines eu_search_die \
> +               ar-extract-ar \
>                 $(asm_TESTS)
>  
>  asm_TESTS = asm-tst1 asm-tst2 asm-tst3 asm-tst4 asm-tst5 \
> @@ -774,6 +775,7 @@ libeu = ../lib/libeu.a
>  
>  arextract_LDADD = $(libelf)
>  arsymtest_LDADD = $(libelf)
> +ar_extract_ar_LDADD = $(libelf)
>  newfile_LDADD = $(libelf)
>  saridx_LDADD = $(libeu) $(libelf)
>  scnnames_LDADD = $(libelf)

OK.

> diff --git a/tests/ar-extract-ar.c b/tests/ar-extract-ar.c
> new file mode 100644
> index 00000000..8c32b036
> --- /dev/null
> +++ b/tests/ar-extract-ar.c
> @@ -0,0 +1,125 @@
> +/* Copyright (C) 2025 Red Hat, Inc.
> +   This file is part of elfutils.
> +
> +   This file is free software; you can redistribute it and/or modify
> +   it under the terms of the GNU General Public License as published by
> +   the Free Software Foundation; either version 3 of the License, or
> +   (at your option) any later version.
> +
> +   elfutils is distributed in the hope that it will be useful, but
> +   WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +   GNU General Public License for more details.
> +
> +   You should have received a copy of the GNU General Public License
> +   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
> +
> +#ifdef HAVE_CONFIG_H
> +# include <config.h>
> +#endif
> +
> +#include <assert.h>
> +#include <fcntl.h>
> +#include <gelf.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include <system.h>
> +
> +
> +static void
> +ar_extract (Elf *elf)
> +{
> +  Elf *subelf;
> +  Elf_Arsym *arsym;
> +  size_t narsym;
> +  Elf_Cmd cmd = ELF_C_READ;
> +
> +  assert (elf_kind (elf) == ELF_K_AR);
> +
> +  arsym = elf_getarsym (elf, &narsym);
> +  if (arsym == NULL)
> +    {
> +      printf ("Cannot get archive index: %s\n", elf_errmsg (-1));
> +      exit (1);
> +    }
> +
> +  if (narsym != 4)
> +    {
> +      printf ("Incorrect number of arsyms\n");
> +      exit (1);
> +    }

Hardcoded, but OK just for the testcase.

> +  /* Print symbol names.  Skip the null entry at the end of arsyms.  */
> +  for (size_t i = 0; i < narsym - 1; ++i)
> +    printf ("%s\n", arsym[i].as_name);

Maybe add a header "= symbol names =" and/or a blank line after?

> +  /* Print header names and recursively call this function on member
> +     archives.  */
> +  while ((subelf = elf_begin (-1, cmd, elf)) != NULL)
> +    {
> +      /* The the header for this element.  */
> +      Elf_Arhdr *arhdr = elf_getarhdr (subelf);
> +
> +      if (arhdr == NULL)
> +     {
> +       printf ("cannot get arhdr: %s\n", elf_errmsg (-1));
> +       exit (1);
> +     }
> +
> +      printf ("%s %s\n", arhdr->ar_name, arhdr->ar_rawname);
> +
> +      if (elf_kind (subelf) == ELF_K_AR)
> +     ar_extract (subelf);
> +
> +      cmd = elf_next (subelf);
> +      elf_end (subelf);
> +    }
> +}

OK. Could be a little clearer by somehow trying to show indention (make
ar_extract take an indentation number that gets increased when calling
itself?

> +
> +int
> +main (int argc, char *argv[])
> +{
> +  int fd;
> +  Elf *elf;
> +  Elf_Cmd cmd;
> +
> +  if (argc < 2)
> +    exit (1);
> +
> +  /* Open the archive.  */
> +  fd = open (argv[1], O_RDONLY);
> +  if (fd == -1)
> +    {
> +      printf ("Cannot open input file: %m");
> +      exit (1);
> +    }
> +
> +  /* Set the ELF version.  */
> +  elf_version (EV_CURRENT);
> +
> +  /* Create an ELF descriptor.  */
> +  cmd = ELF_C_READ;
> +  elf = elf_begin (fd, cmd, NULL);
> +
> +  if (elf == NULL)
> +    {
> +      printf ("Cannot create ELF descriptor: %s\n", elf_errmsg (-1));
> +      exit (1);
> +    }
> +
> +  /* If it is no archive punt.  */
> +  if (elf_kind (elf) != ELF_K_AR)
> +    {
> +      printf ("`%s' is no archive\n", argv[1]);
> +      exit (1);
> +    }
> +
> +  ar_extract (elf);
> +  elf_end (elf);
> +  close (fd);
> +
> +  return 0;
> +}

OK.

> diff --git a/tests/run-ar.sh b/tests/run-ar.sh
> index 656f1d1a..1dd94546 100755
> --- a/tests/run-ar.sh
> +++ b/tests/run-ar.sh
> @@ -37,4 +37,84 @@ echo Check new ar file is now empty
>  testrun_compare ${abs_top_builddir}/src/ar -t test.ar << EOF
>  EOF
>  
> +tempfiles bin* dup* long* inner* outer*

Slightly surprised this just works, but it does.

> +echo Compile files for nested archives.
> +
> +cat > dup.c <<'EOF'
> +int dup_func(void){return 1;}
> +EOF
> +cat > bin_outer.c <<'EOF'
> +int outer_func(void){return 1;}
> +EOF
> +cat > long_name_long_name_outer.c <<'EOF'
> +int long_name_long_name_func(void){return 1;}
> +EOF
> +cat > bin_inner1.c <<'EOF'
> +int inner1_func(void){return 1;}
> +EOF
> +cat > long_name_long_name_inner1.c <<'EOF'
> +int long_name_long_name_func(void){return 1;}
> +EOF
> +cat > bin_inner2.c <<'EOF'
> +int inner2_func(void){return 1;}
> +EOF
> +cat > long_name_long_name_inner2.c <<'EOF'
> +int long_name_long_name_func(void){return 1;}
> +EOF
> +
> +# Compile the source files.
> +for src in *.c; do
> +  obj=$(echo "$src" | sed 's/\.c$/.o/')
> +  gcc -O0 -c "$src" -o "$obj"
> +done

OK.

> +echo Create nested archives.
> +testrun ${abs_top_builddir}/src/ar -r inner2.ar bin_inner2.o
> +testrun ${abs_top_builddir}/src/ar -r inner2.ar dup.o
> +testrun ${abs_top_builddir}/src/ar -r inner2.ar long_name_long_name_inner2.o
> +
> +# inner1.ar contains inner2.ar
> +testrun ${abs_top_builddir}/src/ar -r inner1.ar inner2.ar
> +testrun ${abs_top_builddir}/src/ar -r inner1.ar bin_inner1.o
> +testrun ${abs_top_builddir}/src/ar -r inner1.ar dup.o
> +testrun ${abs_top_builddir}/src/ar -r inner1.ar long_name_long_name_inner1.o
> +
> +# outer.ar contains inner1.ar
> +testrun ${abs_top_builddir}/src/ar -r outer.ar inner1.ar
> +testrun ${abs_top_builddir}/src/ar -r outer.ar bin_outer.o
> +testrun ${abs_top_builddir}/src/ar -r outer.ar dup.o
> +testrun ${abs_top_builddir}/src/ar -r outer.ar long_name_long_name_outer.o
> +
> +echo Check symbol and header names of the nested archives.
> +
> +testrun ${abs_builddir}/ar-extract-ar outer.ar <<'EOF'
> +outer_func
> +dup_func
> +long_name_long_name_func
> +/ /
> +// //
> +inner1.ar inner1.ar/
> +inner1_func
> +dup_func
> +long_name_long_name_func
> +/ /
> +// //
> +inner2.ar inner2.ar/
> +inner2_func
> +dup_func
> +long_name_long_name_func
> +/ /
> +// //
> +bin_inner2.o bin_inner2.o/
> +dup.o dup.o/
> +long_name_long_name_inner2.o /0
> +bin_inner1.o bin_inner1.o/
> +dup.o dup.o/
> +long_name_long_name_inner1.o /0
> +bin_outer.o bin_outer.o/
> +dup.o dup.o/
> +long_name_long_name_outer.o /0
> +EOF
> +
>  exit 0

OK.

Reply via email to