Hi Aaron,
On Tue, 2025-07-15 at 00:25 -0400, Aaron Merey wrote:
> Currently each archive descriptor maintains a single Elf_Arhdr for the
> current archive member (as determined by elf_next or elf_rand) which is
> returned by elf_getarhdr.
>
> A single per-archive Elf_Arhdr is not ideal since elf_next and elf_rand
> can invalidate an archive member's reference to its own Elf_Arhdr.
>
> Avoid this possible invalidation by storing each Elf_Arhdr in its
> archive member descriptor.
>
> The existing Elf_Arhdr parsing and storage in the archive descriptor
> internal state is left mostly untouched. When an archive member is
> created with elf_begin it is given its own copy of its Elf_Arhdr from
> the archive descriptor.
>
> Also update tests/arextract.c with verification that each Elf_Arhdr
> is distinct and remains valid after calls to elf_next that would
> have previously invalidated the Elf_Arhdr.
Nice. I like the design, but some questions below.
> diff --git a/libelf/elf_begin.c b/libelf/elf_begin.c
> index 3ed1f8d7..5ed5aaa3 100644
> --- a/libelf/elf_begin.c
> +++ b/libelf/elf_begin.c
> @@ -1065,11 +1065,14 @@ dup_elf (int fildes, Elf_Cmd cmd, Elf *ref)
>result = read_file (fildes, ref->state.ar.offset + sizeof (struct ar_hdr),
> ref->state.ar.elf_ar_hdr.ar_size, cmd, ref);
>
> - /* Enlist this new descriptor in the list of children. */
>if (result != NULL)
> {
> + /* Enlist this new descriptor in the list of children. */
>result->next = ref->state.ar.children;
>ref->state.ar.children = result;
> +
> + /* Ensure the member descriptor has its own copy of its header info.
> */
> + result->elf_ar_hdr = ref->state.ar.elf_ar_hdr;
> }
>
>return result;
Shouldn't this be result->elf_ar_hdr = ref->elf_ar_hdr.
See also below, the main question is if state.ar should still have an
elf_ar_hdr field itself.
> diff --git a/libelf/elf_clone.c b/libelf/elf_clone.c
> index e6fe4729..d6c8d541 100644
> --- a/libelf/elf_clone.c
> +++ b/libelf/elf_clone.c
> @@ -69,6 +69,7 @@ elf_clone (Elf *elf, Elf_Cmd cmd)
> == offsetof (struct Elf, state.elf64.scns));
>retval->state.elf.scns_last = &retval->state.elf32.scns;
>retval->state.elf32.scns.max = elf->state.elf32.scns.max;
> + retval->elf_ar_hdr = elf->elf_ar_hdr;
>
>retval->class = elf->class;
> }
Right, this is what I was expecting in elfdup above too.
> diff --git a/libelf/elf_getarhdr.c b/libelf/elf_getarhdr.c
> index 509f1da5..ec85fa71 100644
> --- a/libelf/elf_getarhdr.c
> +++ b/libelf/elf_getarhdr.c
> @@ -44,30 +44,12 @@ elf_getarhdr (Elf *elf)
>if (elf == NULL)
> return NULL;
>
> - Elf *parent = elf->parent;
> -
>/* Calling this function is not ok for any file type but archives. */
> - if (parent == NULL)
> + if (elf->parent == NULL)
> {
>__libelf_seterrno (ELF_E_INVALID_OP);
>return NULL;
> }
Should this be
if (elf->parent == NULL || elf->parent->kind != ELF_K_AR)
Below we do have an assert, so maybe this suggestion is wrong.
> - /* Make sure we have read the archive header. */
> - if (parent->state.ar.elf_ar_hdr.ar_name == NULL
> - && __libelf_next_arhdr_wrlock (parent) != 0)
> -{
> - rwlock_wrlock (parent->lock);
> - int st = __libelf_next_arhdr_wrlock (parent);
> - rwlock_unlock (parent->lock);
> -
> - if (st != 0)
> - /* Something went wrong. Maybe there is no member left. */
> - return NULL;
> -}
> -
> - /* We can be sure the parent is an archive. */
> - assert (parent->kind == ELF_K_AR);
Was this assert correct? Can an Elf only have an parent if it comes
from an ELF_K_AR file?
> - return &parent->state.ar.elf_ar_hdr;
> + return &elf->elf_ar_hdr;
> }
> diff --git a/libelf/libelfP.h b/libelf/libelfP.h
> index 66e7e4dd..20120ad3 100644
> --- a/libelf/libelfP.h
> +++ b/libelf/libelfP.h
> @@ -306,6 +306,9 @@ struct Elf
>/* Reference counting for the descriptor. */
>int ref_count;
>
> + /* Per-descriptor copy of the structure returned by 'elf_getarhdr'. */
> + Elf_Arhdr elf_ar_hdr;
> +
>/* Lock to handle multithreaded programs. */
>rwlock_define (,lock);
So this adds an Elf_Arhdr to all Elfs.
Three questions:
- Do you still need an elf_ar_hdr field in the state union for ar?
- Should this go into the the elf and/or elf32/64 state union
structs instead?
- What about the state.ar.ar_name and state.ar_rawname fields?
Those are pointed to from the elf_ar_hdr fields ar_name and raw_name.
So shouldn't they also be held as top-level or elf[32|64] field
states?
> diff --git a/tests/arextract.c b/tests/arextract.c
> index 936d7f55..7920d1c9 100644
> --- a/tests/arextract.c
> +++ b/tests/arextract.c
> @@ -21,12 +21,20 @@
>
> #include
> #include
> +#include
> #include
> #include
> #include
> #include
> #include
>
> +typedef struct hdr_node {
> +Elf *elf;
> +Elf_Arhdr *hdr;
> +str