Hi Serhei,
On Tue, Oct 14, 2025 at 4:30 PM Serhei Makarov <[email protected]> wrote:
>
> This patch adds a generic dwflst_sample_getframes() API that does not
> depend on perf_events concepts, in particular the
> linux-kernel-specific enum defining the perf_regs_mask register order.
> This involves reworking the register-handling backend to use
> regs_mapping arrays rather than perf_regs_mask, and provide a way to
> translate perf_regs_mask to regs_mapping.
>
> A regs_mapping array, for each item in a provided regs[] array,
> specifies its position in the full register file expected by the DWARF
> functionality.
>
> TODO: Fixup copy_word dependency on abi in dwflst_sample_frame.c?
>
> * libdwfl_stacktrace/Makefile.am: Rename dwflst_sample_frame.c from
> dwflst_perf_frame.c.
> * libdwfl_stacktrace/libdwfl_stacktrace.h (dwflst_sample_getframes):
> New function providing unwinding functionality with a regs_mapping
> array rather than a linux-kernel-dependent perf_regs_mask.
> * libdwfl_stacktrace/dwflst_sample_frame.c: Renamed from
> dwflst_perf_frame.c.
> (struct sample_info): Rename from perf_sample_info, include
> regs_mapping field.
> (sample_next_thread): Renamed struct sample_info.
> (sample_getthread): Renamed struct sample_info.
> (elf_memory_read): Renamed struct sample_info.
> (sample_memory_read): Renamed struct sample_info.
> (sample_set_initial_registers): Renamed struct sample_info,
> pass regs_mapping to ebl_set_initial_registers_sample.
> (dwflst_sample_getframes): New function.
> (dwflst_perf_sample_getframes): Reimplement in terms of
> dwflst_sample_getframes and ebl_sample_perf_regs_mapping.
> * libebl/ebl-hooks.h (set_initial_registers_sample): Now
> takes regs_mapping instead of regs_mask.
> (sample_base_addr): Removed.
> (sample_pc): Removed.
> (sample_sp_pc): New function combining the removed functions for
> efficiency.
> (sample_perf_regs_mapping): New function translating
> perf_regs_mask to regs_mapping array.
> * libebl/eblinitreg_sample.c (ebl_sample_base_addr): Removed.
> (ebl_sample_pc): Removed.
> (ebl_sample_sp_pc): New function.
> (ebl_set_initial_registers_sample): Take regs_mapping, provide
> a default implementation for contiguous dwarf_regs array.
> (ebl_sample_perf_regs_mapping): New function.
> * libebl/libebl.h (ebl_set_initial_registers_sample): Now takes
> regs_mapping instead of regs_mask.
> (ebl_sample_base_addr): Removed.
> (ebl_sample_pc): Removed.
> (ebl_sample_sp_pc): New function.
> (ebl_sample_perf_regs_mapping): New function.
> * libebl/libeblP.h (struct ebl): Add caching fields to remove the
> need to repeat a sample_perf_regs_mapping() computation for
> every frame when the perf_regs_mask is consistent.
> * backends/Makefile.am: Remove no-longer-needed linux-perf-regs.c.
> * backends/i386_init.c (i386_init): Renamed sample_* functions,
> added cached_regs_mapping and related fields/functions.
> * backends/i386_initreg_sample.c (i386_sample_base_addr): Removed.
> (i386_sample_pc): Removed.
> (i386_sample_sp_pc): New function combining the removed functions.
> (i386_set_initial_registers_sample): Removed.
> (i386_sample_perf_regs_mapping): New function translating
> perf_regs_mask to regs_mapping array.
> * backends/linux-perf-regs.c: Removed as perf_sample_find_reg is no
> longer needed.
> * backends/x86_64_init.c (x86_64_init): Renamed sample_* functions,
> added cached_regs_mapping and related fields/functions.
> * backends/x86_64_initreg_sample.c (x86_64_sample_base_addr): Removed.
> (x86_64_sample_pc): Removed.
> (x86_64_sample_sp_pc): New function combining the removed functions.
> (x86_64_set_initial_registers_sample): Removed.
> (x86_64_sample_perf_regs_mapping): New function translating
> perf_regs_mask to regs_mapping array.
> * backends/x86_initreg_sample.c (x86_set_initial_registers_sample):
> Removed.
> (x86_sample_sp_pc): New function.
> (x86_sample_perf_regs_mapping): New function translating
> perf_regs_mask to regs_mapping array.
> ---
> backends/Makefile.am | 2 +-
> backends/i386_init.c | 9 +-
> backends/i386_initreg_sample.c | 72 +++---------
> backends/linux-perf-regs.c | 48 --------
> backends/x86_64_init.c | 9 +-
> backends/x86_64_initreg_sample.c | 70 +++---------
> backends/x86_initreg_sample.c | 108 ++++++++++++++----
> libdwfl_stacktrace/Makefile.am | 2 +-
> ...lst_perf_frame.c => dwflst_sample_frame.c} | 98 ++++++++++------
> libdwfl_stacktrace/libdwfl_stacktrace.h | 31 +++--
> libebl/ebl-hooks.h | 35 ++++--
> libebl/eblinitreg_sample.c | 63 +++++++---
> libebl/libebl.h | 50 +++++---
> libebl/libeblP.h | 7 ++
> 14 files changed, 321 insertions(+), 283 deletions(-)
> delete mode 100644 backends/linux-perf-regs.c
> rename libdwfl_stacktrace/{dwflst_perf_frame.c => dwflst_sample_frame.c}
> (75%)
>
> diff --git a/backends/Makefile.am b/backends/Makefile.am
> index 8ccbdb50..7a820df0 100644
> --- a/backends/Makefile.am
> +++ b/backends/Makefile.am
> @@ -121,7 +121,7 @@ am_libebl_backends_pic_a_OBJECTS =
> $(libebl_backends_a_SOURCES:.c=.os)
>
> noinst_HEADERS = libebl_CPU.h libebl_PERF_FLAGS.h common-reloc.c \
> linux-core-note.c x86_corenote.c \
> - linux-perf-regs.c x86_initreg_sample.c
> + x86_initreg_sample.c
>
> EXTRA_DIST = $(modules:=_reloc.def)
>
> diff --git a/backends/i386_init.c b/backends/i386_init.c
> index e64ef6ed..a980e71a 100644
> --- a/backends/i386_init.c
> +++ b/backends/i386_init.c
> @@ -60,10 +60,13 @@ i386_init (Elf *elf __attribute__ ((unused)),
> (Likely an artifact of reusing that header between i386/x86_64.) */
> eh->frame_nregs = 9;
> HOOK (eh, set_initial_registers_tid);
> - HOOK (eh, set_initial_registers_sample);
> - HOOK (eh, sample_base_addr);
> - HOOK (eh, sample_pc);
> + /* set_initial_registers_sample is default ver */
> + HOOK (eh, sample_sp_pc);
> + HOOK (eh, sample_perf_regs_mapping);
> eh->perf_frame_regs_mask = PERF_FRAME_REGISTERS_I386;
> + eh->cached_perf_regs_mask = 0;
> + eh->cached_regs_mapping = NULL;
> + eh->cached_n_regs_mapping = -1;
> HOOK (eh, unwind);
>
> return eh;
> diff --git a/backends/i386_initreg_sample.c b/backends/i386_initreg_sample.c
> index 677393c9..94955191 100644
> --- a/backends/i386_initreg_sample.c
> +++ b/backends/i386_initreg_sample.c
> @@ -31,6 +31,7 @@
> #endif
>
> #include <stdlib.h>
> +#include <assert.h>
> #if (defined __i386__ || defined __x86_64__) && defined(__linux__)
> # include <linux/perf_event.h>
> # include <asm/perf_regs.h>
> @@ -40,69 +41,26 @@
> #include "libebl_CPU.h"
> #include "libebl_PERF_FLAGS.h"
> #if (defined __i386__ || defined __x86_64__) && defined(__linux__)
> -# include "linux-perf-regs.c"
> # include "x86_initreg_sample.c"
> #endif
>
> -/* Register ordering cf. linux arch/x86/include/uapi/asm/perf_regs.h,
> - enum perf_event_x86_regs: */
> -Dwarf_Word
> -i386_sample_base_addr (const Dwarf_Word *regs, uint32_t n_regs,
> - uint64_t regs_mask,
> - /* XXX hypothetically needed if abi varies
> - between samples in the same process;
> - not needed on x86 */
> - uint32_t abi __attribute__((unused)))
> -{
> -#if (!defined __i386__ && !defined __x86_64__) || !defined(__linux__)
> - (void)regs;
> - (void)n_regs;
> - (void)regs_mask;
> - return 0;
> -#else /* __i386__ || __x86_64__ */
> - (void)regs;
> - (void)n_regs;
> - (void)regs_mask;
> - return perf_sample_find_reg (regs, n_regs, regs_mask,
> - 7 /* index into perf_event_x86_regs */);
> -#endif
> -}
> -
> -Dwarf_Word
> -i386_sample_pc (const Dwarf_Word *regs, uint32_t n_regs,
> - uint64_t regs_mask,
> - uint32_t abi __attribute__((unused)))
> +bool
> +i386_sample_sp_pc (const Dwarf_Word *regs, uint32_t n_regs,
> + const int *regs_mapping, uint32_t n_regs_mapping,
> + Dwarf_Word *sp, Dwarf_Word *pc)
> {
> -#if (!defined __i386__ && !defined __x86_64__) || !defined(__linux__)
> - (void)regs;
> - (void)n_regs;
> - (void)regs_mask;
> - return 0;
> -#else /* __i386__ || __x86_64__ */
> - return perf_sample_find_reg (regs, n_regs, regs_mask,
> - 8 /* index into perf_event_x86_regs */);
> -#endif
> + /* XXX for dwarf_regs indices, compare i386_initreg.c */
> + return x86_sample_sp_pc (regs, n_regs, regs_mapping, n_regs_mapping,
> + sp, 4 /* index of sp in dwarf_regs */,
> + pc, 8 /* index of pc in dwarf_regs */);
> }
>
> bool
> -i386_set_initial_registers_sample (const Dwarf_Word *regs, uint32_t n_regs,
> - uint64_t regs_mask, uint32_t abi,
> - ebl_tid_registers_t *setfunc,
> - void *arg)
> +i386_sample_perf_regs_mapping (Ebl *ebl,
> + uint64_t perf_regs_mask, uint32_t abi,
> + const int **regs_mapping,
> + size_t *n_regs_mapping)
> {
> -#if (!defined __i386__ && !defined __x86_64__) || !defined(__linux__)
> - (void)regs;
> - (void)n_regs;
> - (void)regs_mask;
> - (void)abi;
> - (void)setfunc;
> - (void)arg;
> - return false;
> -#else /* __i386__ || __x86_64__ */
> - Dwarf_Word dwarf_regs[9];
> - if (!x86_set_initial_registers_sample (regs, n_regs, regs_mask,
> - abi, dwarf_regs, 9))
> - return false;
> - return setfunc (0, 9, dwarf_regs, arg);
> -#endif
> + return x86_sample_perf_regs_mapping (ebl, perf_regs_mask, abi,
> + regs_mapping, n_regs_mapping);
> }
> diff --git a/backends/linux-perf-regs.c b/backends/linux-perf-regs.c
> deleted file mode 100644
> index 22ad67c6..00000000
> --- a/backends/linux-perf-regs.c
> +++ /dev/null
> @@ -1,48 +0,0 @@
> -/* Common pieces for handling registers in a linux perf_events sample.
> - Copyright (C) 2025 Red Hat, Inc.
> - This file is part of elfutils.
> -
> - This file is free software; you can redistribute it and/or modify
> - it under the terms of either
> -
> - * the GNU Lesser General Public License as published by the Free
> - Software Foundation; either version 3 of the License, or (at
> - your option) any later version
> -
> - or
> -
> - * the GNU General Public License as published by the Free
> - Software Foundation; either version 2 of the License, or (at
> - your option) any later version
> -
> - or both in parallel, as here.
> -
> - elfutils is distributed in the hope that it will be useful, but
> - WITHOUT ANY WARRANTY; without even the implied warranty of
> - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> - General Public License for more details.
> -
> - You should have received copies of the GNU General Public License and
> - the GNU Lesser General Public License along with this program. If
> - not, see <http://www.gnu.org/licenses/>. */
> -
> -static Dwarf_Word
> -perf_sample_find_reg (const Dwarf_Word *regs, uint32_t n_regs,
> - uint64_t regs_mask,
> - int target)
> -{
> - int j, k; uint64_t bit;
> - for (j = 0, k = 0, bit = 1; k < PERF_REG_X86_64_MAX; k++, bit <<= 1)
> - {
> - if (bit & regs_mask) {
> - if (n_regs <= (uint32_t) j)
> - return 0; /* regs_mask count doesn't match n_regs */
> - if (k == target)
> - return regs[j];
> - if (k > target)
> - return 0; /* regs_mask doesn't include desired reg */
> - j++;
> - }
> - }
> - return 0;
> -}
> diff --git a/backends/x86_64_init.c b/backends/x86_64_init.c
> index 6a1cbc4b..5f929758 100644
> --- a/backends/x86_64_init.c
> +++ b/backends/x86_64_init.c
> @@ -63,10 +63,13 @@ x86_64_init (Elf *elf __attribute__ ((unused)),
> /* gcc/config/ #define DWARF_FRAME_REGISTERS. */
> eh->frame_nregs = 17;
> HOOK (eh, set_initial_registers_tid);
> - HOOK (eh, set_initial_registers_sample);
> - HOOK (eh, sample_base_addr);
> - HOOK (eh, sample_pc);
> + /* set_initial_registers_sample is default ver */
> + HOOK (eh, sample_sp_pc);
> + HOOK (eh, sample_perf_regs_mapping);
> eh->perf_frame_regs_mask = PERF_FRAME_REGISTERS_X86_64;
> + eh->cached_perf_regs_mask = 0;
> + eh->cached_regs_mapping = NULL;
> + eh->cached_n_regs_mapping = -1;
> HOOK (eh, unwind);
> HOOK (eh, check_reloc_target_type);
>
> diff --git a/backends/x86_64_initreg_sample.c
> b/backends/x86_64_initreg_sample.c
> index 48d14bc8..9dd708c9 100644
> --- a/backends/x86_64_initreg_sample.c
> +++ b/backends/x86_64_initreg_sample.c
> @@ -31,6 +31,7 @@
> #endif
>
> #include <stdlib.h>
> +#include <assert.h>
> #if defined(__x86_64__) && defined(__linux__)
> # include <linux/perf_event.h>
> # include <asm/perf_regs.h>
> @@ -40,67 +41,26 @@
> #include "libebl_CPU.h"
> #include "libebl_PERF_FLAGS.h"
> #if defined(__x86_64__) && defined(__linux__)
> -# include "linux-perf-regs.c"
> # include "x86_initreg_sample.c"
> #endif
>
> -/* Register ordering cf. linux arch/x86/include/uapi/asm/perf_regs.h,
> - enum perf_event_x86_regs: */
> -Dwarf_Word
> -x86_64_sample_base_addr (const Dwarf_Word *regs, uint32_t n_regs,
> - uint64_t regs_mask,
> - /* XXX hypothetically needed if abi varies
> - between samples in the same process;
> - not needed on x86*/
> - uint32_t abi __attribute__((unused)))
> -{
> -#if !defined(__x86_64__) || !defined(__linux__)
> - (void)regs;
> - (void)n_regs;
> - (void)regs_mask;
> - return 0;
> -#else /* __x86_64__ */
> - return perf_sample_find_reg (regs, n_regs, regs_mask,
> - 7 /* index into perf_event_x86_regs */);
> -#endif
> -}
> -
> -Dwarf_Word
> -x86_64_sample_pc (const Dwarf_Word *regs, uint32_t n_regs,
> - uint64_t regs_mask,
> - uint32_t abi __attribute__((unused)))
> +bool
> +x86_64_sample_sp_pc (const Dwarf_Word *regs, uint32_t n_regs,
> + const int *regs_mapping, uint32_t n_regs_mapping,
> + Dwarf_Word *sp, Dwarf_Word *pc)
> {
> -#if !defined(__x86_64__) || !defined(__linux__)
> - (void)regs;
> - (void)n_regs;
> - (void)regs_mask;
> - return 0;
> -#else /* __x86_64__ */
> - return perf_sample_find_reg (regs, n_regs, regs_mask,
> - 8 /* index into perf_event_x86_regs */);
> -#endif
> + /* XXX for dwarf_regs indices, compare x86_64_initreg.c */
> + return x86_sample_sp_pc (regs, n_regs, regs_mapping, n_regs_mapping,
> + sp, 7 /* index of sp in dwarf_regs */,
> + pc, 16 /* index of pc in dwarf_regs */);
> }
>
> bool
> -x86_64_set_initial_registers_sample (const Dwarf_Word *regs, uint32_t n_regs,
> - uint64_t regs_mask, uint32_t abi,
> - ebl_tid_registers_t *setfunc,
> - void *arg)
> +x86_64_sample_perf_regs_mapping (Ebl *ebl,
> + uint64_t perf_regs_mask, uint32_t abi,
> + const int **regs_mapping,
> + size_t *n_regs_mapping)
> {
> -#if !defined(__x86_64__) || !defined(__linux__)
> - (void)regs;
> - (void)n_regs;
> - (void)regs_mask;
> - (void)abi;
> - (void)setfunc;
> - (void)arg;
> - return false;
> -#else /* __x86_64__ */
> - Dwarf_Word dwarf_regs[17];
> - if (!x86_set_initial_registers_sample (regs, n_regs, regs_mask,
> - abi, dwarf_regs, 9))
> - return false;
> - return setfunc (0, 17, dwarf_regs, arg);
> -#endif
> + return x86_sample_perf_regs_mapping (ebl, perf_regs_mask, abi,
> + regs_mapping, n_regs_mapping);
> }
> -
> diff --git a/backends/x86_initreg_sample.c b/backends/x86_initreg_sample.c
> index 8d6b471b..f9b45462 100644
> --- a/backends/x86_initreg_sample.c
> +++ b/backends/x86_initreg_sample.c
> @@ -1,4 +1,4 @@
> -/* x86 linux perf_events register handling, pieces common to x86-64 and i386.
> +/* x86 stack sample register handling, pieces common to x86-64 and i386.
> Copyright (C) 2025 Red Hat, Inc.
> This file is part of elfutils.
>
> @@ -27,13 +27,52 @@
> not, see <http://www.gnu.org/licenses/>. */
>
> static bool
> -x86_set_initial_registers_sample (const Dwarf_Word *regs, uint32_t n_regs,
> - uint64_t regs_mask, uint32_t abi,
> - Dwarf_Word *dwarf_regs, int expected_regs)
> +x86_sample_sp_pc (const Dwarf_Word *regs, uint32_t n_regs,
> + const int *regs_mapping, uint32_t n_regs_mapping,
> + Dwarf_Word *sp, uint sp_index /* into dwarf_regs */,
> + Dwarf_Word *pc, uint pc_index /* into dwarf_regs */)
> {
> -#if (!defined __i386__ && !defined __x86_64__) || !defined(__linux__)
> + if (sp != NULL) *sp = 0;
> + if (pc != NULL) *pc = 0;
> +#if !defined(__x86_64__)
> + (void)regs;
> + (void)n_regs;
> + (void)regs_mapping;
> + (void)n_regs_mapping;
> return false;
> -#else /* __i386__ || __x86_64__ */
> +#else /* __x86_64__ */
> + /* TODO: Register locations could be cached and rechecked on a
> + fastpath without needing to loop? */
> + int j, need_sp = (sp != NULL), need_pc = (pc != NULL);
> + for (j = 0; (need_sp || need_pc) && n_regs_mapping > (uint32_t)j; j++)
> + {
> + if (n_regs < (uint32_t)j) break;
> + if (need_sp && regs_mapping[j] == (int)sp_index)
> + {
> + *sp = regs[j]; need_sp = false;
> + }
> + if (need_pc && regs_mapping[j] == (int)pc_index)
> + {
> + *pc = regs[j]; need_pc = false;
> + }
> + }
> + return (!need_sp && !need_pc);
> +#endif
> +}
> +
> +static bool
> +x86_sample_perf_regs_mapping (Ebl *ebl,
> + uint64_t perf_regs_mask, uint32_t abi,
> + const int **regs_mapping,
> + size_t *n_regs_mapping)
> +{
> + if (perf_regs_mask != 0 && ebl->cached_perf_regs_mask == perf_regs_mask)
> + {
> + *regs_mapping = ebl->cached_regs_mapping;
> + *n_regs_mapping = ebl->cached_n_regs_mapping;
> + return true;
> + }
> +
> /* The following facts are needed to translate x86 registers correctly:
> - perf register order seen in linux
> arch/x86/include/uapi/asm/perf_regs.h
> The registers array is built in the same order as the enum!
> @@ -52,39 +91,58 @@ x86_set_initial_registers_sample (const Dwarf_Word *regs,
> uint32_t n_regs,
> bool is_abi32 = (abi == PERF_SAMPLE_REGS_ABI_32);
>
> /* Locations of dwarf_regs in the perf_event_x86_regs enum order,
> - not the regs[i] array (which will include a subset of the regs): */
> + not the regs[] array (which will include a subset of the regs): */
> static const int regs_i386[] = {0, 2, 3, 1, 7/*sp*/, 6, 4, 5, 8/*ip*/};
> static const int regs_x86_64[] = {0, 3, 2, 1, 4, 5, 6, 7/*sp*/,
> 16/*r8 after flags+segment*/, 17, 18, 19,
> 20, 21, 22, 23,
> 8/*ip*/};
> const int *dwarf_to_perf = is_abi32 ? regs_i386 : regs_x86_64;
>
> - /* Locations of perf_regs in the regs[] array, according to regs_mask: */
> - int perf_to_regs[PERF_REG_X86_64_MAX];
> - uint64_t expected_mask = is_abi32 ? PERF_FRAME_REGISTERS_I386 :
> PERF_FRAME_REGISTERS_X86_64;
> - int j, k; uint64_t bit;
> - /* TODO: Is it worth caching this perf_to_regs computation as long
> - as regs_mask is kept the same across repeated calls? */
> - for (j = 0, k = 0, bit = 1; k < PERF_REG_X86_64_MAX; k++, bit <<= 1)
> + /* Count bits and allocate regs_mapping: */
> + int j, k, kmax, count; uint64_t bit;
> + for (k = 0, kmax = -1, count = 0, bit = 1;
> + k < PERF_REG_X86_64_MAX; k++, bit <<= 1)
> {
> - if ((bit & expected_mask) && (bit & regs_mask)) {
> - if (n_regs <= (uint32_t)j)
> - return false; /* regs_mask count doesn't match n_regs */
> - perf_to_regs[k] = j;
> - j++;
> - } else {
> - perf_to_regs[k] = -1;
> + if ((bit & perf_regs_mask)) {
> + count++;
> + kmax = k;
> }
> }
> + /* TODO: Is locking necessary? */
Currently there is no locking in libebl itself and synchronization is
handled by the calling library.
> + ebl->cached_perf_regs_mask = perf_regs_mask;
> + ebl->cached_regs_mapping = (int *)calloc (count, sizeof(int));
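I can't find a corresponding free for this calloc. Its result is also
not checked for NULL before being written to below, and on a cache miss
the previous cached_regs_mapping pointer is overwritten without being
freed.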
> + ebl->cached_n_regs_mapping = count;
>
> - for (int i = 0; i < expected_regs; i++)
> + /* Locations of perf_regs in the regs[] array, according to
> + perf_regs_mask: */
> + int perf_to_regs[PERF_REG_X86_64_MAX];
> + uint64_t expected_mask = is_abi32 ?
> + PERF_FRAME_REGISTERS_I386 : PERF_FRAME_REGISTERS_X86_64;
> + for (j = 0, k = 0, bit = 1; k <= kmax; k++, bit <<= 1)
> + {
> + if ((bit & expected_mask) && (bit && perf_regs_mask))
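Is 'bit && perf_regs_mask' correct, or should this use & instead? With
the logical &&, the second operand is true whenever perf_regs_mask is
nonzero, regardless of which bit is being tested; the counting loop
above uses 'bit & perf_regs_mask'.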
> + {
> + perf_to_regs[k] = j;
> + j++;
> + }
> + else
> + {
> + perf_to_regs[k] = -1;
> + }
> + }
> + assert (j <= (int)ebl->cached_n_regs_mapping);
Returning false and setting dwfl_errno in the caller is better than
terminating the process. We do have asserts present in other elfutils
library code but we have been (slowly) replacing these.
> +
> + /* Locations of perf_regs in the dwarf_regs array, according to
> + perf_regs_mask and perf_to_regs[]: */
> + for (size_t i = 0; i < ebl->frame_nregs; i++)
> {
> k = dwarf_to_perf[i];
> j = perf_to_regs[k];
> if (j < 0) continue;
> - if (n_regs <= (uint32_t)j) continue;
> - dwarf_regs[i] = regs[j];
> + ebl->cached_regs_mapping[j] = i;
> }
> +
> + *regs_mapping = ebl->cached_regs_mapping;
> + *n_regs_mapping = ebl->cached_n_regs_mapping;
> return true;
> -#endif /* __i386__ || __x86_64__ */
> }
> diff --git a/libdwfl_stacktrace/Makefile.am b/libdwfl_stacktrace/Makefile.am
> index 99a80b5c..b9242129 100644
> --- a/libdwfl_stacktrace/Makefile.am
> +++ b/libdwfl_stacktrace/Makefile.am
> @@ -45,7 +45,7 @@ libdwfl_stacktrace_a_SOURCES = dwflst_process_tracker.c \
> dwflst_tracker_elftab.c \
> dwflst_tracker_dwfltab.c \
> libdwfl_stacktrace_next_prime.c \
> - dwflst_perf_frame.c
> + dwflst_sample_frame.c
>
> libdwfl_stacktrace = $(libdw)
> libdw = ../libdw/libdw.so
> diff --git a/libdwfl_stacktrace/dwflst_perf_frame.c
> b/libdwfl_stacktrace/dwflst_sample_frame.c
> similarity index 75%
> rename from libdwfl_stacktrace/dwflst_perf_frame.c
> rename to libdwfl_stacktrace/dwflst_sample_frame.c
> index 4fc60183..b5339a59 100644
> --- a/libdwfl_stacktrace/dwflst_perf_frame.c
> +++ b/libdwfl_stacktrace/dwflst_sample_frame.c
> @@ -67,7 +67,7 @@ uint64_t dwflst_perf_sample_preferred_regs_mask (GElf_Half
> machine)
> return 0;
> }
>
> -struct perf_sample_info {
> +struct sample_info {
> pid_t pid;
> pid_t tid;
> Dwarf_Addr base_addr;
> @@ -75,8 +75,9 @@ struct perf_sample_info {
> size_t stack_size;
> const Dwarf_Word *regs;
> uint n_regs;
> - uint64_t perf_regs_mask;
> - uint abi;
> + const int *regs_mapping;
> + size_t n_regs_mapping;
> + uint32_t abi;
> Dwarf_Addr pc;
> };
>
> @@ -88,8 +89,8 @@ static pid_t
> sample_next_thread (Dwfl *dwfl __attribute__ ((unused)), void *dwfl_arg,
> void **thread_argp)
> {
> - struct perf_sample_info *sample_arg =
> - (struct perf_sample_info *)dwfl_arg;
> + struct sample_info *sample_arg =
> + (struct sample_info *)dwfl_arg;
> if (*thread_argp == NULL)
> {
> *thread_argp = (void *)0xea7b3375;
> @@ -104,8 +105,8 @@ static bool
> sample_getthread (Dwfl *dwfl __attribute__ ((unused)), pid_t tid,
> void *dwfl_arg, void **thread_argp)
> {
> - struct perf_sample_info *sample_arg =
> - (struct perf_sample_info *)dwfl_arg;
> + struct sample_info *sample_arg =
> + (struct sample_info *)dwfl_arg;
> *thread_argp = (void *)sample_arg;
> if (sample_arg->tid != tid)
> {
> @@ -138,8 +139,8 @@ sample_getthread (Dwfl *dwfl __attribute__ ((unused)),
> pid_t tid,
> static bool
> elf_memory_read (Dwfl *dwfl, Dwarf_Addr addr, Dwarf_Word *result, void *arg)
> {
> - struct perf_sample_info *sample_arg =
> - (struct perf_sample_info *)arg;
> + struct sample_info *sample_arg =
> + (struct sample_info *)arg;
> Dwfl_Module *mod = INTUSE(dwfl_addrmodule) (dwfl, addr);
> Dwarf_Addr bias;
> Elf_Scn *section = INTUSE(dwfl_module_address_section) (mod, &addr, &bias);
> @@ -153,7 +154,7 @@ elf_memory_read (Dwfl *dwfl, Dwarf_Addr addr, Dwarf_Word
> *result, void *arg)
> Elf_Data *data = elf_getdata(section, NULL);
> if (data && data->d_buf && data->d_size > addr) {
> uint8_t *d = ((uint8_t *)data->d_buf) + addr;
> - copy_word(result, d, sample_arg->abi);
> + copy_word(result, d, sample_arg->abi); /* TODO */
> return true;
> }
> __libdwfl_seterrno(DWFL_E_ADDR_OUTOFRANGE);
> @@ -163,36 +164,37 @@ elf_memory_read (Dwfl *dwfl, Dwarf_Addr addr,
> Dwarf_Word *result, void *arg)
> static bool
> sample_memory_read (Dwfl *dwfl, Dwarf_Addr addr, Dwarf_Word *result, void
> *arg)
> {
> - struct perf_sample_info *sample_arg =
> - (struct perf_sample_info *)arg;
> + struct sample_info *sample_arg =
> + (struct sample_info *)arg;
> /* Imitate read_cached_memory() with the stack sample data as the cache. */
> if (addr < sample_arg->base_addr ||
> addr - sample_arg->base_addr >= sample_arg->stack_size)
> return elf_memory_read(dwfl, addr, result, arg);
> const uint8_t *d = &sample_arg->stack[addr - sample_arg->base_addr];
> - copy_word(result, d, sample_arg->abi);
> + copy_word(result, d, sample_arg->abi); /* TODO */
> return true;
> }
>
> +
> static bool
> sample_set_initial_registers (Dwfl_Thread *thread, void *arg)
> {
> - struct perf_sample_info *sample_arg =
> - (struct perf_sample_info *)arg;
> + struct sample_info *sample_arg =
> + (struct sample_info *)arg;
> INTUSE(dwfl_thread_state_register_pc) (thread, sample_arg->pc);
> Dwfl_Process *process = thread->process;
> Ebl *ebl = process->ebl;
> return ebl_set_initial_registers_sample
> (ebl, sample_arg->regs, sample_arg->n_regs,
> - sample_arg->perf_regs_mask, sample_arg->abi,
> + sample_arg->regs_mapping, sample_arg->n_regs_mapping,
> __libdwfl_set_initial_registers_thread, thread);
> }
>
> static void
> sample_detach (Dwfl *dwfl __attribute__ ((unused)), void *dwfl_arg)
> {
> - struct perf_sample_info *sample_arg =
> - (struct perf_sample_info *)dwfl_arg;
> + struct sample_info *sample_arg =
> + (struct sample_info *)dwfl_arg;
> free (sample_arg);
> }
>
> @@ -207,18 +209,18 @@ static const Dwfl_Thread_Callbacks
> sample_thread_callbacks =
> };
>
> int
> -dwflst_perf_sample_getframes (Dwfl *dwfl, Elf *elf,
> - pid_t pid, pid_t tid,
> - const void *stack, size_t stack_size,
> - const Dwarf_Word *regs, uint n_regs,
> - uint64_t perf_regs_mask, uint abi,
> - int (*callback) (Dwfl_Frame *state, void *arg),
> - void *arg)
> +dwflst_sample_getframes (Dwfl *dwfl, Elf *elf,
> + pid_t pid, pid_t tid,
> + const void *stack, size_t stack_size,
> + const Dwarf_Word *regs, uint n_regs,
> + const int *regs_mapping, size_t n_regs_mapping,
> + int (*callback) (Dwfl_Frame *state, void *arg),
> + void *arg)
This is a nice improvement and helps keep the API generic. Do we
still need dwflst_perf_sample_getframes in the public API? Can the
dwflst_sample_getframes implementation figure out sample types or
regs_mapping arrays by itself?
If not, can we replace dwflst_perf_sample_getframes with a small helper
function? Something like dwflst_get_perf_mappings that returns a
regs_mapping array suitable for perf samples. If so, this work doesn't
need to be done in this patch. This API is still "experimental", so we
have flexibility to change this after the upcoming release.
> {
> /* TODO: Lock the dwfl to ensure attach_state does not interfere
> with other dwfl_perf_sample_getframes calls. */
>
> - struct perf_sample_info *sample_arg;
> + struct sample_info *sample_arg;
> bool attached = false;
> if (dwfl->process != NULL)
> {
> @@ -241,21 +243,49 @@ dwflst_perf_sample_getframes (Dwfl *dwfl, Elf *elf,
> sample_arg->stack_size = stack_size;
> sample_arg->regs = regs;
> sample_arg->n_regs = n_regs;
> - sample_arg->perf_regs_mask = perf_regs_mask;
> - sample_arg->abi = abi;
> + sample_arg->regs_mapping = regs_mapping;
> + sample_arg->n_regs_mapping = n_regs_mapping;
> + /* TODO: Also populate sample_arg->abi. */
>
> if (! attached
> && ! INTUSE(dwfl_attach_state) (dwfl, elf, pid,
> &sample_thread_callbacks, sample_arg))
> - return -1;
> + return -1;
>
> - /* Now that Dwfl is attached, we can access its Ebl: */
> Dwfl_Process *process = dwfl->process;
> Ebl *ebl = process->ebl;
> - sample_arg->base_addr = ebl_sample_base_addr(ebl, regs, n_regs,
> - perf_regs_mask, abi);
> - sample_arg->pc = ebl_sample_pc(ebl, regs, n_regs,
> - perf_regs_mask, abi);
> + ebl_sample_sp_pc(ebl, regs, n_regs,
> + regs_mapping, n_regs_mapping,
> + &sample_arg->base_addr, &sample_arg->pc); /* TODO SUPPORT
> */
>
> return INTUSE(dwfl_getthread_frames) (dwfl, tid, callback, arg);
> }
> +
> +int
> +dwflst_perf_sample_getframes (Dwfl *dwfl, Elf *elf,
> + pid_t pid, pid_t tid,
> + const void *stack, size_t stack_size,
> + const Dwarf_Word *regs, uint32_t n_regs,
> + uint64_t perf_regs_mask, uint32_t abi,
> + int (*callback) (Dwfl_Frame *state, void *arg),
> + void *arg)
> +{
> + /* Select the regs_mapping based on architecture. This will be
> + cached in ebl to avoid having to recompute the regs_mapping array
> + when perf_regs_mask is consistent for the entire session: */
> + const int *regs_mapping;
> + size_t n_regs_mapping;
> + Dwfl_Process *process = dwfl->process;
> + Ebl *ebl = process->ebl;
> + if (!ebl_sample_perf_regs_mapping(ebl,
> + perf_regs_mask, abi,
> + &regs_mapping, &n_regs_mapping))
> + return -1;
> +
> + /* Then we can call dwflst_sample_getframes: */
> + return dwflst_sample_getframes (dwfl, elf, pid, tid,
> + stack, stack_size,
> + regs, n_regs,
> + regs_mapping, n_regs_mapping,
> + callback, arg);
> +}
> diff --git a/libdwfl_stacktrace/libdwfl_stacktrace.h
> b/libdwfl_stacktrace/libdwfl_stacktrace.h
> index b236ddc4..84cb69a3 100644
> --- a/libdwfl_stacktrace/libdwfl_stacktrace.h
> +++ b/libdwfl_stacktrace/libdwfl_stacktrace.h
> @@ -113,14 +113,31 @@ extern int dwflst_tracker_linux_proc_find_elf
> (Dwfl_Module *mod, void **userdata
> const char *module_name,
> Dwarf_Addr base,
> char **file_name, Elf **);
>
> -
> /* Like dwfl_thread_getframes, but iterates through the frames for a
> - linux perf_events stack sample rather than a live thread. Calls
> - dwfl_attach_state on DWFL, with architecture specified by ELF, ELF
> - must remain valid during Dwfl lifetime. Returns zero if all frames
> - have been processed by the callback, returns -1 on error, or the
> - value of the callback when not DWARF_CB_OK. -1 returned on error
> - will set dwfl_errno (). */
> + stack sample rather than a live thread. Register file for the stack
> + sample is specified by REGS and N_REGS. For each item in REGS, the
> + REGS_MAPPING array specifies its position in the full register file
> + expected by the DWARF infrastructure. Calls dwfl_attach_state on
> + DWFL, with architecture specified by ELF, ELF must remain valid
> + during Dwfl lifetime. Returns zero if all frames have been
> + processed by the callback, returns -1 on error, or the value of the
> + callback when not DWARF_CB_OK. -1 returned on error will set
> + dwfl_errno (). */
> +int dwflst_sample_getframes (Dwfl *dwfl, Elf *elf, pid_t pid, pid_t tid,
> + const void *stack, size_t stack_size,
> + const Dwarf_Word *regs, uint32_t n_regs,
> + const int *regs_mapping, size_t
> n_regs_mapping,
> + int (*callback) (Dwfl_Frame *state, void
> *arg),
> + void *arg)
> + __nonnull_attribute__ (1, 5, 7, 9, 11);
> +
> +/* Adapts dwflst_sample_getframes to linux perf_events stack sample
> + and register file data format. Calls dwfl_attach_state on DWFL,
> + with architecture specified by ELF, ELF must remain valid during
> + Dwfl lifetime. Returns zero if all frames have been processed by
> + the callback, returns -1 on error, or the value of the callback
> + when not DWARF_CB_OK. -1 returned on error will set dwfl_errno
> + (). */
> int dwflst_perf_sample_getframes (Dwfl *dwfl, Elf *elf, pid_t pid, pid_t tid,
> const void *stack, size_t stack_size,
> const Dwarf_Word *regs, uint32_t n_regs,
> diff --git a/libebl/ebl-hooks.h b/libebl/ebl-hooks.h
> index 05474fbc..29ce9649 100644
> --- a/libebl/ebl-hooks.h
> +++ b/libebl/ebl-hooks.h
> @@ -158,21 +158,32 @@ bool EBLHOOK(set_initial_registers_tid) (pid_t tid,
> ebl_tid_registers_t *setfunc,
> void *arg);
>
> -/* Set process data from a perf_events sample and call SETFUNC one or more
> times.
> - Method should be present only when EBL_PERF_FRAME_REGS_MASK > 0,
> otherwise the
> - backend doesn't support unwinding from perf_events data. */
> -bool EBLHOOK(set_initial_registers_sample) (const Dwarf_Word *regs, uint32_t
> n_regs,
> - uint64_t regs_mask, uint32_t abi,
> +/* Set process data from a register sample and call SETFUNC one or more
> times.
> + Method should be present only when a 'default' strategy of populating an
> + array of DWARF regs and calling SETFUNC once would be inefficient, e.g.
> + on architectures with sparse/noncontiguous DWARF register files. */
> +bool EBLHOOK(set_initial_registers_sample) (const Dwarf_Word *regs,
> + uint32_t n_regs,
> + const int *regs_mapping,
> + size_t n_regs_mapping,
> ebl_tid_registers_t *setfunc,
> void *arg);
>
> -/* Extract the stack address from a perf_events register sample. */
> -Dwarf_Word EBLHOOK(sample_base_addr) (const Dwarf_Word *regs, uint32_t
> n_regs,
> - uint64_t regs_mask, uint32_t abi);
> -
> -/* Extract the instruction pointer from a perf_events register sample. */
> -Dwarf_Word EBLHOOK(sample_pc) (const Dwarf_Word *regs, uint32_t n_regs,
> - uint64_t regs_mask, uint32_t abi);
> +/* Extract the stack address and instruction pointer from a register sample.
> */
> +bool EBLHOOK(sample_sp_pc) (const Dwarf_Word *regs, uint32_t n_regs,
> + const int *regs_mapping,
> + uint32_t n_regs_mapping,
> + Dwarf_Word *sp, Dwarf_Word *pc);
> +
> +/* Translate from linux perf_events PERF_REGS_MASK and ABI to a generic
> + REGS_MAPPING array for use with ebl_set_initial_registers_sample().
> + Method should be present only when EBL_PERF_FRAME_REGS_MASK > 0,
> + otherwise the backend doesn't support unwinding from perf_events
> + data. */
> +bool EBLHOOK(sample_perf_regs_mapping) (Ebl *ebl,
> + uint64_t perf_regs_mask, uint32_t abi,
> + const int **regs_mapping,
> + size_t *n_regs_mapping);
>
> /* Convert *REGNO as is in DWARF to a lower range suitable for
> Dwarf_Frame->REGS indexing. */
> diff --git a/libebl/eblinitreg_sample.c b/libebl/eblinitreg_sample.c
> index 53244d1e..d5704dfa 100644
> --- a/libebl/eblinitreg_sample.c
> +++ b/libebl/eblinitreg_sample.c
> @@ -34,34 +34,59 @@
> #include <libeblP.h>
> #include <assert.h>
>
> -Dwarf_Word
> -ebl_sample_base_addr (Ebl *ebl,
> - const Dwarf_Word *regs, uint32_t n_regs,
> - uint64_t regs_mask, uint32_t abi)
> -{
> - assert (ebl->sample_base_addr != NULL);
> - return ebl->sample_base_addr (regs, n_regs, regs_mask, abi);
> -}
> -
> -Dwarf_Word
> -ebl_sample_pc (Ebl *ebl,
> - const Dwarf_Word *regs, uint32_t n_regs,
> - uint64_t regs_mask, uint32_t abi)
> +bool
> +ebl_sample_sp_pc (Ebl *ebl,
> + const Dwarf_Word *regs, uint32_t n_regs,
> + const int *regs_mapping, size_t n_regs_mapping,
> + Dwarf_Word *sp, Dwarf_Word *pc)
> {
> - assert (ebl->sample_pc != NULL);
> - return ebl->sample_pc (regs, n_regs, regs_mask, abi);
> + assert (ebl->sample_sp_pc != NULL);
> + return ebl->sample_sp_pc (regs, n_regs,
> + regs_mapping, n_regs_mapping,
> + sp, pc);
> }
>
> bool
> ebl_set_initial_registers_sample (Ebl *ebl,
> const Dwarf_Word *regs, uint32_t n_regs,
> - uint64_t regs_mask, uint32_t abi,
> + const int *regs_mapping, size_t
> n_regs_mapping,
> ebl_tid_registers_t *setfunc,
> void *arg)
> {
> - /* If set_initial_registers_sample is unsupported then
> PERF_FRAME_REGS_MASK is zero. */
> - assert (ebl->set_initial_registers_sample != NULL);
> - return ebl->set_initial_registers_sample (regs, n_regs, regs_mask, abi,
> setfunc, arg);
> + /* If set_initial_registers_sample is defined for this arch, use it. */
> + if (ebl->set_initial_registers_sample != NULL)
> + return ebl->set_initial_registers_sample (regs, n_regs,
> + regs_mapping, n_regs_mapping,
> + setfunc, arg);
> +
> + /* If set_initial_registers_sample is unspecified, then it is safe
> + to use the following generic code to populate a contiguous array
> + of dwarf_regs: */
> + Dwarf_Word dwarf_regs[64];
> + assert (ebl->frame_nregs < 64);
> + size_t i;
> + for (i = 0; i < ebl->frame_nregs; i++)
> + dwarf_regs[i] = 0x0;
> + for (i = 0; i < n_regs; i++)
> + {
> + if (i >= n_regs_mapping)
> + break;
> + if (regs_mapping[i] < 0 || regs_mapping[i] >= (int)ebl->frame_nregs)
> + continue;
> + dwarf_regs[regs_mapping[i]] = regs[i];
> + }
> + return setfunc (0, ebl->frame_nregs, dwarf_regs, arg);
> +}
> +
> +bool
> +ebl_sample_perf_regs_mapping (Ebl *ebl,
> + uint64_t perf_regs_mask, uint32_t abi,
> + const int **regs_mapping, size_t
> *n_regs_mapping)
> +{
> + /* If sample_perf_regs_mapping is unsupported then PERF_FRAME_REGS_MASK is
> zero. */
> + assert (ebl->sample_perf_regs_mapping != NULL);
> + return ebl->sample_perf_regs_mapping (ebl, perf_regs_mask, abi,
> + regs_mapping, n_regs_mapping);
> }
>
> uint64_t
> diff --git a/libebl/libebl.h b/libebl/libebl.h
> index a64d70e9..5b0e7000 100644
> --- a/libebl/libebl.h
> +++ b/libebl/libebl.h
> @@ -340,32 +340,46 @@ extern bool ebl_set_initial_registers_tid (Ebl *ebl,
> extern size_t ebl_frame_nregs (Ebl *ebl)
> __nonnull_attribute__ (1);
>
> -/* Callback to set process data from a linux perf_events sample.
> - EBL architecture has to have EBL_PERF_FRAME_REGS_MASK > 0, otherwise the
> - backend doesn't support unwinding from perf_events sample data. */
> +/* Callback to set process data from a register sample. For each item
> + in REGS, the REGS_MAPPING array specifies its position in the full
> + register file expected by the DWARF infrastructure. */
> extern bool ebl_set_initial_registers_sample (Ebl *ebl,
> - const Dwarf_Word *regs,
> uint32_t n_regs,
> - uint64_t regs_mask, uint32_t
> abi,
> + const Dwarf_Word *regs,
> + uint32_t n_regs,
> + const int *regs_mapping,
> + size_t n_regs_mapping,
> ebl_tid_registers_t *setfunc,
> void *arg)
> __nonnull_attribute__ (1, 2, 6);
>
> -/* Extract the stack address from a perf_events register sample. */
> -Dwarf_Word ebl_sample_base_addr (Ebl *ebl,
> - const Dwarf_Word *regs, uint32_t n_regs,
> - uint64_t regs_mask, uint32_t abi)
> - __nonnull_attribute__ (1, 2);
> -
> -/* Extract the instruction pointer from a perf_events register sample. */
> -Dwarf_Word ebl_sample_pc (Ebl *ebl,
> - const Dwarf_Word *regs, uint32_t n_regs,
> - uint64_t regs_mask, uint32_t abi)
> - __nonnull_attribute__ (1, 2);
> -
> +/* Extract stack address SP and instruction pointer PC from a register
> + sample. For each item in REGS, the REGS_MAPPING array specifies
> + its position in the full register file expected by the DWARF
> + infrastructure. */
> +extern bool ebl_sample_sp_pc (Ebl *ebl,
> + const Dwarf_Word *regs, uint32_t n_regs,
> + const int *regs_mapping, size_t n_regs_mapping,
> + Dwarf_Word *sp, Dwarf_Word *pc)
> + __nonnull_attribute__ (1, 2, 4);
> +
> +/* Translate from linux perf_events PERF_REGS_MASK and ABI to a generic
> + REGS_MAPPING array for use with ebl_set_initial_registers_sample().
> + EBL architecture has to have EBL_PERF_FRAME_REGS_MASK > 0,
> + otherwise the backend doesn't support unwinding from perf_events
> + sample data. The PERF_REGS_MASK and REGS_MAPPING are likely but
> + not guaranteed to stay constant throughout a profiling session, and
> + so the result is cached in the Ebl and only recomputed if an
> + unexpected PERF_REGS_MASK is passed to this function. */
> +extern bool ebl_sample_perf_regs_mapping (Ebl *ebl,
> + uint64_t perf_regs_mask,
> + uint32_t abi,
> + const int **regs_mapping,
> + size_t *n_regs_mapping)
> + __nonnull_attribute__ (1, 4, 5);
>
> /* Preferred sample_regs_user mask to request from linux perf_events
> to allow unwinding on EBL architecture. Omitting some of these
> - registers may result in failed or inaccurate unwinding. */
> + registers may result in failed or inaccurate unwinding. */
> extern uint64_t ebl_perf_frame_regs_mask (Ebl *ebl)
> __nonnull_attribute__ (1);
>
> diff --git a/libebl/libeblP.h b/libebl/libeblP.h
> index be14cc20..348da49e 100644
> --- a/libebl/libeblP.h
> +++ b/libebl/libeblP.h
> @@ -65,6 +65,13 @@ struct ebl
> perf_events sample data iff PERF_FRAME_REGS_MASK > 0. */
> uint64_t perf_frame_regs_mask;
>
> + /* A cached mapping from a specified linux perf_events regs_mask to
> + the corresponding regs_mapping array, to reduce
> + ebl_sample_perf_regs_mapping() recomputations. */
> + uint64_t cached_perf_regs_mask;
> + int *cached_regs_mapping;
> + size_t cached_n_regs_mapping;
> +
> /* Offset to apply to the value of the return_address_register, as
> fetched from a Dwarf CFI. This is used by some backends, where
> the return_address_register actually contains the call
> --
> 2.51.0
>