This patch adds a generic dwflst_sample_getframes() API that does not
depend on perf_events concepts, in particular the
linux-kernel-specific enum defining the perf_regs_mask register order.
This involves reworking the register-handling backend to use
regs_mapping arrays rather than perf_regs_mask, and providing a way to
translate perf_regs_mask to regs_mapping.
A regs_mapping array, for each item in a provided regs[] array,
specifies its position in the full register file expected by the DWARF
functionality.
TODO: Fix up the copy_word dependency on abi in dwflst_sample_frame.c?
* libdwfl_stacktrace/Makefile.am: Rename dwflst_sample_frame.c from
dwflst_perf_frame.c.
* libdwfl_stacktrace/libdwfl_stacktrace.h (dwflst_sample_getframes):
New function providing unwinding functionality with a regs_mapping
array rather than a linux-kernel-dependent perf_regs_mask.
* libdwfl_stacktrace/dwflst_sample_frame.c: Renamed from
dwflst_perf_frame.c.
(struct sample_info): Rename from perf_sample_info, include
regs_mapping field.
(sample_next_thread): Use renamed struct sample_info.
(sample_getthread): Likewise.
(elf_memory_read): Likewise.
(sample_memory_read): Likewise.
(sample_set_initial_registers): Likewise.  Pass regs_mapping to
ebl_set_initial_registers_sample.
(dwflst_sample_getframes): New function.
(dwflst_perf_sample_getframes): Reimplement in terms of
dwflst_sample_getframes and ebl_sample_perf_regs_mapping.
* libebl/ebl-hooks.h (set_initial_registers_sample): Now
takes regs_mapping instead of regs_mask.
(sample_base_addr): Removed.
(sample_pc): Removed.
(sample_sp_pc): New function combining the removed functions for
efficiency.
(sample_perf_regs_mapping): New function translating
perf_regs_mask to regs_mapping array.
* libebl/eblinitreg_sample.c (ebl_sample_base_addr): Removed.
(ebl_sample_pc): Removed.
(ebl_sample_sp_pc): New function.
(ebl_set_initial_registers_sample): Take regs_mapping, provide
a default implementation for contiguous dwarf_regs array.
(ebl_sample_perf_regs_mapping): New function.
* libebl/libebl.h (ebl_set_initial_registers_sample): Now takes
regs_mapping instead of regs_mask.
(ebl_sample_base_addr): Removed.
(ebl_sample_pc): Removed.
(ebl_sample_sp_pc): New function.
(ebl_sample_perf_regs_mapping): New function.
* libebl/libeblP.h (struct ebl): Add caching fields to remove the
need to repeat a sample_perf_regs_mapping() computation for
every frame when the perf_regs_mask is consistent.
* backends/Makefile.am: Remove no-longer-needed linux-perf-regs.c.
* backends/i386_init.c (i386_init): Renamed sample_* functions,
added cached_regs_mapping and related fields/functions.
* backends/i386_initreg_sample.c (i386_sample_base_addr): Removed.
(i386_sample_pc): Removed.
(i386_sample_sp_pc): New function combining the removed functions.
(i386_set_initial_registers_sample): Removed.
(i386_sample_perf_regs_mapping): New function translating
perf_regs_mask to regs_mapping array.
* backends/linux-perf-regs.c: Removed as perf_sample_find_reg is no
longer needed.
* backends/x86_64_init.c (x86_64_init): Renamed sample_* functions,
added cached_regs_mapping and related fields/functions.
* backends/x86_64_initreg_sample.c (x86_64_sample_base_addr): Removed.
(x86_64_sample_pc): Removed.
(x86_64_sample_sp_pc): New function combining the removed functions.
(x86_64_set_initial_registers_sample): Removed.
(x86_64_sample_perf_regs_mapping): New function translating
perf_regs_mask to regs_mapping array.
* backends/x86_initreg_sample.c (x86_set_initial_registers_sample):
Removed.
(x86_sample_sp_pc): New function.
(x86_sample_perf_regs_mapping): New function translating
perf_regs_mask to regs_mapping array.
---
backends/Makefile.am | 2 +-
backends/i386_init.c | 9 +-
backends/i386_initreg_sample.c | 72 +++---------
backends/linux-perf-regs.c | 48 --------
backends/x86_64_init.c | 9 +-
backends/x86_64_initreg_sample.c | 70 +++---------
backends/x86_initreg_sample.c | 108 ++++++++++++++----
libdwfl_stacktrace/Makefile.am | 2 +-
...lst_perf_frame.c => dwflst_sample_frame.c} | 98 ++++++++++------
libdwfl_stacktrace/libdwfl_stacktrace.h | 31 +++--
libebl/ebl-hooks.h | 35 ++++--
libebl/eblinitreg_sample.c | 63 +++++++---
libebl/libebl.h | 50 +++++---
libebl/libeblP.h | 7 ++
14 files changed, 321 insertions(+), 283 deletions(-)
delete mode 100644 backends/linux-perf-regs.c
rename libdwfl_stacktrace/{dwflst_perf_frame.c => dwflst_sample_frame.c} (75%)
diff --git a/backends/Makefile.am b/backends/Makefile.am
index 8ccbdb50..7a820df0 100644
--- a/backends/Makefile.am
+++ b/backends/Makefile.am
@@ -121,7 +121,7 @@ am_libebl_backends_pic_a_OBJECTS =
$(libebl_backends_a_SOURCES:.c=.os)
noinst_HEADERS = libebl_CPU.h libebl_PERF_FLAGS.h common-reloc.c \
linux-core-note.c x86_corenote.c \
- linux-perf-regs.c x86_initreg_sample.c
+ x86_initreg_sample.c
EXTRA_DIST = $(modules:=_reloc.def)
diff --git a/backends/i386_init.c b/backends/i386_init.c
index e64ef6ed..a980e71a 100644
--- a/backends/i386_init.c
+++ b/backends/i386_init.c
@@ -60,10 +60,13 @@ i386_init (Elf *elf __attribute__ ((unused)),
(Likely an artifact of reusing that header between i386/x86_64.) */
eh->frame_nregs = 9;
HOOK (eh, set_initial_registers_tid);
- HOOK (eh, set_initial_registers_sample);
- HOOK (eh, sample_base_addr);
- HOOK (eh, sample_pc);
+ /* No set_initial_registers_sample hook; libebl's generic default is used. */
+ HOOK (eh, sample_sp_pc);
+ HOOK (eh, sample_perf_regs_mapping);
eh->perf_frame_regs_mask = PERF_FRAME_REGISTERS_I386;
+ eh->cached_perf_regs_mask = 0;
+ eh->cached_regs_mapping = NULL;
+ eh->cached_n_regs_mapping = -1;
HOOK (eh, unwind);
return eh;
diff --git a/backends/i386_initreg_sample.c b/backends/i386_initreg_sample.c
index 677393c9..94955191 100644
--- a/backends/i386_initreg_sample.c
+++ b/backends/i386_initreg_sample.c
@@ -31,6 +31,7 @@
#endif
#include <stdlib.h>
+#include <assert.h>
#if (defined __i386__ || defined __x86_64__) && defined(__linux__)
# include <linux/perf_event.h>
# include <asm/perf_regs.h>
@@ -40,69 +41,26 @@
#include "libebl_CPU.h"
#include "libebl_PERF_FLAGS.h"
#if (defined __i386__ || defined __x86_64__) && defined(__linux__)
-# include "linux-perf-regs.c"
# include "x86_initreg_sample.c"
#endif
-/* Register ordering cf. linux arch/x86/include/uapi/asm/perf_regs.h,
- enum perf_event_x86_regs: */
-Dwarf_Word
-i386_sample_base_addr (const Dwarf_Word *regs, uint32_t n_regs,
- uint64_t regs_mask,
- /* XXX hypothetically needed if abi varies
- between samples in the same process;
- not needed on x86 */
- uint32_t abi __attribute__((unused)))
-{
-#if (!defined __i386__ && !defined __x86_64__) || !defined(__linux__)
- (void)regs;
- (void)n_regs;
- (void)regs_mask;
- return 0;
-#else /* __i386__ || __x86_64__ */
- (void)regs;
- (void)n_regs;
- (void)regs_mask;
- return perf_sample_find_reg (regs, n_regs, regs_mask,
- 7 /* index into perf_event_x86_regs */);
-#endif
-}
-
-Dwarf_Word
-i386_sample_pc (const Dwarf_Word *regs, uint32_t n_regs,
- uint64_t regs_mask,
- uint32_t abi __attribute__((unused)))
+bool
+i386_sample_sp_pc (const Dwarf_Word *regs, uint32_t n_regs,
+ const int *regs_mapping, uint32_t n_regs_mapping,
+ Dwarf_Word *sp, Dwarf_Word *pc)
{
-#if (!defined __i386__ && !defined __x86_64__) || !defined(__linux__)
- (void)regs;
- (void)n_regs;
- (void)regs_mask;
- return 0;
-#else /* __i386__ || __x86_64__ */
- return perf_sample_find_reg (regs, n_regs, regs_mask,
- 8 /* index into perf_event_x86_regs */);
-#endif
+ /* XXX for dwarf_regs indices, compare i386_initreg.c */
+ return x86_sample_sp_pc (regs, n_regs, regs_mapping, n_regs_mapping,
+ sp, 4 /* index of sp in dwarf_regs */,
+ pc, 8 /* index of pc in dwarf_regs */);
}
bool
-i386_set_initial_registers_sample (const Dwarf_Word *regs, uint32_t n_regs,
- uint64_t regs_mask, uint32_t abi,
- ebl_tid_registers_t *setfunc,
- void *arg)
+i386_sample_perf_regs_mapping (Ebl *ebl,
+ uint64_t perf_regs_mask, uint32_t abi,
+ const int **regs_mapping,
+ size_t *n_regs_mapping)
{
-#if (!defined __i386__ && !defined __x86_64__) || !defined(__linux__)
- (void)regs;
- (void)n_regs;
- (void)regs_mask;
- (void)abi;
- (void)setfunc;
- (void)arg;
- return false;
-#else /* __i386__ || __x86_64__ */
- Dwarf_Word dwarf_regs[9];
- if (!x86_set_initial_registers_sample (regs, n_regs, regs_mask,
- abi, dwarf_regs, 9))
- return false;
- return setfunc (0, 9, dwarf_regs, arg);
-#endif
+ return x86_sample_perf_regs_mapping (ebl, perf_regs_mask, abi,
+ regs_mapping, n_regs_mapping);
}
diff --git a/backends/linux-perf-regs.c b/backends/linux-perf-regs.c
deleted file mode 100644
index 22ad67c6..00000000
--- a/backends/linux-perf-regs.c
+++ /dev/null
@@ -1,48 +0,0 @@
-/* Common pieces for handling registers in a linux perf_events sample.
- Copyright (C) 2025 Red Hat, Inc.
- This file is part of elfutils.
-
- This file is free software; you can redistribute it and/or modify
- it under the terms of either
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at
- your option) any later version
-
- or
-
- * the GNU General Public License as published by the Free
- Software Foundation; either version 2 of the License, or (at
- your option) any later version
-
- or both in parallel, as here.
-
- elfutils is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received copies of the GNU General Public License and
- the GNU Lesser General Public License along with this program. If
- not, see <http://www.gnu.org/licenses/>. */
-
-static Dwarf_Word
-perf_sample_find_reg (const Dwarf_Word *regs, uint32_t n_regs,
- uint64_t regs_mask,
- int target)
-{
- int j, k; uint64_t bit;
- for (j = 0, k = 0, bit = 1; k < PERF_REG_X86_64_MAX; k++, bit <<= 1)
- {
- if (bit & regs_mask) {
- if (n_regs <= (uint32_t) j)
- return 0; /* regs_mask count doesn't match n_regs */
- if (k == target)
- return regs[j];
- if (k > target)
- return 0; /* regs_mask doesn't include desired reg */
- j++;
- }
- }
- return 0;
-}
diff --git a/backends/x86_64_init.c b/backends/x86_64_init.c
index 6a1cbc4b..5f929758 100644
--- a/backends/x86_64_init.c
+++ b/backends/x86_64_init.c
@@ -63,10 +63,13 @@ x86_64_init (Elf *elf __attribute__ ((unused)),
/* gcc/config/ #define DWARF_FRAME_REGISTERS. */
eh->frame_nregs = 17;
HOOK (eh, set_initial_registers_tid);
- HOOK (eh, set_initial_registers_sample);
- HOOK (eh, sample_base_addr);
- HOOK (eh, sample_pc);
+ /* No set_initial_registers_sample hook; libebl's generic default is used. */
+ HOOK (eh, sample_sp_pc);
+ HOOK (eh, sample_perf_regs_mapping);
eh->perf_frame_regs_mask = PERF_FRAME_REGISTERS_X86_64;
+ eh->cached_perf_regs_mask = 0;
+ eh->cached_regs_mapping = NULL;
+ eh->cached_n_regs_mapping = -1;
HOOK (eh, unwind);
HOOK (eh, check_reloc_target_type);
diff --git a/backends/x86_64_initreg_sample.c b/backends/x86_64_initreg_sample.c
index 48d14bc8..9dd708c9 100644
--- a/backends/x86_64_initreg_sample.c
+++ b/backends/x86_64_initreg_sample.c
@@ -31,6 +31,7 @@
#endif
#include <stdlib.h>
+#include <assert.h>
#if defined(__x86_64__) && defined(__linux__)
# include <linux/perf_event.h>
# include <asm/perf_regs.h>
@@ -40,67 +41,26 @@
#include "libebl_CPU.h"
#include "libebl_PERF_FLAGS.h"
#if defined(__x86_64__) && defined(__linux__)
-# include "linux-perf-regs.c"
# include "x86_initreg_sample.c"
#endif
-/* Register ordering cf. linux arch/x86/include/uapi/asm/perf_regs.h,
- enum perf_event_x86_regs: */
-Dwarf_Word
-x86_64_sample_base_addr (const Dwarf_Word *regs, uint32_t n_regs,
- uint64_t regs_mask,
- /* XXX hypothetically needed if abi varies
- between samples in the same process;
- not needed on x86*/
- uint32_t abi __attribute__((unused)))
-{
-#if !defined(__x86_64__) || !defined(__linux__)
- (void)regs;
- (void)n_regs;
- (void)regs_mask;
- return 0;
-#else /* __x86_64__ */
- return perf_sample_find_reg (regs, n_regs, regs_mask,
- 7 /* index into perf_event_x86_regs */);
-#endif
-}
-
-Dwarf_Word
-x86_64_sample_pc (const Dwarf_Word *regs, uint32_t n_regs,
- uint64_t regs_mask,
- uint32_t abi __attribute__((unused)))
+bool
+x86_64_sample_sp_pc (const Dwarf_Word *regs, uint32_t n_regs,
+ const int *regs_mapping, uint32_t n_regs_mapping,
+ Dwarf_Word *sp, Dwarf_Word *pc)
{
-#if !defined(__x86_64__) || !defined(__linux__)
- (void)regs;
- (void)n_regs;
- (void)regs_mask;
- return 0;
-#else /* __x86_64__ */
- return perf_sample_find_reg (regs, n_regs, regs_mask,
- 8 /* index into perf_event_x86_regs */);
-#endif
+ /* XXX for dwarf_regs indices, compare x86_64_initreg.c */
+ return x86_sample_sp_pc (regs, n_regs, regs_mapping, n_regs_mapping,
+ sp, 7 /* index of sp in dwarf_regs */,
+ pc, 16 /* index of pc in dwarf_regs */);
}
bool
-x86_64_set_initial_registers_sample (const Dwarf_Word *regs, uint32_t n_regs,
- uint64_t regs_mask, uint32_t abi,
- ebl_tid_registers_t *setfunc,
- void *arg)
+x86_64_sample_perf_regs_mapping (Ebl *ebl,
+ uint64_t perf_regs_mask, uint32_t abi,
+ const int **regs_mapping,
+ size_t *n_regs_mapping)
{
-#if !defined(__x86_64__) || !defined(__linux__)
- (void)regs;
- (void)n_regs;
- (void)regs_mask;
- (void)abi;
- (void)setfunc;
- (void)arg;
- return false;
-#else /* __x86_64__ */
- Dwarf_Word dwarf_regs[17];
- if (!x86_set_initial_registers_sample (regs, n_regs, regs_mask,
- abi, dwarf_regs, 9))
- return false;
- return setfunc (0, 17, dwarf_regs, arg);
-#endif
+ return x86_sample_perf_regs_mapping (ebl, perf_regs_mask, abi,
+ regs_mapping, n_regs_mapping);
}
-
diff --git a/backends/x86_initreg_sample.c b/backends/x86_initreg_sample.c
index 8d6b471b..f9b45462 100644
--- a/backends/x86_initreg_sample.c
+++ b/backends/x86_initreg_sample.c
@@ -1,4 +1,4 @@
-/* x86 linux perf_events register handling, pieces common to x86-64 and i386.
+/* x86 stack sample register handling, pieces common to x86-64 and i386.
Copyright (C) 2025 Red Hat, Inc.
This file is part of elfutils.
@@ -27,13 +27,52 @@
not, see <http://www.gnu.org/licenses/>. */
static bool
-x86_set_initial_registers_sample (const Dwarf_Word *regs, uint32_t n_regs,
- uint64_t regs_mask, uint32_t abi,
- Dwarf_Word *dwarf_regs, int expected_regs)
+x86_sample_sp_pc (const Dwarf_Word *regs, uint32_t n_regs,
+ const int *regs_mapping, uint32_t n_regs_mapping,
+ Dwarf_Word *sp, uint sp_index /* into dwarf_regs */,
+ Dwarf_Word *pc, uint pc_index /* into dwarf_regs */)
{
-#if (!defined __i386__ && !defined __x86_64__) || !defined(__linux__)
+ if (sp != NULL) *sp = 0;
+ if (pc != NULL) *pc = 0;
+#if !defined(__x86_64__)
+ (void)regs;
+ (void)n_regs;
+ (void)regs_mapping;
+ (void)n_regs_mapping;
return false;
-#else /* __i386__ || __x86_64__ */
+#else /* __x86_64__ */
+ /* TODO: Register locations could be cached and rechecked on a
+ fastpath without needing to loop? */
+ int j, need_sp = (sp != NULL), need_pc = (pc != NULL);
+ for (j = 0; (need_sp || need_pc) && n_regs_mapping > (uint32_t)j; j++)
+ {
+ if (n_regs < (uint32_t)j) break;
+ if (need_sp && regs_mapping[j] == (int)sp_index)
+ {
+ *sp = regs[j]; need_sp = false;
+ }
+ if (need_pc && regs_mapping[j] == (int)pc_index)
+ {
+ *pc = regs[j]; need_pc = false;
+ }
+ }
+ return (!need_sp && !need_pc);
+#endif
+}
+
+static bool
+x86_sample_perf_regs_mapping (Ebl *ebl,
+ uint64_t perf_regs_mask, uint32_t abi,
+ const int **regs_mapping,
+ size_t *n_regs_mapping)
+{
+ if (perf_regs_mask != 0 && ebl->cached_perf_regs_mask == perf_regs_mask)
+ {
+ *regs_mapping = ebl->cached_regs_mapping;
+ *n_regs_mapping = ebl->cached_n_regs_mapping;
+ return true;
+ }
+
/* The following facts are needed to translate x86 registers correctly:
- perf register order seen in linux arch/x86/include/uapi/asm/perf_regs.h
The registers array is built in the same order as the enum!
@@ -52,39 +91,58 @@ x86_set_initial_registers_sample (const Dwarf_Word *regs,
uint32_t n_regs,
bool is_abi32 = (abi == PERF_SAMPLE_REGS_ABI_32);
/* Locations of dwarf_regs in the perf_event_x86_regs enum order,
- not the regs[i] array (which will include a subset of the regs): */
+ not the regs[] array (which will include a subset of the regs): */
static const int regs_i386[] = {0, 2, 3, 1, 7/*sp*/, 6, 4, 5, 8/*ip*/};
static const int regs_x86_64[] = {0, 3, 2, 1, 4, 5, 6, 7/*sp*/,
16/*r8 after flags+segment*/, 17, 18, 19,
20, 21, 22, 23,
8/*ip*/};
const int *dwarf_to_perf = is_abi32 ? regs_i386 : regs_x86_64;
- /* Locations of perf_regs in the regs[] array, according to regs_mask: */
- int perf_to_regs[PERF_REG_X86_64_MAX];
- uint64_t expected_mask = is_abi32 ? PERF_FRAME_REGISTERS_I386 :
PERF_FRAME_REGISTERS_X86_64;
- int j, k; uint64_t bit;
- /* TODO: Is it worth caching this perf_to_regs computation as long
- as regs_mask is kept the same across repeated calls? */
- for (j = 0, k = 0, bit = 1; k < PERF_REG_X86_64_MAX; k++, bit <<= 1)
+ /* Count bits and allocate regs_mapping: */
+ int j, k, kmax, count; uint64_t bit;
+ for (k = 0, kmax = -1, count = 0, bit = 1;
+ k < PERF_REG_X86_64_MAX; k++, bit <<= 1)
{
- if ((bit & expected_mask) && (bit & regs_mask)) {
- if (n_regs <= (uint32_t)j)
- return false; /* regs_mask count doesn't match n_regs */
- perf_to_regs[k] = j;
- j++;
- } else {
- perf_to_regs[k] = -1;
+ if ((bit & perf_regs_mask)) {
+ count++;
+ kmax = k;
}
}
+ /* TODO: Is locking necessary? */
+ ebl->cached_perf_regs_mask = perf_regs_mask;
+ ebl->cached_regs_mapping = (int *)calloc (count, sizeof(int));
+ ebl->cached_n_regs_mapping = count;
- for (int i = 0; i < expected_regs; i++)
+ /* Locations of perf_regs in the regs[] array, according to
+ perf_regs_mask: */
+ int perf_to_regs[PERF_REG_X86_64_MAX];
+ uint64_t expected_mask = is_abi32 ?
+ PERF_FRAME_REGISTERS_I386 : PERF_FRAME_REGISTERS_X86_64;
+ for (j = 0, k = 0, bit = 1; k <= kmax; k++, bit <<= 1)
+ {
+ if ((bit & expected_mask) && (bit && perf_regs_mask))
+ {
+ perf_to_regs[k] = j;
+ j++;
+ }
+ else
+ {
+ perf_to_regs[k] = -1;
+ }
+ }
+ assert (j <= (int)ebl->cached_n_regs_mapping);
+
+ /* Locations of perf_regs in the dwarf_regs array, according to
+ perf_regs_mask and perf_to_regs[]: */
+ for (size_t i = 0; i < ebl->frame_nregs; i++)
{
k = dwarf_to_perf[i];
j = perf_to_regs[k];
if (j < 0) continue;
- if (n_regs <= (uint32_t)j) continue;
- dwarf_regs[i] = regs[j];
+ ebl->cached_regs_mapping[j] = i;
}
+
+ *regs_mapping = ebl->cached_regs_mapping;
+ *n_regs_mapping = ebl->cached_n_regs_mapping;
return true;
-#endif /* __i386__ || __x86_64__ */
}
diff --git a/libdwfl_stacktrace/Makefile.am b/libdwfl_stacktrace/Makefile.am
index 99a80b5c..b9242129 100644
--- a/libdwfl_stacktrace/Makefile.am
+++ b/libdwfl_stacktrace/Makefile.am
@@ -45,7 +45,7 @@ libdwfl_stacktrace_a_SOURCES = dwflst_process_tracker.c \
dwflst_tracker_elftab.c \
dwflst_tracker_dwfltab.c \
libdwfl_stacktrace_next_prime.c \
- dwflst_perf_frame.c
+ dwflst_sample_frame.c
libdwfl_stacktrace = $(libdw)
libdw = ../libdw/libdw.so
diff --git a/libdwfl_stacktrace/dwflst_perf_frame.c
b/libdwfl_stacktrace/dwflst_sample_frame.c
similarity index 75%
rename from libdwfl_stacktrace/dwflst_perf_frame.c
rename to libdwfl_stacktrace/dwflst_sample_frame.c
index 4fc60183..b5339a59 100644
--- a/libdwfl_stacktrace/dwflst_perf_frame.c
+++ b/libdwfl_stacktrace/dwflst_sample_frame.c
@@ -67,7 +67,7 @@ uint64_t dwflst_perf_sample_preferred_regs_mask (GElf_Half
machine)
return 0;
}
-struct perf_sample_info {
+struct sample_info {
pid_t pid;
pid_t tid;
Dwarf_Addr base_addr;
@@ -75,8 +75,9 @@ struct perf_sample_info {
size_t stack_size;
const Dwarf_Word *regs;
uint n_regs;
- uint64_t perf_regs_mask;
- uint abi;
+ const int *regs_mapping;
+ size_t n_regs_mapping;
+ uint32_t abi;
Dwarf_Addr pc;
};
@@ -88,8 +89,8 @@ static pid_t
sample_next_thread (Dwfl *dwfl __attribute__ ((unused)), void *dwfl_arg,
void **thread_argp)
{
- struct perf_sample_info *sample_arg =
- (struct perf_sample_info *)dwfl_arg;
+ struct sample_info *sample_arg =
+ (struct sample_info *)dwfl_arg;
if (*thread_argp == NULL)
{
*thread_argp = (void *)0xea7b3375;
@@ -104,8 +105,8 @@ static bool
sample_getthread (Dwfl *dwfl __attribute__ ((unused)), pid_t tid,
void *dwfl_arg, void **thread_argp)
{
- struct perf_sample_info *sample_arg =
- (struct perf_sample_info *)dwfl_arg;
+ struct sample_info *sample_arg =
+ (struct sample_info *)dwfl_arg;
*thread_argp = (void *)sample_arg;
if (sample_arg->tid != tid)
{
@@ -138,8 +139,8 @@ sample_getthread (Dwfl *dwfl __attribute__ ((unused)),
pid_t tid,
static bool
elf_memory_read (Dwfl *dwfl, Dwarf_Addr addr, Dwarf_Word *result, void *arg)
{
- struct perf_sample_info *sample_arg =
- (struct perf_sample_info *)arg;
+ struct sample_info *sample_arg =
+ (struct sample_info *)arg;
Dwfl_Module *mod = INTUSE(dwfl_addrmodule) (dwfl, addr);
Dwarf_Addr bias;
Elf_Scn *section = INTUSE(dwfl_module_address_section) (mod, &addr, &bias);
@@ -153,7 +154,7 @@ elf_memory_read (Dwfl *dwfl, Dwarf_Addr addr, Dwarf_Word
*result, void *arg)
Elf_Data *data = elf_getdata(section, NULL);
if (data && data->d_buf && data->d_size > addr) {
uint8_t *d = ((uint8_t *)data->d_buf) + addr;
- copy_word(result, d, sample_arg->abi);
+ copy_word(result, d, sample_arg->abi); /* TODO */
return true;
}
__libdwfl_seterrno(DWFL_E_ADDR_OUTOFRANGE);
@@ -163,36 +164,37 @@ elf_memory_read (Dwfl *dwfl, Dwarf_Addr addr, Dwarf_Word
*result, void *arg)
static bool
sample_memory_read (Dwfl *dwfl, Dwarf_Addr addr, Dwarf_Word *result, void *arg)
{
- struct perf_sample_info *sample_arg =
- (struct perf_sample_info *)arg;
+ struct sample_info *sample_arg =
+ (struct sample_info *)arg;
/* Imitate read_cached_memory() with the stack sample data as the cache. */
if (addr < sample_arg->base_addr ||
addr - sample_arg->base_addr >= sample_arg->stack_size)
return elf_memory_read(dwfl, addr, result, arg);
const uint8_t *d = &sample_arg->stack[addr - sample_arg->base_addr];
- copy_word(result, d, sample_arg->abi);
+ copy_word(result, d, sample_arg->abi); /* TODO */
return true;
}
+
static bool
sample_set_initial_registers (Dwfl_Thread *thread, void *arg)
{
- struct perf_sample_info *sample_arg =
- (struct perf_sample_info *)arg;
+ struct sample_info *sample_arg =
+ (struct sample_info *)arg;
INTUSE(dwfl_thread_state_register_pc) (thread, sample_arg->pc);
Dwfl_Process *process = thread->process;
Ebl *ebl = process->ebl;
return ebl_set_initial_registers_sample
(ebl, sample_arg->regs, sample_arg->n_regs,
- sample_arg->perf_regs_mask, sample_arg->abi,
+ sample_arg->regs_mapping, sample_arg->n_regs_mapping,
__libdwfl_set_initial_registers_thread, thread);
}
static void
sample_detach (Dwfl *dwfl __attribute__ ((unused)), void *dwfl_arg)
{
- struct perf_sample_info *sample_arg =
- (struct perf_sample_info *)dwfl_arg;
+ struct sample_info *sample_arg =
+ (struct sample_info *)dwfl_arg;
free (sample_arg);
}
@@ -207,18 +209,18 @@ static const Dwfl_Thread_Callbacks
sample_thread_callbacks =
};
int
-dwflst_perf_sample_getframes (Dwfl *dwfl, Elf *elf,
- pid_t pid, pid_t tid,
- const void *stack, size_t stack_size,
- const Dwarf_Word *regs, uint n_regs,
- uint64_t perf_regs_mask, uint abi,
- int (*callback) (Dwfl_Frame *state, void *arg),
- void *arg)
+dwflst_sample_getframes (Dwfl *dwfl, Elf *elf,
+ pid_t pid, pid_t tid,
+ const void *stack, size_t stack_size,
+ const Dwarf_Word *regs, uint n_regs,
+ const int *regs_mapping, size_t n_regs_mapping,
+ int (*callback) (Dwfl_Frame *state, void *arg),
+ void *arg)
{
/* TODO: Lock the dwfl to ensure attach_state does not interfere
with other dwfl_perf_sample_getframes calls. */
- struct perf_sample_info *sample_arg;
+ struct sample_info *sample_arg;
bool attached = false;
if (dwfl->process != NULL)
{
@@ -241,21 +243,49 @@ dwflst_perf_sample_getframes (Dwfl *dwfl, Elf *elf,
sample_arg->stack_size = stack_size;
sample_arg->regs = regs;
sample_arg->n_regs = n_regs;
- sample_arg->perf_regs_mask = perf_regs_mask;
- sample_arg->abi = abi;
+ sample_arg->regs_mapping = regs_mapping;
+ sample_arg->n_regs_mapping = n_regs_mapping;
+ /* TODO: Also populate sample_arg->abi. */
if (! attached
&& ! INTUSE(dwfl_attach_state) (dwfl, elf, pid,
&sample_thread_callbacks, sample_arg))
- return -1;
+ return -1;
- /* Now that Dwfl is attached, we can access its Ebl: */
Dwfl_Process *process = dwfl->process;
Ebl *ebl = process->ebl;
- sample_arg->base_addr = ebl_sample_base_addr(ebl, regs, n_regs,
- perf_regs_mask, abi);
- sample_arg->pc = ebl_sample_pc(ebl, regs, n_regs,
- perf_regs_mask, abi);
+ ebl_sample_sp_pc(ebl, regs, n_regs,
+ regs_mapping, n_regs_mapping,
+ &sample_arg->base_addr, &sample_arg->pc); /* TODO SUPPORT */
return INTUSE(dwfl_getthread_frames) (dwfl, tid, callback, arg);
}
+
+int
+dwflst_perf_sample_getframes (Dwfl *dwfl, Elf *elf,
+ pid_t pid, pid_t tid,
+ const void *stack, size_t stack_size,
+ const Dwarf_Word *regs, uint32_t n_regs,
+ uint64_t perf_regs_mask, uint32_t abi,
+ int (*callback) (Dwfl_Frame *state, void *arg),
+ void *arg)
+{
+ /* Select the regs_mapping based on architecture. This will be
+ cached in ebl to avoid having to recompute the regs_mapping array
+ when perf_regs_mask is consistent for the entire session: */
+ const int *regs_mapping;
+ size_t n_regs_mapping;
+ Dwfl_Process *process = dwfl->process;
+ Ebl *ebl = process->ebl;
+ if (!ebl_sample_perf_regs_mapping(ebl,
+ perf_regs_mask, abi,
+ &regs_mapping, &n_regs_mapping))
+ return -1;
+
+ /* Then we can call dwflst_sample_getframes: */
+ return dwflst_sample_getframes (dwfl, elf, pid, tid,
+ stack, stack_size,
+ regs, n_regs,
+ regs_mapping, n_regs_mapping,
+ callback, arg);
+}
diff --git a/libdwfl_stacktrace/libdwfl_stacktrace.h
b/libdwfl_stacktrace/libdwfl_stacktrace.h
index b236ddc4..84cb69a3 100644
--- a/libdwfl_stacktrace/libdwfl_stacktrace.h
+++ b/libdwfl_stacktrace/libdwfl_stacktrace.h
@@ -113,14 +113,31 @@ extern int dwflst_tracker_linux_proc_find_elf
(Dwfl_Module *mod, void **userdata
const char *module_name,
Dwarf_Addr base,
char **file_name, Elf **);
-
/* Like dwfl_thread_getframes, but iterates through the frames for a
- linux perf_events stack sample rather than a live thread. Calls
- dwfl_attach_state on DWFL, with architecture specified by ELF, ELF
- must remain valid during Dwfl lifetime. Returns zero if all frames
- have been processed by the callback, returns -1 on error, or the
- value of the callback when not DWARF_CB_OK. -1 returned on error
- will set dwfl_errno (). */
+ stack sample rather than a live thread. Register file for the stack
+ sample is specified by REGS and N_REGS. For each item in REGS, the
+ REGS_MAPPING array specifies its position in the full register file
+ expected by the DWARF infrastructure. Calls dwfl_attach_state on
+ DWFL, with architecture specified by ELF, ELF must remain valid
+ during Dwfl lifetime. Returns zero if all frames have been
+ processed by the callback, returns -1 on error, or the value of the
+ callback when not DWARF_CB_OK. -1 returned on error will set
+ dwfl_errno (). */
+int dwflst_sample_getframes (Dwfl *dwfl, Elf *elf, pid_t pid, pid_t tid,
+ const void *stack, size_t stack_size,
+ const Dwarf_Word *regs, uint32_t n_regs,
+ const int *regs_mapping, size_t
n_regs_mapping,
+ int (*callback) (Dwfl_Frame *state, void
*arg),
+ void *arg)
+ __nonnull_attribute__ (1, 5, 7, 9, 11);
+
+/* Adapts dwflst_sample_getframes to linux perf_events stack sample
+ and register file data format. Calls dwfl_attach_state on DWFL,
+ with architecture specified by ELF, ELF must remain valid during
+ Dwfl lifetime. Returns zero if all frames have been processed by
+ the callback, returns -1 on error, or the value of the callback
+ when not DWARF_CB_OK. -1 returned on error will set dwfl_errno
+ (). */
int dwflst_perf_sample_getframes (Dwfl *dwfl, Elf *elf, pid_t pid, pid_t tid,
const void *stack, size_t stack_size,
const Dwarf_Word *regs, uint32_t n_regs,
diff --git a/libebl/ebl-hooks.h b/libebl/ebl-hooks.h
index 05474fbc..29ce9649 100644
--- a/libebl/ebl-hooks.h
+++ b/libebl/ebl-hooks.h
@@ -158,21 +158,32 @@ bool EBLHOOK(set_initial_registers_tid) (pid_t tid,
ebl_tid_registers_t *setfunc,
void *arg);
-/* Set process data from a perf_events sample and call SETFUNC one or more
times.
- Method should be present only when EBL_PERF_FRAME_REGS_MASK > 0, otherwise
the
- backend doesn't support unwinding from perf_events data. */
-bool EBLHOOK(set_initial_registers_sample) (const Dwarf_Word *regs, uint32_t
n_regs,
- uint64_t regs_mask, uint32_t abi,
+/* Set process data from a register sample and call SETFUNC one or more times.
+ Method should be present only when a 'default' strategy of populating an
+ array of DWARF regs and calling SETFUNC once would be inefficient, e.g.
+ on architectures with sparse/noncontiguous DWARF register files. */
+bool EBLHOOK(set_initial_registers_sample) (const Dwarf_Word *regs,
+ uint32_t n_regs,
+ const int *regs_mapping,
+ size_t n_regs_mapping,
ebl_tid_registers_t *setfunc,
void *arg);
-/* Extract the stack address from a perf_events register sample. */
-Dwarf_Word EBLHOOK(sample_base_addr) (const Dwarf_Word *regs, uint32_t n_regs,
- uint64_t regs_mask, uint32_t abi);
-
-/* Extract the instruction pointer from a perf_events register sample. */
-Dwarf_Word EBLHOOK(sample_pc) (const Dwarf_Word *regs, uint32_t n_regs,
- uint64_t regs_mask, uint32_t abi);
+/* Extract the stack address and instruction pointer from a register sample.
*/
+bool EBLHOOK(sample_sp_pc) (const Dwarf_Word *regs, uint32_t n_regs,
+ const int *regs_mapping,
+ uint32_t n_regs_mapping,
+ Dwarf_Word *sp, Dwarf_Word *pc);
+
+/* Translate from linux perf_events PERF_REGS_MASK and ABI to a generic
+ REGS_MAPPING array for use with ebl_set_initial_registers_sample().
+ Method should be present only when EBL_PERF_FRAME_REGS_MASK > 0,
+ otherwise the backend doesn't support unwinding from perf_events
+ data. */
+bool EBLHOOK(sample_perf_regs_mapping) (Ebl *ebl,
+ uint64_t perf_regs_mask, uint32_t abi,
+ const int **regs_mapping,
+ size_t *n_regs_mapping);
/* Convert *REGNO as is in DWARF to a lower range suitable for
Dwarf_Frame->REGS indexing. */
diff --git a/libebl/eblinitreg_sample.c b/libebl/eblinitreg_sample.c
index 53244d1e..d5704dfa 100644
--- a/libebl/eblinitreg_sample.c
+++ b/libebl/eblinitreg_sample.c
@@ -34,34 +34,59 @@
#include <libeblP.h>
#include <assert.h>
-Dwarf_Word
-ebl_sample_base_addr (Ebl *ebl,
- const Dwarf_Word *regs, uint32_t n_regs,
- uint64_t regs_mask, uint32_t abi)
-{
- assert (ebl->sample_base_addr != NULL);
- return ebl->sample_base_addr (regs, n_regs, regs_mask, abi);
-}
-
-Dwarf_Word
-ebl_sample_pc (Ebl *ebl,
- const Dwarf_Word *regs, uint32_t n_regs,
- uint64_t regs_mask, uint32_t abi)
+bool
+ebl_sample_sp_pc (Ebl *ebl,
+ const Dwarf_Word *regs, uint32_t n_regs,
+ const int *regs_mapping, size_t n_regs_mapping,
+ Dwarf_Word *sp, Dwarf_Word *pc)
{
- assert (ebl->sample_pc != NULL);
- return ebl->sample_pc (regs, n_regs, regs_mask, abi);
+ assert (ebl->sample_sp_pc != NULL);
+ return ebl->sample_sp_pc (regs, n_regs,
+ regs_mapping, n_regs_mapping,
+ sp, pc);
}
+/* Set the initial unwinder registers from a register sample.  REGS and
+   REGS_MAPPING are parallel arrays: REGS_MAPPING[i] is the position of
+   REGS[i] in the DWARF register file.  A backend-specific hook is used
+   when one is defined; otherwise a zero-filled register file of
+   ebl->frame_nregs entries is populated and handed to SETFUNC in a
+   single call.  Returns the result of the hook or of SETFUNC, or false
+   if the register file would not fit in the local buffer.  */
bool
ebl_set_initial_registers_sample (Ebl *ebl,
const Dwarf_Word *regs, uint32_t n_regs,
- uint64_t regs_mask, uint32_t abi,
+ const int *regs_mapping,
+ size_t n_regs_mapping,
ebl_tid_registers_t *setfunc,
void *arg)
{
- /* If set_initial_registers_sample is unsupported then PERF_FRAME_REGS_MASK is zero. */
- assert (ebl->set_initial_registers_sample != NULL);
- return ebl->set_initial_registers_sample (regs, n_regs, regs_mask, abi, setfunc, arg);
+  /* If set_initial_registers_sample is defined for this arch, use it.  */
+  if (ebl->set_initial_registers_sample != NULL)
+    return ebl->set_initial_registers_sample (regs, n_regs,
+                                              regs_mapping, n_regs_mapping,
+                                              setfunc, arg);
+
+  /* If set_initial_registers_sample is unspecified, then it is safe
+     to use the following generic code to populate a contiguous array
+     of dwarf_regs: */
+  Dwarf_Word dwarf_regs[64];
+  assert (ebl->frame_nregs < 64);
+  /* assert is compiled out under NDEBUG; never overrun dwarf_regs.  */
+  if (ebl->frame_nregs > sizeof dwarf_regs / sizeof dwarf_regs[0])
+    return false;
+  size_t i;
+  for (i = 0; i < ebl->frame_nregs; i++)
+    dwarf_regs[i] = 0x0;
+  /* Stop at the shorter of the two parallel arrays.  The bound on
+     n_regs_mapping must be strict: index n_regs_mapping is already one
+     past the end of REGS_MAPPING.  */
+  for (i = 0; i < n_regs && i < n_regs_mapping; i++)
+    {
+      /* Ignore entries that map outside the register file.  */
+      if (regs_mapping[i] < 0 || regs_mapping[i] >= (int) ebl->frame_nregs)
+        continue;
+      dwarf_regs[regs_mapping[i]] = regs[i];
+    }
+  return setfunc (0, ebl->frame_nregs, dwarf_regs, arg);
+}
+
+bool
+ebl_sample_perf_regs_mapping (Ebl *ebl,
+ uint64_t perf_regs_mask, uint32_t abi,
+ const int **regs_mapping, size_t *n_regs_mapping)
+{
+ /* If sample_perf_regs_mapping is unsupported then PERF_FRAME_REGS_MASK is
+    zero.  The hook's presence is only asserted here; the call is
+    otherwise forwarded unchanged, with EBL itself passed to the hook. */
+ assert (ebl->sample_perf_regs_mapping != NULL);
+ return ebl->sample_perf_regs_mapping (ebl, perf_regs_mask, abi,
+ regs_mapping, n_regs_mapping);
}
uint64_t
diff --git a/libebl/libebl.h b/libebl/libebl.h
index a64d70e9..5b0e7000 100644
--- a/libebl/libebl.h
+++ b/libebl/libebl.h
@@ -340,32 +340,46 @@ extern bool ebl_set_initial_registers_tid (Ebl *ebl,
extern size_t ebl_frame_nregs (Ebl *ebl)
__nonnull_attribute__ (1);
-/* Callback to set process data from a linux perf_events sample.
- EBL architecture has to have EBL_PERF_FRAME_REGS_MASK > 0, otherwise the
- backend doesn't support unwinding from perf_events sample data. */
+/* Callback to set process data from a register sample. For each item
+ in REGS, the REGS_MAPPING array specifies its position in the full
+ register file expected by the DWARF infrastructure.  A backend may
+ provide a set_initial_registers_sample hook; without one, a generic
+ implementation copies each mapped value into a zero-filled array of
+ frame_nregs entries and passes that array to SETFUNC.  Mapping
+ entries that are negative or not below frame_nregs are ignored. */
extern bool ebl_set_initial_registers_sample (Ebl *ebl,
- const Dwarf_Word *regs, uint32_t
n_regs,
- uint64_t regs_mask, uint32_t abi,
+ const Dwarf_Word *regs,
+ uint32_t n_regs,
+ const int *regs_mapping,
+ size_t n_regs_mapping,
ebl_tid_registers_t *setfunc,
void *arg)
__nonnull_attribute__ (1, 2, 6);
-/* Extract the stack address from a perf_events register sample. */
-Dwarf_Word ebl_sample_base_addr (Ebl *ebl,
- const Dwarf_Word *regs, uint32_t n_regs,
- uint64_t regs_mask, uint32_t abi)
- __nonnull_attribute__ (1, 2);
-
-/* Extract the instruction pointer from a perf_events register sample. */
-Dwarf_Word ebl_sample_pc (Ebl *ebl,
- const Dwarf_Word *regs, uint32_t n_regs,
- uint64_t regs_mask, uint32_t abi)
- __nonnull_attribute__ (1, 2);
-
+/* Extract stack address SP and instruction pointer PC from a register
+ sample. For each item in REGS, the REGS_MAPPING array specifies
+ its position in the full register file expected by the DWARF
+ infrastructure.  The work is done by the backend's sample_sp_pc
+ hook, whose presence is asserted.  NOTE(review): SP and PC are not
+ listed in the nonnull attribute below -- confirm whether NULL is
+ accepted for either. */
+extern bool ebl_sample_sp_pc (Ebl *ebl,
+ const Dwarf_Word *regs, uint32_t n_regs,
+ const int *regs_mapping, size_t n_regs_mapping,
+ Dwarf_Word *sp, Dwarf_Word *pc)
+ __nonnull_attribute__ (1, 2, 4);
+
+/* Translate from linux perf_events PERF_REGS_MASK and ABI to a generic
+ REGS_MAPPING array for use with ebl_set_initial_registers_sample().
+ EBL architecture has to have EBL_PERF_FRAME_REGS_MASK > 0,
+ otherwise the backend doesn't support unwinding from perf_events
+ sample data. The PERF_REGS_MASK and REGS_MAPPING are likely but
+ not guaranteed to stay constant throughout a profiling session, and
+ so the result is cached in the Ebl and only recomputed if an
+ unexpected PERF_REGS_MASK is passed to this function.
+ NOTE(review): since the result is cached, *REGS_MAPPING presumably
+ points at storage owned by the Ebl that the caller must not free
+ and that a later call with a different mask may replace --
+ confirm against the backend implementations. */
+extern bool ebl_sample_perf_regs_mapping (Ebl *ebl,
+ uint64_t perf_regs_mask,
+ uint32_t abi,
+ const int **regs_mapping,
+ size_t *n_regs_mapping)
+ __nonnull_attribute__ (1, 4, 5);
/* Preferred sample_regs_user mask to request from linux perf_events
to allow unwinding on EBL architecture. Omitting some of these
- registers may result in failed or inaccurate unwinding. */
+ registers may result in failed or inaccurate unwinding. */
extern uint64_t ebl_perf_frame_regs_mask (Ebl *ebl)
__nonnull_attribute__ (1);
diff --git a/libebl/libeblP.h b/libebl/libeblP.h
index be14cc20..348da49e 100644
--- a/libebl/libeblP.h
+++ b/libebl/libeblP.h
@@ -65,6 +65,13 @@ struct ebl
perf_events sample data iff PERF_FRAME_REGS_MASK > 0. */
uint64_t perf_frame_regs_mask;
+ /* A cached mapping from a specified linux perf_events regs_mask to
+ the corresponding regs_mapping array, to reduce
+ ebl_sample_perf_regs_mapping() recomputations. */
+ uint64_t cached_perf_regs_mask;
+ int *cached_regs_mapping;
+ size_t cached_n_regs_mapping;
+
/* Offset to apply to the value of the return_address_register, as
fetched from a Dwarf CFI. This is used by some backends, where
the return_address_register actually contains the call
--
2.51.0