Hi Serhei,

On Tue, Mar 24, 2026 at 6:37 PM Serhei Makarov <[email protected]> wrote:
>
> After developing the common pieces in prior patches, adding register
> sample support for another architecture is fairly predictable. Here we
> add support for 32-bit ARM.
>
> * backends/Makefile.am (arm_SRCS): Add arm_initreg_sample.c.
> * backends/arm_init.c (arm_init): Add hooks for
>   set_initial_registers_sample, sample_sp_pc, perf_frame_regs_mask.
> * backends/arm_initreg_sample.c: New file. Implement
>   arm_set_initial_registers_sample mirroring the ptrace->dwarf_regs
>   logic in arm_initreg.c.
> * backends/libebl_PERF_FLAGS.h (PERF_FRAME_REGISTERS_ARM): New
>   constant describing registers needed for 32-bit arm unwinding.  Also
>   define it on 64-bit arm to allow profiling programs running in 32-bit
>   compatibility mode.
> ---
>  backends/Makefile.am          |  2 +-
>  backends/arm_init.c           |  8 ++-
>  backends/arm_initreg_sample.c | 91 +++++++++++++++++++++++++++++++++++
>  backends/libebl_PERF_FLAGS.h  | 28 ++++++++++-
>  4 files changed, 126 insertions(+), 3 deletions(-)
>  create mode 100644 backends/arm_initreg_sample.c
>
> diff --git a/backends/Makefile.am b/backends/Makefile.am
> index bebd990e..318acef8 100644
> --- a/backends/Makefile.am
> +++ b/backends/Makefile.am
> @@ -57,7 +57,7 @@ alpha_SRCS = alpha_init.c alpha_symbol.c alpha_retval.c alpha_regs.c \
>
>  arm_SRCS = arm_init.c arm_symbol.c arm_regs.c arm_corenote.c \
>            arm_auxv.c arm_attrs.c arm_retval.c arm_cfi.c arm_initreg.c \
> -          arm_machineflagname.c
> +          arm_initreg_sample.c arm_machineflagname.c
>
>  aarch64_SRCS = aarch64_init.c aarch64_regs.c aarch64_symbol.c  \
>                aarch64_corenote.c aarch64_retval.c aarch64_cfi.c \
> diff --git a/backends/arm_init.c b/backends/arm_init.c
> index 70b75942..8b5568f9 100644
> --- a/backends/arm_init.c
> +++ b/backends/arm_init.c
> @@ -1,5 +1,5 @@
>  /* Initialization of Arm specific backend library.
> -   Copyright (C) 2002, 2005, 2009, 2013, 2014, 2015, 2017 Red Hat, Inc.
> +   Copyright (C) 2002, 2005, 2009, 2013, 2014, 2015, 2017, 2026 Red Hat, Inc.
>     This file is part of elfutils.
>     Written by Ulrich Drepper <[email protected]>, 2002.
>
> @@ -34,6 +34,7 @@
>  #define BACKEND                arm_
>  #define RELOC_PREFIX   R_ARM_
>  #include "libebl_CPU.h"
> +#include "libebl_PERF_FLAGS.h"
>
>  /* This defines the common reloc hooks based on arm_reloc.def.  */
>  #include "common-reloc.c"
> @@ -64,6 +65,11 @@ arm_init (Elf *elf __attribute__ ((unused)),
>    /* We only unwind the core integer registers.  */
>    eh->frame_nregs = 16;
>    HOOK (eh, set_initial_registers_tid);
> +  HOOK (eh, set_initial_registers_sample);
> +  HOOK (eh, sample_sp_pc);
> +  /* sample_perf_regs_mapping is default ver  */
> +  eh->perf_frame_regs_mask = PERF_FRAME_REGISTERS_ARM;
> +  __libebl_init_cached_regs_mapping (eh);
>
>    /* Bit zero encodes whether an function address is THUMB or ARM. */
>    eh->func_addr_mask = ~(GElf_Addr)1;
> diff --git a/backends/arm_initreg_sample.c b/backends/arm_initreg_sample.c
> new file mode 100644
> index 00000000..25e7652a
> --- /dev/null
> +++ b/backends/arm_initreg_sample.c
> @@ -0,0 +1,91 @@
> +/* Populate process registers from a register sample.
> +   Copyright (C) 2026 Red Hat Inc.
> +   This file is part of elfutils.
> +
> +   This file is free software; you can redistribute it and/or modify
> +   it under the terms of either
> +
> +     * the GNU Lesser General Public License as published by the Free
> +       Software Foundation; either version 3 of the License, or (at
> +       your option) any later version
> +
> +   or
> +
> +     * the GNU General Public License as published by the Free
> +       Software Foundation; either version 2 of the License, or (at
> +       your option) any later version
> +
> +   or both in parallel, as here.
> +
> +   elfutils is distributed in the hope that it will be useful, but
> +   WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   General Public License for more details.
> +
> +   You should have received copies of the GNU General Public License and
> +   the GNU Lesser General Public License along with this program.  If
> +   not, see <http://www.gnu.org/licenses/>.  */
> +
> +#ifdef HAVE_CONFIG_H
> +# include <config.h>
> +#endif
> +
> +#include <stdlib.h>
> +#include <assert.h>
> +
> +#define BACKEND arm_
> +#include "libebl_CPU.h"
> +#include "libebl_PERF_FLAGS.h"
> +
> +bool
> +arm_sample_sp_pc (const Dwarf_Word *regs, uint32_t n_regs,
> +                 const int *regs_mapping, size_t n_regs_mapping,
> +                 Dwarf_Word *sp, Dwarf_Word *pc)
> +{
> +  return generic_sample_sp_pc (regs, n_regs, regs_mapping, n_regs_mapping,
> +                              sp, 13 /* index of sp in dwarf_regs */,
> +                              pc, 15 /* index of pc in dwarf_regs */);
> +}
> +
> +bool
> +arm_set_initial_registers_sample (const Dwarf_Word *regs, uint32_t n_regs,
> +                                 const int *regs_mapping, size_t n_regs_mapping,
> +                                 ebl_tid_registers_t *setfunc,
> +                                 void *arg)
> +{
> +#if !defined(__linux__) || (!defined __arm__ && !defined __aarch64__)
> +  (void)regs; (void)n_regs;
> +  (void)regs_mapping; (void)n_regs_mapping;
> +  (void)setfunc; (void)arg;
> +  return false;
> +#else  /* __arm__ || __aarch64__ */
> +  /* __arm__: Regular 32-bit arm
> +     __aarch64__: Compat mode, arm compatible code running on aarch64  */
> +
> +  /* TODO(REVIEW) The existing code in arm_initreg.c has to unpack a
> +     register file of 32-bit words into Dwarf_Word array, but here
> +     this should have happened already, probably in whatever code
> +     unpacks the PERF_RECORD_SAMPLE; or perf_events already provides
> +     an array of 64-bit regs?  */

kernel/events/core.c:perf_output_sample_regs() appears to output u64
values exclusively, so I don't think you need to deal with unpacking
32-bit values here.

> +
> +  /* TODO(REVIEW): It seems like the default
> +     ebl_set_initial_registers_sample implementation can be used
> +     here.  */

Yes, it looks identical, so let's use the default then.

> +
> +#define N_GREGS 16
> +  Dwarf_Word dwarf_regs[N_GREGS];
> +  size_t i;
> +  for (i = 0; i < N_GREGS; i++)
> +    dwarf_regs[i] = 0x0;
> +  for (i = 0; i < (size_t)n_regs; i++)
> +    {
> +      if (i >= n_regs_mapping)
> +       break;
> +      if (regs_mapping[i] < 0 || regs_mapping[i] >= N_GREGS)
> +       continue;
> +      dwarf_regs[regs_mapping[i]] = regs[i];
> +    }
> +
> +  return setfunc (0, 16, dwarf_regs, arg);
> +#endif
> +}
> diff --git a/backends/libebl_PERF_FLAGS.h b/backends/libebl_PERF_FLAGS.h
> index 986e3f16..42fca465 100644
> --- a/backends/libebl_PERF_FLAGS.h
> +++ b/backends/libebl_PERF_FLAGS.h
> @@ -33,7 +33,7 @@
>
>  #if defined(__linux__)
>  /* XXX Need to exclude __linux__ arches without perf_regs.h. */
> -#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__)
> +#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) || defined(__arm__)
>  /* || defined(other_architecture)... */
>  # include <asm/perf_regs.h>
>  #endif
> @@ -59,6 +59,20 @@
>  #define PERF_FRAME_REGISTERS_X86_64 0
>  #endif /* _ASM_X86_PERF_REGS_H */
>
> +#if defined(_ASM_ARM_PERF_REGS_H)
> +#define REG(R) (1ULL << PERF_REG_ARM_ ## R)
> +/* Proper unwind set: callee-saved R4..R10, then R11 for FP, and SP,
> +   LR, PC. Collecting all 16 regs would also be feasible.  */
> +#define PERF_FRAME_REGISTERS_ARM (REG(R0) | REG(R1) | REG(R2) | REG(R3)  \
> +  | REG(R4) | REG(R5) | REG(R6) | REG(R7) | REG(R8) | REG(R9) | REG(R10) \
> +  | REG(FP) | REG(IP) | REG(SP) | REG(LR) | REG(PC))
> +/* Register ordering defined in linux arch/arm/include/uapi/asm/perf_regs.h.  */
> +#elif !defined(_ASM_ARM64_PERF_REGS_H)
> +/* Since asm/perf_regs.h is absent, or gives the register layout for a
> +   different arch, we can't unwind 32-bit ARM perf sample frames.  */
> +#define PERF_FRAME_REGISTERS_ARM 0
> +#endif /* _ASM_ARM_PERF_REGS_H */
> +
>  #if defined(_ASM_ARM64_PERF_REGS_H)
>  #define REG(R) (1ULL << PERF_REG_ARM64_ ## R)
>  /* Proper unwind set: callee-saved X19..X28, then X29 for FP,
> @@ -67,6 +81,18 @@
>    | REG(X22) | REG(X23) | REG(X24) | REG(X25) | REG(X26) | REG(X27)  \
>    | REG(X28) | REG(X29) /*FP*/ | REG(LR) | REG(SP) | REG(PC))
>  /* Register ordering defined in linux arch/arm64/include/uapi/asm/perf_regs.h.  */
> +
> +/* Likewise, for 32bit-on-64bit compat mode:  */
> +#define PERF_FRAME_REGISTERS_ARM (REG(X0) | REG(X1) | REG(X2) | REG(X3)   \
> +  | REG(X4) | REG(X5) | REG(X6) | REG(X7) | REG(X8) | REG(X9) | REG(X10)  \
> +  | REG(X11) /* FP */ | REG(X12) /* IP */ /* | skip X13..X29 */ | REG(LR) \
> +  | REG(SP) | REG(PC))
> +/* TODO(REVIEW): Then the profiler likely needs to be instructed to
> +   request the intersection of these register sets rather than just
> +   PERF_FRAME_REGISTERS_AARCH64? i.e. in aarch64_init.c:
> +
> +   eh->perf_frame_regs_mask = PERF_FRAME_REGISTERS_AARCH64 | PERF_FRAME_REGISTERS_ARM;
> +*/

It seems plausible that, for compat processes, perf maps arm regs onto
aarch64 regs, but I wasn't able to verify this.

Aaron

>  #else
>  /* Since asm/perf_regs.h is absent, or gives the register layout for a
>     different arch, we can't unwind aarch64 perf sample frames.  */
> --
> 2.53.0
>

Reply via email to