On Thu, Apr 24, 2025 at 5:52 PM Serhei Makarov <ser...@serhei.io> wrote: > > Changes for v4: > > - Separate out libdwfl_stacktrace, as requested. > > Changes for v3: > > - Fix initialization of elf in sysprof_init_dwfl > (previously triggered -Wmaybe-uninitialized). > > * * * > > Remove the code from src/stacktrace.c that is now covered by > libdwfl_stacktrace/dwflst_perf_frame.c and > dwflst_perf_sample_getframes. > > * src/stacktrace.c (show_memory_reads): Remove this verbose option as > the relevant code is inside libdwfl_stacktrace now. > (struct __sample_arg): Remove, handled by > libdwfl_stacktrace/dwflst_perf_frame.c. > (sample_next_thread): Ditto. > (sample_getthread): Ditto. > (copy_word_64): Ditto. > (copy_word_32): Ditto. > (elf_memory_read): Ditto. > (sample_memory_read): Ditto. > (sample_set_initial_registers): Ditto. > (sample_detach): Ditto. > (sample_thread_callbacks): Ditto. > (sysprof_find_dwfl): Now also return the Elf* so that it can be > passed to dwflst_perf_sample_getframes. Don't create sample_arg. Do > record sp in sui->last_sp. Don't dwfl_attach_state, > dwflst_perf_sample_getframes handles that now. > (sysprof_unwind_cb): Adapt to sysprof_find_dwfl changes, > now invoke dwflst_perf_sample_getframes instead of > dwfl_getthread_frames. > (parse_opt): Remove show_memory_reads. > (main): Remove show_memory_reads. > --- > src/stacktrace.c | 276 ++++++----------------------------------------- > 1 file changed, 30 insertions(+), 246 deletions(-) > > diff --git a/src/stacktrace.c b/src/stacktrace.c > index dd58ef3b..43b091b7 100644 > --- a/src/stacktrace.c > +++ b/src/stacktrace.c > @@ -96,8 +96,7 @@ > #include ELFUTILS_HEADER(ebl) > /* #include ELFUTILS_HEADER(dwfl) */ > #include "../libdwfl/libdwflP.h" > -/* XXX: Private header needed for find_procfile, sysprof_find_dwfl, > - sample_set_initial_registers. */ > +/* XXX: Private header needed for find_procfile. */ > #include ELFUTILS_HEADER(dwfl_stacktrace) > > /************************************* > @@ -177,11 +176,13 @@ static int processing_mode = MODE_NAIVE; > static int input_format; > static int output_format = FORMAT_SYSPROF; > > +/* XXX Used to decide regs_mask for dwflst_perf_sample_getframes. */ > +Ebl *default_ebl = NULL; > + > /* non-printable argp options. */ > #define OPT_DEBUG 0x100 > > /* Diagnostic options. */ > -static bool show_memory_reads = false; > static bool show_frames = false; > static bool show_samples = false; > static bool show_failures = false; > @@ -191,9 +192,6 @@ static bool show_summary = true; > #define ELFUTILS_STACKTRACE_VERBOSE_ENV_VAR "ELFUTILS_STACKTRACE_VERBOSE" > /* Valid values that turn on diagnostics: 'true', 'verbose', 'debug', '1', > '2'. */ > > -/* Enables detailed tracing of simulated memory reads: */ > -/* #define DEBUG_MEMORY_READ */ > - > /* Enables even more diagnostics on modules: */ > /* #define DEBUG_MODULES */ > > @@ -470,190 +468,6 @@ sysprof_reader_getframes (SysprofReader *reader, > > #endif /* HAVE_SYSPROF_HEADERS */ > > -/******************************************* > - * Memory read interface for stack samples * > - *******************************************/ > - > -/* TODO: elfutils (internal) libraries use libNN_set_errno and > DWFL_E_WHATEVER; > - this code fails silently in sample_getthread. */ > - > -struct __sample_arg > -{ > - int tid; > - Dwarf_Addr base_addr; > - uint64_t size; > - uint8_t *data; > - uint64_t n_regs; > - uint64_t abi; /* PERF_SAMPLE_REGS_ABI_{32,64} */ > - Dwarf_Addr pc; > - Dwarf_Addr sp; > - Dwarf_Addr *regs; > -}; > - > -/* The next few functions imitate the corefile interface for a single > - stack sample, with very restricted access to registers and memory. */ > - > -/* Just yield the single thread id matching the sample. */ > -static pid_t > -sample_next_thread (Dwfl *dwfl __attribute__ ((unused)), void *dwfl_arg, > - void **thread_argp) > -{ > - struct __sample_arg *sample_arg = (struct __sample_arg *)dwfl_arg; > - if (*thread_argp == NULL) > - { > - *thread_argp = (void *)0xea7b3375; > - return sample_arg->tid; > - } > - else > - return 0; > -} > - > -/* Just check that the thread id matches the sample. */ > -static bool > -sample_getthread (Dwfl *dwfl __attribute__ ((unused)), pid_t tid, > - void *dwfl_arg, void **thread_argp) > -{ > - struct __sample_arg *sample_arg = (struct __sample_arg *)dwfl_arg; > - *thread_argp = (void *)sample_arg; > - if (sample_arg->tid != tid) > - { > - return false; > - } > - return true; > -} > - > -#define copy_word_64(result, d) \ > - if ((((uintptr_t) (d)) & (sizeof (uint64_t) - 1)) == 0) \ > - *(result) = *(uint64_t *)(d); \ > - else \ > - memcpy ((result), (d), sizeof (uint64_t)); > - > -#define copy_word_32(result, d) \ > - if ((((uintptr_t) (d)) & (sizeof (uint32_t) - 1)) == 0) \ > - *(result) = *(uint32_t *)(d); \ > - else \ > - memcpy ((result), (d), sizeof (uint32_t)); > - > -#define copy_word(result, d, abi) \ > - if ((abi) == PERF_SAMPLE_REGS_ABI_64) \ > - { copy_word_64((result), (d)); } \ > - else if ((abi) == PERF_SAMPLE_REGS_ABI_32) \ > - { copy_word_32((result), (d)); } \ > - else \ > - *(result) = 0; > - > -static bool > -elf_memory_read (Dwfl *dwfl, Dwarf_Addr addr, Dwarf_Word *result, void *arg) > -{ > - struct __sample_arg *sample_arg = (struct __sample_arg *)arg; > - Dwfl_Module *mod = dwfl_addrmodule(dwfl, addr); > - Dwarf_Addr bias; > - Elf_Scn *section = dwfl_module_address_section(mod, &addr, &bias); > - > - if (!section) > - return false; > - > - Elf_Data *data = elf_getdata(section, NULL); > - if (data && data->d_buf && data->d_size > addr) { > - uint8_t *d = ((uint8_t *)data->d_buf) + addr; > - copy_word(result, d, sample_arg->abi); > - return true; > - } > - return false; > -} > - > -static bool > -sample_memory_read (Dwfl *dwfl, Dwarf_Addr addr, Dwarf_Word *result, void > *arg) > -{ > - struct __sample_arg *sample_arg = (struct __sample_arg *)arg; > - if (show_memory_reads) > - fprintf(stderr, "* memory_read addr=%lx+(%lx) size=%lx\n", > - sample_arg->base_addr, addr - sample_arg->base_addr, > sample_arg->size); > - /* Imitate read_cached_memory() with the stack sample data as the cache. */ > - if (addr < sample_arg->base_addr || addr - sample_arg->base_addr >= > sample_arg->size) > - return elf_memory_read(dwfl, addr, result, arg); > - uint8_t *d = &sample_arg->data[addr - sample_arg->base_addr]; > - copy_word(result, d, sample_arg->abi); > - return true; > -} > - > -static bool > -sample_set_initial_registers (Dwfl_Thread *thread, void *arg) > -{ > -#if 0 > - /* TODO: __libdwfl_set_initial_registers_thread not exported from libdwfl, > - after it was decided to be unsuitable for a public API. > - > - A subsequent patch in the series removes sample_set_initial_registers, > - replacing it with code in libdwfl_stacktrace/dwflst_perf_frame.c. > - Keeping this code commented-out for the record, cf how we would > - implement if the set_initial_registers utility func was public. > - > - To *actually* make this work, would need to copy the > set_initial_registers > - implementation into stacktrace.c; not worth doing since the later patch > - overrides this code. */ > - struct __sample_arg *sample_arg = (struct __sample_arg *)arg; > - dwfl_thread_state_register_pc (thread, sample_arg->pc); > - Dwfl_Process *process = thread->process; > - Ebl *ebl = process->ebl; > - /* XXX Sysprof provides exactly the required registers for unwinding: */ > - uint64_t regs_mask = ebl_perf_frame_regs_mask (ebl); > - return ebl_set_initial_registers_sample > - (ebl, sample_arg->regs, sample_arg->n_regs, regs_mask, sample_arg->abi, > - __libdwfl_set_initial_registers_thread, thread); > -#else > - /* The following facts are needed to translate x86 registers correctly: > - - perf register order seen in linux > arch/x86/include/uapi/asm/perf_regs.h > - The registers array is built in the same order as the enum! > - (See the code in tools/perf/util/intel-pt.c intel_pt_add_gp_regs().) > - - sysprof libsysprof/perf-event-stream-private.h records all registers > - except segment and flags. > - - TODO: Should include the perf regs mask in sysprof data and > - translate registers in fully-general fashion, removing this > assumption. > - - dwarf register order seen in elfutils > backends/{x86_64,i386}_initreg.c; > - and it's a fairly different register order! > - > - For comparison, you can study > codereview.qt-project.org/gitweb?p=qt-creator/perfparser.git;a=blob;f=app/perfregisterinfo.cpp;hb=HEAD > - and follow the code which uses those tables of magic numbers. > - But it's better to follow original sources of truth for this. */ > - struct __sample_arg *sample_arg = (struct __sample_arg *)arg; > - bool is_abi32 = (sample_arg->abi == PERF_SAMPLE_REGS_ABI_32); > - static const int regs_i386[] = {0, 2, 3, 1, 7/*sp*/, 6, 4, 5, 8/*ip*/}; > - static const int regs_x86_64[] = {0, 3, 2, 1, 4, 5, 6, 7/*sp*/, 9, 10, 11, > 12, 13, 14, 15, 16, 8/*ip*/}; > - const int *reg_xlat = is_abi32 ? regs_i386 : regs_x86_64; > - int n_regs = is_abi32 ? 9 : 17; > - dwfl_thread_state_register_pc (thread, sample_arg->pc); > - if (sample_arg->n_regs < (uint64_t)n_regs && show_failures) > - fprintf(stderr, N_("sample_set_initial_regs: n_regs=%ld, expected %d\n"), > - sample_arg->n_regs, n_regs); > - for (int i = 0; i < n_regs; i++) > - { > - int j = reg_xlat[i]; > - if (j < 0) continue; > - if (sample_arg->n_regs <= (uint64_t)j) continue; > - dwfl_thread_state_registers (thread, i, 1, &sample_arg->regs[j]); > - } > - return true; > -#endif > -} > - > -static void > -sample_detach (Dwfl *dwfl __attribute__ ((unused)), void *dwfl_arg) > -{ > - struct __sample_arg *sample_arg = (struct __sample_arg *)dwfl_arg; > - free (sample_arg); > -} > - > -static const Dwfl_Thread_Callbacks sample_thread_callbacks = > -{ > - sample_next_thread, > - sample_getthread, > - sample_memory_read, > - sample_set_initial_registers, > - sample_detach, > - NULL, /* sample_thread_detach */ > -}; > - > /**************************************************** > * Dwfl and statistics table for multiple processes * > ****************************************************/ > @@ -1043,33 +857,30 @@ sysprof_init_dwfl (Dwflst_Process_Tracker *cb_tracker, > Dwfl * > sysprof_find_dwfl (struct sysprof_unwind_info *sui, > SysprofCaptureStackUser *ev, > - SysprofCaptureUserRegs *regs) > + SysprofCaptureUserRegs *regs, > + Elf **out_elf) > { > pid_t pid = ev->frame.pid; > - /* TODO: Need to generalize this code beyond x86 architectures. */ > /* XXX: Note that sysprof requesting the x86_64 register file from > perf_events will result in an array of 17 regs even for 32-bit > applications. */ > -#define EXPECTED_REGS 17 > - if (regs->n_regs < EXPECTED_REGS) /* XXX expecting everything except FLAGS > */ > + if (regs->n_regs < ebl_frame_nregs(default_ebl)) /* XXX expecting > everything except FLAGS */ > { > if (show_failures) > - fprintf(stderr, N_("sysprof_find_dwfl: n_regs=%d, expected %d\n"), > - regs->n_regs, EXPECTED_REGS); > + fprintf(stderr, N_("sysprof_find_dwfl: n_regs=%d, expected %ld\n"), > + regs->n_regs, ebl_frame_nregs(default_ebl)); > return NULL; > } > > + Elf *elf = NULL; > Dwfl *dwfl = dwflst_tracker_find_pid (tracker, pid, sysprof_init_dwfl, > NULL); > - struct __sample_arg *sample_arg; > bool cached = false; > if (dwfl != NULL && dwfl->process != NULL) > { > - sample_arg = dwfl->process->callbacks_arg; /* XXX requires libdwflP.h > */ > cached = true; > goto reuse; > } > > - Elf *elf = NULL; > int elf_fd = -1; > int err = find_procfile (dwfl, &pid, &elf, &elf_fd); > if (err < 0) > @@ -1080,53 +891,22 @@ sysprof_find_dwfl (struct sysprof_unwind_info *sui, > return NULL; > } > > - sample_arg = malloc (sizeof *sample_arg); > - if (sample_arg == NULL) > - { > - if (elf != NULL) { > - elf_end (elf); > - close (elf_fd); > - } > - free (sample_arg); > - return NULL; > - } > - > reuse: > - sample_arg->tid = ev->tid; > - sample_arg->size = ev->size; > - sample_arg->data = (uint8_t *)&ev->data; > - sample_arg->regs = regs->regs; > - sample_arg->n_regs = (uint64_t)regs->n_regs; > - sample_arg->abi = (uint64_t)regs->abi; > - /* TODO: Need to generalize this code beyond x86 architectures. */ > - /* Register ordering cf. linux arch/x86/include/uapi/asm/perf_regs.h enum > perf_event_x86_regs: */ > - sample_arg->sp = regs->regs[7]; > - sample_arg->pc = regs->regs[8]; > - sample_arg->base_addr = sample_arg->sp; > - sui->last_sp = sample_arg->base_addr; > - sui->last_base = sample_arg->base_addr; > + /* TODO: Generalize to other architectures than x86_64. */ > + sui->last_sp = regs->regs[7]; > + sui->last_base = sui->last_sp; > > if (show_frames) { > - bool is_abi32 = (sample_arg->abi == PERF_SAMPLE_REGS_ABI_32); > + bool is_abi32 = (regs->abi == PERF_SAMPLE_REGS_ABI_32); > fprintf(stderr, "sysprof_find_dwfl pid %lld%s: size=%ld%s pc=%lx > sp=%lx+(%lx)\n", > (long long) pid, cached ? " (cached)" : "", > - sample_arg->size, is_abi32 ? " (32-bit)" : "", > - sample_arg->pc, sample_arg->base_addr, > - sample_arg->sp - sample_arg->base_addr); > + ev->size, is_abi32 ? " (32-bit)" : "", > + regs->regs[8], sui->last_base, (long)0); > } > > - if (!cached && ! dwfl_attach_state (dwfl, elf, pid, > &sample_thread_callbacks, sample_arg)) > - { > - if (show_failures) > - fprintf(stderr, "dwfl_attach_state pid %lld: %s\n", > - (long long)pid, dwfl_errmsg(-1)); > - elf_end (elf); > - close (elf_fd); > - free (sample_arg); > - return NULL; > - } > if (!cached) > pid_store_dwfl (pid, dwfl); > + *out_elf = elf; > return dwfl; > } > > @@ -1269,7 +1049,8 @@ sysprof_unwind_cb (SysprofCaptureFrame *frame, void > *arg) > SysprofCaptureUserRegs *regs = (SysprofCaptureUserRegs *)tail_ptr; > if (show_frames) > fprintf(stderr, "\n"); /* extra newline for padding */ > - Dwfl *dwfl = sysprof_find_dwfl (sui, ev, regs); > + Elf *elf = NULL; > + Dwfl *dwfl = sysprof_find_dwfl (sui, ev, regs, &elf); > if (dwfl == NULL) > { > if (show_summary) > @@ -1289,11 +1070,16 @@ sysprof_unwind_cb (SysprofCaptureFrame *frame, void > *arg) > sui->last_dwfl = dwfl; > #endif > sui->last_pid = frame->pid; > - int rc = dwfl_getthread_frames (dwfl, ev->tid, sysprof_unwind_frame_cb, > sui); > + uint64_t regs_mask = ebl_perf_frame_regs_mask (default_ebl); > + int rc = dwflst_perf_sample_getframes (dwfl, elf, frame->pid, ev->tid, > + (uint8_t *)&ev->data, ev->size, > + regs->regs, regs->n_regs, > + regs_mask, regs->abi, > + sysprof_unwind_frame_cb, sui); > if (rc < 0) > { > if (show_failures) > - fprintf(stderr, "dwfl_getthread_frames pid %lld: %s\n", > + fprintf(stderr, "dwflst_perf_sample_getframes pid %lld: %s\n", > (long long)frame->pid, dwfl_errmsg(-1)); > } > if (show_samples) > @@ -1409,9 +1195,6 @@ parse_opt (int key, char *arg __attribute__ ((unused)), > break; > > case OPT_DEBUG: > -#ifdef DEBUG_MEMORY_READ > - show_memory_reads = true; > -#endif > show_frames = true; > FALLTHROUGH; > case 'v': > @@ -1503,9 +1286,6 @@ > https://sourceware.org/cgit/elfutils/tree/README.eu-stacktrace?h=users/serhei/eu > else if (strcmp(env_verbose, "debug") == 0 > || strcmp(env_verbose, "2") == 0) > { > -#ifdef DEBUG_MEMORY_READ > - show_memory_reads = true; > -#endif > show_frames = true; > show_samples = true; > show_failures = true; > @@ -1571,6 +1351,10 @@ > https://sourceware.org/cgit/elfutils/tree/README.eu-stacktrace?h=users/serhei/eu > { > if (!dwfltab_init()) > error (EXIT_BAD, errno, N_("Could not initialize Dwfl table")); > + > + /* TODO: Generalize to other architectures. */ > + default_ebl = ebl_openbackend_machine(EM_X86_64); > + > struct sysprof_unwind_info sui; > sui.output_fd = output_fd; > sui.reader = reader; > -- > 2.47.0 >
LGTM. Aaron