On Thu, May 06, 2021 at 09:32:28AM +0200, Sebastien Marie wrote: > Hi, > > Anindya, did a good analysis of the problem with mpv using gpu video > output backend (it is using EGL and mesa if I correctly followed). > > > For people not reading ports@ here a resume: the destructor function > used in pthread_key_create() needs to be present in memory until > _rthread_tls_destructors() is called. > > in the case of mesa, eglInitialize() function could load, via > dlopen(), code which will use pthread_key_create() with destructor. > > once dlclose() is called, the object is unloaded from memory, but a > reference to destructor is kept, leading to segfault when > _rthread_tls_destructors() run and use the destructor (because > pointing to unloaded code). >
I went deeper into the analysis. At first, I thought that the pthread_key_create() call was coming from the mesa driver (radeonsi_dri.so on my machine), as pinning the DSO in memory (using LD_PRELOAD) avoided the segfault. In fact, it isn't directly radeonsi_dri.so but another dependent library: libLLVM.so.5.0 in this case (by using LD_PRELOAD=.../libLLVM.so.5.0, the crash disappears). Searching for where the pthread_key_create() call is located, I found that it was coming from the emutls implementation (which is using pthread_key_create + destructor) and which is statically linked with compiler-rt.a. By instrumenting pthread_key_create, I get the following backtrace (the abort(3) is mine): (gdb) bt #0 thrkill () at /tmp/-:3 #1 0x000005188f550abe in _libc_abort () at /usr/src/lib/libc/stdlib/abort.c:51 #2 0x000005191c7e8c2b in pthread_key_create () from /home/semarie/Documents/devel/libhijacking/libthread.so #3 0x00000519399e6a87 in emutls_init () at /usr/src/gnu/lib/libcompiler_rt/../../llvm/compiler-rt/lib/builtins/emutls.c:118 #4 0x000005188f55b4f7 in pthread_once (once_control=0x51939d00b30 <emutls_init_once.once>, init_routine=0x27240efb23d627ef) at /usr/src/lib/libc/thread/rthread_once.c:26 #5 0x00000519399e68dd in emutls_init_once () at /usr/src/gnu/lib/libcompiler_rt/../../llvm/compiler-rt/lib/builtins/emutls.c:125 #6 emutls_get_index (control=0x51939cae5c8 <__emutls_v._ZL25TimeTraceProfilerInstance>) at /usr/src/gnu/lib/libcompiler_rt/../../llvm/compiler-rt/lib/builtins/emutls.c:316 #7 __emutls_get_address (control=0x51939cae5c8 <__emutls_v._ZL25TimeTraceProfilerInstance>) at /usr/src/gnu/lib/libcompiler_rt/../../llvm/compiler-rt/lib/builtins/emutls.c:379 #8 0x00000519387f296e in llvm::getTimeTraceProfilerInstance() () from /usr/lib/libLLVM.so.5.0 #9 0x0000051938ec2bf2 in llvm::legacy::PassManagerImpl::run(llvm::Module&) () from /usr/lib/libLLVM.so.5.0 #10 0x000005193974eb67 in LLVMRunPassManager () from /usr/lib/libLLVM.so.5.0 #11 0x00000518d11276d8 in ?? 
() from /usr/X11R6/lib/modules/dri/radeonsi_dri.so #12 0x00000518d1082761 in ?? () from /usr/X11R6/lib/modules/dri/radeonsi_dri.so #13 0x00000518d110b1ea in ?? () from /usr/X11R6/lib/modules/dri/radeonsi_dri.so #14 0x00000518d0c7939c in ?? () from /usr/X11R6/lib/modules/dri/radeonsi_dri.so #15 0x00000518d0c794ed in ?? () from /usr/X11R6/lib/modules/dri/radeonsi_dri.so #16 0x00000518d1cfbec1 in _rthread_start (v=<error reading variable: Unhandled dwarf expression opcode 0xa3>) at /usr/src/lib/librthread/rthread.c:96 #17 0x000005188f52bd2a in __tfork_thread () at /usr/src/lib/libc/arch/amd64/sys/tfork_thread.S:84 It means that the emutls implementation we are using can't be safely used if the code is using dlopen(3). I made the following PoC using __thread : $ cat lib.c #include <stdio.h> __thread int value = 0; void fn() { printf("entering: %s\n", __func__); value = 1; printf("returning: %s\n", __func__); } $ cat main.c #include <err.h> #include <dlfcn.h> #include <stdio.h> #include <stdlib.h> #include <pthread.h> void * loadcode(void *arg) { void *lib; void (*fn)(); printf("thread: entering\n"); printf("dlopen(3)\n"); if ((lib = dlopen("./lib.so", 0)) == NULL) errx(EXIT_FAILURE, "dlopen: %s", dlerror()); if ((fn = dlsym(lib, "fn")) == NULL) errx(EXIT_FAILURE, "dlsym: %s", dlerror()); fn(); printf("dlclose(3)\n"); if (dlclose(lib) != 0) errx(EXIT_FAILURE, "dlclose: %s", dlerror()); printf("thread: returning\n"); return arg; } int main(int argc, char *argv[]) { int error; pthread_t th; if ((error = pthread_create(&th, NULL, &loadcode, NULL)) != 0) errc(error, EXIT_FAILURE, "pthread_create"); if ((error = pthread_join(th, NULL)) != 0) errc(error, EXIT_FAILURE, "pthread_join"); return EXIT_SUCCESS; } $ cc lib.c -Wall -lpthread -shared -fPIC -o lib.so $ cc main.c -Wall -lpthread $ ./a.out thread: entering dlopen(3) entering: fn returning: fn dlclose(3) thread: returning Segmentation fault (core dumped) (gdb) bt #0 0x000004eb3011aec0 in ?? 
() #1 0x000004eb7fc41b75 in _rthread_tls_destructors (thread=0x4eaccaa5c40) at /usr/src/lib/libc/thread/rthread_tls.c:182 #2 0x000004eb7fbd9cd3 in _libc_pthread_exit (retval=<error reading variable: Unhandled dwarf expression opcode 0xa3>) at /usr/src/lib/libc/thread/rthread.c:150 #3 0x000004eb06814ec9 in _rthread_start (v=<error reading variable: Unhandled dwarf expression opcode 0xa3>) at /usr/src/lib/librthread/rthread.c:97 #4 0x000004eb7fbedd2a in __tfork_thread () at /usr/src/lib/libc/arch/amd64/sys/tfork_thread.S:84 -- Sebastien Marie