https://gcc.gnu.org/bugzilla/show_bug.cgi?id=120426
Bug ID: 120426
Summary: XMM store isn't used
Product: gcc
Version: 16.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: target
Assignee: unassigned at gcc dot gnu.org
Reporter: hjl.tools at gmail dot com
CC: liuhongt at gcc dot gnu.org
Target Milestone: ---
Target: x86-64

[hjl@gnu-zen4-1 pr117839]$ cat dl-1.c
struct __pthread_mutex_s
{
  int __lock;
  unsigned int __count;
  int __owner;
  unsigned int __nusers;
  int __kind;
  short __spins;
  short __elision;
  void *p[2];
};

typedef union
{
  struct __pthread_mutex_s __data;
  char __size[40];
  long int __align;
} pthread_mutex_t;

typedef struct { pthread_mutex_t mutex; } __rtld_lock_recursive_t;

void
foo (__rtld_lock_recursive_t *lock)
{
  *lock = (__rtld_lock_recursive_t) {{ { 0, 0, 0, 0, 1, 0, 0, { ((void *)0) , ((void *)0) } } }};
}
[hjl@gnu-zen4-1 pr117839]$ make dl-1.s
/export/build/gnu/tools-build/gcc-debug/build-x86_64-linux/gcc/xgcc -B/export/build/gnu/tools-build/gcc-debug/build-x86_64-linux/gcc/ -O2 -fPIC -S dl-1.c
[hjl@gnu-zen4-1 pr117839]$ make dl-1-spr.s
/export/build/gnu/tools-build/gcc-debug/build-x86_64-linux/gcc/xgcc -B/export/build/gnu/tools-build/gcc-debug/build-x86_64-linux/gcc/ -O2 -fPIC -mtune=sapphirerapids -S -o dl-1-spr.s dl-1.c
[hjl@gnu-zen4-1 pr117839]$ cat dl-1.s
        .file "dl-1.c"
        .text
        .p2align 4
        .globl foo
        .type foo, @function
foo:
.LFB0:
        .cfi_startproc
        pxor %xmm0, %xmm0
        movq $0, 32(%rdi)
        movups %xmm0, 16(%rdi)
        movups %xmm0, (%rdi)
        movl $1, 16(%rdi)
        ret
        .cfi_endproc
.LFE0:
        .size foo, .-foo
        .ident "GCC: (GNU) 16.0.0 20250524 (experimental)"
        .section .note.GNU-stack,"",@progbits
[hjl@gnu-zen4-1 pr117839]$ cat dl-1-spr.s
        .file "dl-1.c"
        .text
        .p2align 4
        .globl foo
        .type foo, @function
foo:
.LFB0:
        .cfi_startproc
        pxor %xmm0, %xmm0
        movq $0, (%rdi)
        movq $0, 8(%rdi)
        movq $1, 16(%rdi)
        movups %xmm0, 24(%rdi)
        ret
        .cfi_endproc
.LFE0:
        .size foo, .-foo
        .ident "GCC: (GNU) 16.0.0 20250524 (experimental)"
        .section .note.GNU-stack,"",@progbits
[hjl@gnu-zen4-1 pr117839]$

The optimized code should be:

        pxor %xmm0, %xmm0
        movups %xmm0, (%rdi)
        movq $1, 16(%rdi)
        movups %xmm0, 24(%rdi)