https://gcc.gnu.org/bugzilla/show_bug.cgi?id=120000
Bug ID: 120000
Summary: Suboptimal structure copy loop
Product: gcc
Version: 16.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: target
Assignee: unassigned at gcc dot gnu.org
Reporter: ubizjak at gmail dot com
Target Milestone: ---
The following testcase:
--cut here--
typedef unsigned long uword __attribute__ ((mode (word)));
struct a { uword arr[30]; };
__seg_gs struct a m;
void fromgs (struct a *dst) { *dst = m; }
void togs (struct a *src) { m = *src; }
__thread struct a n;
void fromthr (struct a *dst) { *dst = n; }
void tothr (struct a *src) { n = *src; }
--cut here--
Compiles on the x86_64 target (with -Os) to:
fromgs:
movl $m, %esi
movl $60, %ecx
rep movsl %gs:(%rsi), (%rdi)
ret
togs:
xorl %eax, %eax
.L3:
movl %eax, %edx <----- here.
addl $8, %eax
movq (%rdi,%rdx), %rcx
movq %rcx, %gs:m(%rdx)
cmpl $240, %eax
jb .L3
ret
Please note the suboptimal register usage in the above loop: the index is
copied from %eax to %edx on every iteration (the marked "movl %eax, %edx"),
resulting in one extra move. The loop could be compiled as:
xorl %eax, %eax
.L3:
movq (%rdi,%rax), %rcx
movq %rcx, %gs:m(%rax)
addl $8, %eax
cmpl $240, %eax
jb .L3
ret
A somewhat related issue occurs when copying from the __thread address space:
fromthr:
movq %fs:0, %rdx
movl $60, %ecx
leaq n@tpoff(%rdx), %rsi
rep movsl (%rsi), (%rdi)
ret
Please note that "rep movsl" can use a segment override on its source operand
(as is the case in the fromgs function):
movl n@tpoff(%rdx), %esi
movl $60, %ecx
rep movsl %fs:(%rsi), (%rdi)
ret
resulting in much shorter asm:
0: 8b b2 00 00 00 00 mov 0x0(%rdx),%esi
2: R_X86_64_TPOFF32 n
6: b9 3c 00 00 00 mov $0x3c,%ecx
b: 64 f3 a5 rep movsl %fs:(%rsi),%es:(%rdi)
e: c3 ret
vs:
0: 64 48 8b 14 25 00 00 mov %fs:0x0,%rdx
7: 00 00
9: b9 3c 00 00 00 mov $0x3c,%ecx
e: 48 8d b2 00 00 00 00 lea 0x0(%rdx),%rsi
11: R_X86_64_TPOFF32 n
15: f3 a5 rep movsl %ds:(%rsi),%es:(%rdi)
17: c3 ret
tothr:
movq %fs:0, %rax
movq %rdi, %rsi
movl $60, %ecx
leaq n@tpoff(%rax), %rdi
rep movsl (%rsi), (%rdi)
ret