https://gcc.gnu.org/bugzilla/show_bug.cgi?id=120000

            Bug ID: 120000
           Summary: Unoptimal structure copy loop
           Product: gcc
           Version: 16.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: ubizjak at gmail dot com
  Target Milestone: ---

Following testcase:

--cut here--
typedef unsigned long uword __attribute__ ((mode (word)));

struct a { uword arr[30]; };

__seg_gs struct a m;

void fromgs (struct a *dst) { *dst = m; }

void togs (struct a *src) { m = *src; }

__thread struct a n;

void fromthr (struct a *dst) { *dst = n; }

void tothr (struct a *src) { n = *src; }
--cut here--

This compiles on an x86_64 target (with -Os) to:

fromgs:
        movl    $m, %esi
        movl    $60, %ecx
        rep movsl       %gs:(%rsi), (%rdi)
        ret

togs:
        xorl    %eax, %eax
.L3:
        movl    %eax, %edx        <----- here.
        addl    $8, %eax
        movq    (%rdi,%rdx), %rcx
        movq    %rcx, %gs:m(%rdx)
        cmpl    $240, %eax
        jb      .L3
        ret

Please note the suboptimal use of registers in the above loop, which results
in one extra move. The loop could be compiled as:

        xorl    %eax, %eax
.L3:
        movq    (%rdi,%rax), %rcx
        movq    %rcx, %gs:m(%rax)
        addl    $8, %eax
        cmpl    $240, %eax
        jb      .L3
        ret

A somewhat related issue occurs when copying from the __thread address space:

fromthr:
        movq    %fs:0, %rdx
        movl    $60, %ecx
        leaq    n@tpoff(%rdx), %rsi
        rep movsl       (%rsi), (%rdi)
        ret

Please note that "rep movsl" can use a segment override on its source operand
(as is the case in the fromgs function):

        movl    n@tpoff(%rdx), %esi
        movl    $60, %ecx
        rep movsl       %fs:(%rsi), (%rdi)
        ret

resulting in much shorter asm:

   0:   8b b2 00 00 00 00       mov    0x0(%rdx),%esi
                        2: R_X86_64_TPOFF32     n
   6:   b9 3c 00 00 00          mov    $0x3c,%ecx
   b:   64 f3 a5                rep movsl %fs:(%rsi),%es:(%rdi)
   e:   c3                      ret

vs:

   0:   64 48 8b 14 25 00 00    mov    %fs:0x0,%rdx
   7:   00 00 
   9:   b9 3c 00 00 00          mov    $0x3c,%ecx
   e:   48 8d b2 00 00 00 00    lea    0x0(%rdx),%rsi
                        11: R_X86_64_TPOFF32    n
  15:   f3 a5                   rep movsl %ds:(%rsi),%es:(%rdi)
  17:   c3                      ret

tothr:
        movq    %fs:0, %rax
        movq    %rdi, %rsi
        movl    $60, %ecx
        leaq    n@tpoff(%rax), %rdi
        rep movsl       (%rsi), (%rdi)
        ret

Reply via email to