https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111373

            Bug ID: 111373
           Summary: Register moves right before stores and return
           Product: gcc
           Version: 14.0
            Status: UNCONFIRMED
          Severity: enhancement
          Priority: P3
         Component: rtl-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: tkoenig at gcc dot gnu.org
  Target Milestone: ---

The code

/* SWAP(i,j): compare-exchange step.  If v[i] is strictly greater than
   v[j], exchange v[i] with v[j] and, in lockstep, a[i] with a[j].
   Nothing happens when v[i] <= v[j].

   The macro is not self-contained: it uses v, a, tmp_v and tmp_p from
   the scope in which it is expanded.  Each argument is substituted
   several times and is not parenthesized; the uses in this test case
   pass only integer literals.  */
#define SWAP(i,j) do { \
  if (v[i] > v[j]) { \
    tmp_v = v[i]; v[i] = v[j]; v[j] = tmp_v;    \
    tmp_p = a[i]; a[i] = a[j]; a[j] = tmp_p;    \
    }                                           \
  } while(0)

/* Reorder the three pointers p[0..2] in place so that the values they
   point to are in non-decreasing order.  Only the pointer slots of p
   are rewritten; the pointed-to long values are read but never
   modified.  All three pointers are dereferenced unconditionally.  */
void s3 (long int *p[3])
{
  long int v[3];      /* Local copies of the pointed-to values (sort keys).  */
  long int *a[3];     /* Local copies of the pointers, permuted with v.  */
  long int tmp_v;     /* Scratch used by SWAP for the value exchange.  */
  long int *tmp_p;    /* Scratch used by SWAP for the pointer exchange.  */
  /* Load each pointer and the value it points to exactly once.  */
  a[0] = p[0];
  v[0] = *p[0];
  a[1] = p[1];
  v[1] = *p[1];
  a[2] = p[2];
  v[2] = *p[2];
  /* Three compare-exchange steps: after the first two, index 0 holds
     the smallest value; the last one orders indices 1 and 2.  */
  SWAP (0,1);
  SWAP (0,2);
  SWAP (1,2);
  /* Write the permuted pointers back to the caller's array.  */
  p[0] = a[0];
  p[1] = a[1];
  p[2] = a[2];
}

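yields, with a reasonably recent trunk at -O3, code in which register
moves appear immediately before the results are stored.  For example, on
x86_64 (note .L10 below, where %rcx and %rdx are exchanged via %rsi only
to be stored, although the stores could have used the original registers
directly):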

s3:
.LFB0:
        .cfi_startproc
        movq    (%rdi), %rax
        movq    8(%rdi), %rcx
        movq    16(%rdi), %rdx
        movq    (%rax), %r8
        movq    (%rcx), %rsi
        movq    (%rdx), %r9
        cmpq    %rsi, %r8
        jg      .L2
        cmpq    %r9, %r8
        jle     .L3
        movq    %rax, %r9
        movq    %rdx, %rax
        movq    %r9, %rdx
        movq    %r8, %r9
.L3:
        cmpq    %rsi, %r9
        jl      .L10
.L4:
        movq    %rax, (%rdi)
        movq    %rcx, 8(%rdi)
        movq    %rdx, 16(%rdi)
        ret
        .p2align 4,,10
        .p2align 3
.L2:
        cmpq    %r9, %rsi
        jle     .L11
        movq    %rdx, %rsi
        movq    %rax, %rdx
        movq    %rcx, 8(%rdi)
        movq    %rsi, %rax
        movq    %rdx, 16(%rdi)
        movq    %rax, (%rdi)
        ret
        .p2align 4,,10
        .p2align 3
.L11:
        movq    %r8, %rsi
        movq    %rax, %r8
        movq    %rcx, %rax
        movq    %r8, %rcx
        cmpq    %rsi, %r9
        jge     .L4
.L10:
        movq    %rcx, %rsi
        movq    %rdx, %rcx
        movq    %rax, (%rdi)
        movq    %rsi, %rdx
        movq    %rcx, 8(%rdi)
        movq    %rdx, 16(%rdi)
        ret

This seems to be a general phenomenon; see https://godbolt.org/z/xW9x75qbf for
RISC-V (POWER is similar).

Reply via email to