https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103554

            Bug ID: 103554
           Summary: -mavx generates worse code on scalar code
           Product: gcc
           Version: 11.2.1
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: avi at scylladb dot com
  Target Milestone: ---

Test case:

struct s1 {
    long a, b, c, d, e, f, g, h;
};

s1 move(s1 in) {
    s1 ret;

    ret.a = in.d;
    ret.b = in.e;
    ret.c = in.a;
    ret.d = in.b;
    return ret;
}


-O3 generates:

move(s1):
  movq 8(%rsp), %xmm0
  movq 32(%rsp), %xmm1
  movq %rdi, %rax
  movhps 16(%rsp), %xmm0
  movhps 40(%rsp), %xmm1
  movups %xmm1, (%rdi)
  movups %xmm0, 16(%rdi)
  ret


-O3 -mavx generates:

move(s1):
        pushq   %rbp
        movq    %rdi, %rax
        movq    %rsp, %rbp
        vmovq   16(%rbp), %xmm2
        vmovq   40(%rbp), %xmm3
        vpinsrq $1, 24(%rbp), %xmm2, %xmm1
        vpinsrq $1, 48(%rbp), %xmm3, %xmm0
        vinsertf128     $0x1, %xmm1, %ymm0, %ymm0
        vmovdqu %ymm0, (%rdi)
        vzeroupper
        popq    %rbp
        ret

Clang at -O3 generates the following simpler code, both with and without -mavx
(with -mavx it emits the VEX-encoded forms of the same instructions):

move(s1): # @move(s1)
  movq %rdi, %rax
  movups 32(%rsp), %xmm0
  movups %xmm0, (%rdi)
  movaps 8(%rsp), %xmm0
  movups %xmm0, 16(%rdi)
  retq

Reply via email to