https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89252

            Bug ID: 89252
           Summary: Vector load/store aren't used to initialize large
                    memory
           Product: gcc
           Version: 9.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: hjl.tools at gmail dot com
  Target Milestone: ---

[hjl@gnu-cfl-2 tmp]$ cat /tmp/x.i
struct S
{
  void *s1;
  unsigned s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14;
};

const struct S array[] = {
  { (void *) 0, 60, 640, 2112543726, 39682, 48, 16, 33, 10, 96, 2, 0, 0, 4 },
  { (void *) 0, 60, 2112543726, 192, 18251, 16, 33, 10, 96, 2, 0, 0, 4, 212 }
};

void
foo (struct S *x)
{
  x[0] = array[0];
  x[5] = array[1];
}
[hjl@gnu-cfl-2 tmp]$ gcc -S -O2 x.i
[hjl@gnu-cfl-2 tmp]$ cat x.s
        .file   "x.i"
        .text
        .p2align 4,,15
        .globl  foo
        .type   foo, @function
foo:
.LFB0:
        .cfi_startproc
        movq    $0, (%rdi)
        movl    $60, 8(%rdi)
        movl    $640, 12(%rdi)
        movl    $2112543726, 16(%rdi)
        movl    $39682, 20(%rdi)
        movl    $48, 24(%rdi)
        movl    $16, 28(%rdi)
        movl    $33, 32(%rdi)
        movl    $10, 36(%rdi)
        movl    $96, 40(%rdi)
        movl    $2, 44(%rdi)
        movl    $0, 48(%rdi)
        movl    $0, 52(%rdi)
        movl    $4, 56(%rdi)
        movq    $0, 320(%rdi)
        movl    $60, 328(%rdi)
        movl    $2112543726, 332(%rdi)
        movl    $192, 336(%rdi)
        movl    $18251, 340(%rdi)
        movl    $16, 344(%rdi)
        movl    $33, 348(%rdi)
        movl    $10, 352(%rdi)
        movl    $96, 356(%rdi)
        movl    $2, 360(%rdi)
        movl    $0, 364(%rdi)
        movl    $0, 368(%rdi)
        movl    $4, 372(%rdi)
        movl    $212, 376(%rdi)
        ret
        .cfi_endproc

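We can do better: instead of 28 scalar immediate stores, copy each struct
from `array` using 16-byte vector loads and stores (four per struct):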

foo:
.LFB0:
        .cfi_startproc
        movdqa  array(%rip), %xmm0
        movdqa  array+16(%rip), %xmm1
        movdqa  array+32(%rip), %xmm2
        movdqa  array+48(%rip), %xmm3
        movdqa  array+64(%rip), %xmm4
        movdqa  array+80(%rip), %xmm5
        movups  %xmm0, (%rdi)
        movdqa  array+96(%rip), %xmm6
        movdqa  array+112(%rip), %xmm7
        movups  %xmm1, 16(%rdi)
        movups  %xmm2, 32(%rdi)
        movups  %xmm3, 48(%rdi)
        movups  %xmm4, 320(%rdi)
        movups  %xmm5, 336(%rdi)
        movups  %xmm6, 352(%rdi)
        movups  %xmm7, 368(%rdi)
        ret
        .cfi_endproc

Reply via email to