https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92803

--- Comment #10 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
Another testcase to consider (-O2 -mavx2):
/* GNU C vector types declared with the vector_size attribute (size in
   bytes): 32-byte and 16-byte vectors of double, used by the functions
   in this testcase.  */
typedef double v4df __attribute__((vector_size (32)));
typedef double v2df __attribute__((vector_size (16)));
/* 32-byte and 16-byte vectors of short; not referenced by any of the
   functions shown in this testcase.  */
typedef short v16hi __attribute__((vector_size (32)));
typedef short v8hi __attribute__((vector_size (16)));

/* Build a 16-byte vector whose element 0 is element 1 of the 32-byte
   vector X and whose element 1 is the double loaded from *P.  */
v2df
foo (v4df x, double *p)
{
  return (v2df) { x[1], *p };
}

/* Build a 16-byte vector whose element 0 is element 0 of the 32-byte
   vector X and whose element 1 is the double loaded from *P.  */
v2df
bar (v4df x, double *p)
{
  return (v2df) { x[0], *p };
}

/* Same shape as foo, but with a 16-byte source vector: element 0 of the
   result is element 1 of X, element 1 is the double loaded from *P.  */
v2df
baz (v2df x, double *p)
{
  return (v2df) { x[1], *p };
}

/* Same shape as bar, but with a 16-byte source vector: element 0 of the
   result is element 0 of X, element 1 is the double loaded from *P.  */
v2df
qux (v2df x, double *p)
{
  return (v2df) { x[0], *p };
}

Comparing gcc-9 with trunk + my patch, the differences are:
foo:
-       vunpckhpd       %xmm0, %xmm0, %xmm0
-       vmovhpd (%rdi), %xmm0, %xmm0
+       vbroadcastsd    (%rdi), %ymm1
+       vinsertf128     $1, %xmm1, %ymm0, %ymm0
+       vpermpd $77, %ymm0, %ymm0
bar:
-       vmovhpd (%rdi), %xmm0, %xmm0
+       vbroadcastsd    (%rdi), %ymm1
+       vinsertf128     $1, %xmm1, %ymm0, %ymm0
+       vpermpd $76, %ymm0, %ymm0
baz:
-       vunpckhpd       %xmm0, %xmm0, %xmm0
-       vmovhpd (%rdi), %xmm0, %xmm0
+       vmovddup        (%rdi), %xmm1
+       vunpckhpd       %xmm1, %xmm0, %xmm0
qux:
-       vmovhpd (%rdi), %xmm0, %xmm0
+       vmovapd %xmm0, %xmm1
+       vmovddup        (%rdi), %xmm0
+       vmovsd  %xmm1, %xmm0, %xmm0

Reply via email to