On Tue, Aug 24, 2021 at 9:01 AM H.J. Lu via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > Broadcast from integer to a pseudo vector register instead of a hard > vector register to allow LRA to remove redundant move instruction after > broadcast. > > gcc/ > > PR target/102021 > * config/i386/i386-expand.c (ix86_expand_vector_move): Broadcast > from integer to a pseudo vector register. > > gcc/testsuite/ > > PR target/102021 > * gcc.target/i386/pr100865-10b.c: Expect vzeroupper. > * gcc.target/i386/pr100865-4b.c: Likewise. > * gcc.target/i386/pr100865-6b.c: Expect vmovdqu and vzeroupper. > * gcc.target/i386/pr100865-7b.c: Likewise. > * gcc.target/i386/pr102021.c: New test. > --- > gcc/config/i386/i386-expand.c | 8 +------- > gcc/testsuite/gcc.target/i386/pr100865-10b.c | 1 - > gcc/testsuite/gcc.target/i386/pr100865-4b.c | 3 +-- > gcc/testsuite/gcc.target/i386/pr100865-6b.c | 6 ++---- > gcc/testsuite/gcc.target/i386/pr100865-7b.c | 6 ++---- > gcc/testsuite/gcc.target/i386/pr102021.c | 15 +++++++++++++++ > 6 files changed, 21 insertions(+), 18 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr102021.c > > diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c > index 9bf13dbfa92..12db742e5f4 100644 > --- a/gcc/config/i386/i386-expand.c > +++ b/gcc/config/i386/i386-expand.c > @@ -579,13 +579,7 @@ ix86_expand_vector_move (machine_mode mode, rtx > operands[]) > { > /* Broadcast to XMM/YMM/ZMM register from an integer > constant or scalar mem. */ > - /* Hard registers are used for 2 purposes: > - 1. Prevent stack realignment when the original code > - doesn't use vector registers, which is the same for > - memcpy and memset. > - 2. Prevent combine to convert constant broadcast to > - load from constant pool. 
*/ > - op1 = ix86_gen_scratch_sse_rtx (mode); > + op1 = gen_reg_rtx (mode); > if (FLOAT_MODE_P (mode) > || (!TARGET_64BIT && GET_MODE_INNER (mode) == DImode)) > { if (FLOAT_MODE_P (mode) || (!TARGET_64BIT && GET_MODE_INNER (mode) == DImode)) - { - first = force_const_mem (GET_MODE_INNER (mode), first); - op1 = gen_reg_rtx (mode); - } + first = force_const_mem (GET_MODE_INNER (mode), first); Could you also apply the change suggested above (drop the braces and the now-redundant op1 = gen_reg_rtx (mode))? Everything else LGTM. > diff --git a/gcc/testsuite/gcc.target/i386/pr100865-10b.c > b/gcc/testsuite/gcc.target/i386/pr100865-10b.c > index 77ace86ffe8..e5616d8d258 100644 > --- a/gcc/testsuite/gcc.target/i386/pr100865-10b.c > +++ b/gcc/testsuite/gcc.target/i386/pr100865-10b.c > @@ -5,4 +5,3 @@ > > /* { dg-final { scan-assembler-times "vpbroadcastb\[\\t \]+%(?:r|e)\[^\n\]*, > %ymm\[0-9\]+" 1 } } */ > /* { dg-final { scan-assembler-times "vmovdqu8\[\\t \]%ymm\[0-9\]+, " 8 } } > */ > -/* { dg-final { scan-assembler-not "vzeroupper" } } */ > diff --git a/gcc/testsuite/gcc.target/i386/pr100865-4b.c > b/gcc/testsuite/gcc.target/i386/pr100865-4b.c > index 80e9fdb12ea..6d9cb91b8e9 100644 > --- a/gcc/testsuite/gcc.target/i386/pr100865-4b.c > +++ b/gcc/testsuite/gcc.target/i386/pr100865-4b.c > @@ -5,7 +5,6 @@ > > /* { dg-final { scan-assembler-times "vpbroadcastb\[\\t \]+%(?:r|e)\[^\n\]*, > %ymm\[0-9\]+" 1 } } */ > /* { dg-final { scan-assembler-times "vmovdqu8\[\\t \]%ymm\[0-9\]+, " 2 } } > */ > -/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */ > -/* { dg-final { scan-assembler-not "vzeroupper" { target { ! 
ia32 } } } } */ > +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */ > /* { dg-final { scan-assembler-not "vpbroadcastb\[\\t \]+%xmm\[0-9\]+, > %ymm\[0-9\]+" } } */ > /* { dg-final { scan-assembler-not "vmovdqa" } } */ > diff --git a/gcc/testsuite/gcc.target/i386/pr100865-6b.c > b/gcc/testsuite/gcc.target/i386/pr100865-6b.c > index 35f2e961d25..9588249cb02 100644 > --- a/gcc/testsuite/gcc.target/i386/pr100865-6b.c > +++ b/gcc/testsuite/gcc.target/i386/pr100865-6b.c > @@ -4,9 +4,7 @@ > #include "pr100865-6a.c" > > /* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+%(?:r|e)\[^\n\]*, > %ymm\[0-9\]+" 1 } } */ > -/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 8 { > target ia32 } } } */ > -/* { dg-final { scan-assembler-times "vmovdqu32\[\\t \]%ymm\[0-9\]+, " 8 { > target { ! ia32 } } } } */ > -/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */ > -/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */ > +/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 8 } } */ > +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */ > /* { dg-final { scan-assembler-not "vpbroadcastd\[\\t \]+%xmm\[0-9\]+, > %ymm\[0-9\]+" } } */ > /* { dg-final { scan-assembler-not "vmovdqa" } } */ > diff --git a/gcc/testsuite/gcc.target/i386/pr100865-7b.c > b/gcc/testsuite/gcc.target/i386/pr100865-7b.c > index ad267c43891..3b20c680521 100644 > --- a/gcc/testsuite/gcc.target/i386/pr100865-7b.c > +++ b/gcc/testsuite/gcc.target/i386/pr100865-7b.c > @@ -5,8 +5,6 @@ > > /* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+%r\[^\n\]*, > %ymm\[0-9\]+" 1 { target { ! ia32 } } } } */ > /* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+\[^\n\]*, > %ymm\[0-9\]+" 1 { target ia32 } } } */ > -/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 16 { > target ia32 } } } */ > -/* { dg-final { scan-assembler-times "vmovdqu64\[\\t \]%ymm\[0-9\]+, " 16 { > target { ! 
ia32 } } } } */ > -/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */ > -/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */ > +/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 16 } } > */ > +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */ > /* { dg-final { scan-assembler-not "vmovdqa" } } */ > diff --git a/gcc/testsuite/gcc.target/i386/pr102021.c > b/gcc/testsuite/gcc.target/i386/pr102021.c > new file mode 100644 > index 00000000000..6db3f57dc76 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr102021.c > @@ -0,0 +1,15 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -march=skylake-avx512" } */ > + > +#include<immintrin.h> > + > +__m256i > +foo () > +{ > + return _mm256_set1_epi16 (12); > +} > + > +/* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+%r\[^\n\]*, > %ymm\[0-9\]+" 1 { target { ! ia32 } } } } */ > +/* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+\[^\n\]*, > %ymm\[0-9\]+" 1 { target ia32 } } } */ > +/* { dg-final { scan-assembler-not "vmovdqa" } } */ > +/* { dg-final { scan-assembler-not "vzeroupper" } } */ > -- > 2.31.1 >
-- BR, Hongtao