On Fri, Apr 3, 2020 at 6:51 PM H.J. Lu <hjl.to...@gmail.com> wrote: > > commit 16ed2601ad0a4aa82f11e9df86ea92183f94f979 > Author: H.J. Lu <hongjiu...@intel.com> > Date: Wed May 15 15:26:19 2019 +0000 > > i386: Emulate MMX pshufb with SSE version > > has > > +(define_insn_and_split "ssse3_pshufbv8qi3" > + [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv") > + (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yv") > + (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")] > + UNSPEC_PSHUFB)) > + (clobber (match_scratch:V4SI 3 "=X,x,Yv"))] > ^^^ These are earlyclobber operands. > + "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3" > + "@ > + pshufb\t{%2, %0|%0, %2} > + # > + #" > + "TARGET_MMX_WITH_SSE && reload_completed" > + [(set (match_dup 3) (match_dup 5)) > + (set (match_dup 3) > + (and:V4SI (match_dup 3) (match_dup 2))) > + (set (match_dup 0) > + (unspec:V16QI [(match_dup 1) (match_dup 4)] UNSPEC_PSHUFB))] > > If input register operand 2 is dead after this insn, RA may choose it > as the scratch operand. Since it isn't marked as earlyclobber, operand 2 > becomes unused after split and then it gets optimized out. Marking the scratch > operand as earlyclobber fixes the issue. > > OK for master if there are no regressions? > > H.J. > -- > gcc/ > > PR target/94467 > * config/i386/sse.md (ssse3_pshufbv8qi3): Mark scratch operand > as earlyclobber. > > gcc/ > > PR target/94467 > * testsuite/gcc.target/i386/pr94467-1.c: New test. > * testsuite/gcc.target/i386/pr94467-2.c: Likewise.
OK. Thanks, Uros. > --- > gcc/config/i386/sse.md | 2 +- > gcc/testsuite/gcc.target/i386/pr94467-1.c | 40 +++++++++++++++++++ > gcc/testsuite/gcc.target/i386/pr94467-2.c | 48 +++++++++++++++++++++++ > 3 files changed, 89 insertions(+), 1 deletion(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr94467-1.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr94467-2.c > > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md > index fba91b7369a..1de03a515d9 100644 > --- a/gcc/config/i386/sse.md > +++ b/gcc/config/i386/sse.md > @@ -16695,7 +16695,7 @@ (define_insn_and_split "ssse3_pshufbv8qi3" > (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yv") > (match_operand:V8QI 2 "register_mmxmem_operand" > "ym,x,Yv")] > UNSPEC_PSHUFB)) > - (clobber (match_scratch:V4SI 3 "=X,x,Yv"))] > + (clobber (match_scratch:V4SI 3 "=X,&x,&Yv"))] > "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3" > "@ > pshufb\t{%2, %0|%0, %2} > diff --git a/gcc/testsuite/gcc.target/i386/pr94467-1.c > b/gcc/testsuite/gcc.target/i386/pr94467-1.c > new file mode 100644 > index 00000000000..a51c3a8f5fe > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr94467-1.c > @@ -0,0 +1,40 @@ > +/* { dg-do run } */ > +/* { dg-require-effective-target avx } */ > +/* { dg-options "-O -mavx" } */ > + > +#include "avx-check.h" > + > +typedef char __attribute__ ((__vector_size__ (8))) v8qi; > +typedef short __attribute__ ((__vector_size__ (8))) v4hi; > +typedef int __attribute__ ((__vector_size__ (8))) v2si; > +typedef long long __attribute__ ((__vector_size__ (8))) v1di; > +typedef unsigned long long u64; > +u64 k, c; > + > +v8qi g, h, p, q; > +v4hi d, e, f, l, n, o; > +v2si j; > + > +u64 > +foo (v4hi r) > +{ > + v8qi s; > + f = (v4hi) j; > + e = __builtin_ia32_psrlwi ((v4hi) k, c); > + s = __builtin_ia32_pavgb (h, h); > + n = __builtin_ia32_pabsw (f); > + o = __builtin_ia32_psubusw (n, l); > + p = __builtin_ia32_packsswb (r, o); > + q = __builtin_ia32_pshufb (p, s); > + g = 
__builtin_ia32_punpcklbw (q, (v8qi) r); > + d = r; > + return (u64) g + (u64) h + (u64) j; > +} > + > +static void > +avx_test (void) > +{ > + u64 x = foo ((v4hi) { 5 }); > + if (x != 0x0005000500050505) > + __builtin_abort (); > +} > diff --git a/gcc/testsuite/gcc.target/i386/pr94467-2.c > b/gcc/testsuite/gcc.target/i386/pr94467-2.c > new file mode 100644 > index 00000000000..8128be325e4 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr94467-2.c > @@ -0,0 +1,48 @@ > +/* { dg-do run } */ > +/* { dg-require-effective-target ssse3 } */ > +/* { dg-options "-O -mssse3" } */ > + > +#ifndef CHECK_H > +#define CHECK_H "ssse3-check.h" > +#endif > + > +#ifndef TEST > +#define TEST ssse3_test > +#endif > + > +#include CHECK_H > + > +typedef char __attribute__ ((__vector_size__ (8))) v8qi; > +typedef short __attribute__ ((__vector_size__ (8))) v4hi; > +typedef int __attribute__ ((__vector_size__ (8))) v2si; > +typedef long long __attribute__ ((__vector_size__ (8))) v1di; > +typedef unsigned long long u64; > +u64 k, c; > + > +v8qi g, h, p, q; > +v4hi d, e, f, l, n, o; > +v2si j; > + > +u64 > +foo (v4hi r) > +{ > + v8qi s; > + f = (v4hi) j; > + e = __builtin_ia32_psrlwi ((v4hi) k, c); > + s = __builtin_ia32_pavgb (h, h); > + n = __builtin_ia32_pabsw (f); > + o = __builtin_ia32_psubusw (n, l); > + p = __builtin_ia32_packsswb (r, o); > + q = __builtin_ia32_pshufb (p, s); > + g = __builtin_ia32_punpcklbw (q, (v8qi) r); > + d = r; > + return (u64) g + (u64) h + (u64) j; > +} > + > +static void > +ssse3_test (void) > +{ > + u64 x = foo ((v4hi) { 5 }); > + if (x != 0x0005000500050505) > + __builtin_abort (); > +} > -- > 2.25.1 >