On Sun, Dec 1, 2024 at 8:01 PM Uros Bizjak <ubiz...@gmail.com> wrote: > > On Sat, Nov 30, 2024 at 11:00 PM H.J. Lu <hjl.to...@gmail.com> wrote: > > > > Add pcmpeq splitters to split > > > > (insn 5 3 7 2 (set (reg:V4SI 100) > > (eq:V4SI (reg:V4SI 98) > > (reg:V4SI 98))) 7910 {*sse2_eqv4si3} > > (expr_list:REG_DEAD (reg:V4SI 98) > > (expr_list:REG_EQUAL (eq:V4SI (const_vector:V4SI [ > > (const_int -1 [0xffffffffffffffff]) repeated x4 > > ]) > > (const_vector:V4SI [ > > (const_int -1 [0xffffffffffffffff]) repeated x4 > > ])) > > (nil)))) > > > > to > > > > (insn 8 3 7 2 (set (reg:V4SI 100) > > (const_vector:V4SI [ > > (const_int -1 [0xffffffffffffffff]) repeated x4 > > ])) -1 > > (nil)) > > IMO, middle-end should handle these cases, and I'm surprised that it > doesn't. These RTXes are not unspecs. > > OTOH, splitters should handle only nonmemory operands. Memory operands > can be volatile and we shouldn't remove these at will.
Fixed in the v2 patch by using register_operand for the compared operands, so the splitters no longer match memory operands and the memory access is kept. Thanks. > Uros. > > > gcc/ > > > > PR target/117863 > > * config/i386/sse.md: Add pcmpeq splitters. > > > > gcc/testsuite/ > > > > PR target/117863 > > * gcc.dg/rtl/i386/vector_eq-2.c: New test. > > > > Signed-off-by: H.J. Lu <hjl.to...@gmail.com> > > --- > > gcc/config/i386/sse.md | 33 ++++++++++ > > gcc/testsuite/gcc.dg/rtl/i386/vector_eq-2.c | 71 +++++++++++++++++++++ > > 2 files changed, 104 insertions(+) > > create mode 100644 gcc/testsuite/gcc.dg/rtl/i386/vector_eq-2.c > > > > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md > > index 498a42d6e1e..e2ce0781cb4 100644 > > --- a/gcc/config/i386/sse.md > > +++ b/gcc/config/i386/sse.md > > @@ -17943,6 +17943,17 @@ (define_insn "*avx2_eq<mode>3" > > (set_attr "prefix" "vex") > > (set_attr "mode" "OI")]) > > > > +(define_split > > + [(set (match_operand:VI_256 0 "register_operand") > > + (eq:VI_256 > > + (match_operand:VI_256 1 "nonimmediate_operand") > > + (match_operand:VI_256 2 "nonimmediate_operand")))] > > + "TARGET_AVX2 && rtx_equal_p (operands[1], operands[2])" > > + [(set (match_dup 0) (match_dup 1))] > > +{ > > + operands[1] = CONSTM1_RTX (<MODE>mode); > > +}) > > + > > (define_insn_and_split "*avx2_pcmp<mode>3_1" > > [(set (match_operand:VI_128_256 0 "register_operand") > > (vec_merge:VI_128_256 > > @@ -18227,6 +18238,17 @@ (define_insn "*sse4_1_eqv2di3" > > (set_attr "prefix" "orig,orig,vex") > > (set_attr "mode" "TI")]) > > > > +(define_split > > + [(set (match_operand:V2DI 0 "register_operand") > > + (eq:V2DI > > + (match_operand:V2DI 1 "vector_operand") > > + (match_operand:V2DI 2 "vector_operand")))] > > + "TARGET_SSE4_1 && rtx_equal_p (operands[1], operands[2])" > > + [(set (match_dup 0) (match_dup 1))] > > +{ > > + operands[1] = CONSTM1_RTX (V2DImode); > > +}) > > + > > (define_insn "*sse2_eq<mode>3" > > [(set (match_operand:VI124_128 0 "register_operand" "=x,x") > > (eq:VI124_128 > > @@ -18243,6 +18265,17 @@ 
(define_insn "*sse2_eq<mode>3" > > (set_attr "prefix" "orig,vex") > > (set_attr "mode" "TI")]) > > > > +(define_split > > + [(set (match_operand:VI124_128 0 "register_operand") > > + (eq:VI124_128 > > + (match_operand:VI124_128 1 "vector_operand") > > + (match_operand:VI124_128 2 "vector_operand")))] > > + "TARGET_SSE2 && rtx_equal_p (operands[1], operands[2])" > > + [(set (match_dup 0) (match_dup 1))] > > +{ > > + operands[1] = CONSTM1_RTX (<MODE>mode); > > +}) > > + > > (define_insn "sse4_2_gtv2di3" > > [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x") > > (gt:V2DI > > diff --git a/gcc/testsuite/gcc.dg/rtl/i386/vector_eq-2.c > > b/gcc/testsuite/gcc.dg/rtl/i386/vector_eq-2.c > > new file mode 100644 > > index 00000000000..871d489b730 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.dg/rtl/i386/vector_eq-2.c > > @@ -0,0 +1,71 @@ > > +/* { dg-do compile { target { i?86-*-* x86_64-*-* } } } */ > > +/* { dg-additional-options "-O2 -march=x86-64-v3" } */ > > + > > +typedef int v4si __attribute__((vector_size(16))); > > +typedef int v8si __attribute__((vector_size(32))); > > +typedef int v2di __attribute__((vector_size(16))); > > + > > +v4si __RTL (startwith ("vregs1")) foo1 (void) > > +{ > > +(function "foo1" > > + (insn-chain > > + (block 2 > > + (edge-from entry (flags "FALLTHRU")) > > + (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK) > > + (cnote 2 NOTE_INSN_FUNCTION_BEG) > > + (cinsn 3 (set (reg:V4SI <0>) (const_vector:V4SI [(const_int -1) > > (const_int -1) (const_int -1) (const_int -1)]))) > > + (cinsn 4 (set (reg:V4SI <1>) (const_vector:V4SI [(const_int -1) > > (const_int -1) (const_int -1) (const_int -1)]))) > > + (cinsn 5 (set (reg:V4SI <2>) > > + (eq:V4SI (reg:V4SI <0>) (reg:V4SI <1>)))) > > + (cinsn 6 (set (reg:V4SI <3>) (reg:V4SI <2>))) > > + (cinsn 7 (set (reg:V4SI xmm0) (reg:V4SI <3>))) > > + (edge-to exit (flags "FALLTHRU")) > > + ) > > + ) > > + (crtl (return_rtx (reg/i:V4SI xmm0))) > > +) > > +} > > + > > +v8si __RTL (startwith ("vregs1")) foo2 (void) > > 
+{ > > +(function "foo2" > > + (insn-chain > > + (block 2 > > + (edge-from entry (flags "FALLTHRU")) > > + (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK) > > + (cnote 2 NOTE_INSN_FUNCTION_BEG) > > + (cinsn 3 (set (reg:V8SI <0>) (const_vector:V8SI [(const_int -1) > > (const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1) > > (const_int -1) (const_int -1)]))) > > + (cinsn 4 (set (reg:V8SI <1>) (const_vector:V8SI [(const_int -1) > > (const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1) > > (const_int -1) (const_int -1)]))) > > + (cinsn 5 (set (reg:V8SI <2>) > > + (eq:V8SI (reg:V8SI <0>) (reg:V8SI <1>)))) > > + (cinsn 6 (set (reg:V8SI <3>) (reg:V8SI <2>))) > > + (cinsn 7 (set (reg:V8SI xmm0) (reg:V8SI <3>))) > > + (edge-to exit (flags "FALLTHRU")) > > + ) > > + ) > > + (crtl (return_rtx (reg/i:V8SI xmm0))) > > +) > > +} > > + > > +v2di __RTL (startwith ("vregs1")) foo3 (void) > > +{ > > +(function "foo3" > > + (insn-chain > > + (block 2 > > + (edge-from entry (flags "FALLTHRU")) > > + (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK) > > + (cnote 2 NOTE_INSN_FUNCTION_BEG) > > + (cinsn 3 (set (reg:V2DI <0>) (const_vector:V2DI [(const_int -1) > > (const_int -1)]))) > > + (cinsn 4 (set (reg:V2DI <1>) (const_vector:V2DI [(const_int -1) > > (const_int -1)]))) > > + (cinsn 5 (set (reg:V2DI <2>) > > + (eq:V2DI (reg:V2DI <0>) (reg:V2DI <1>)))) > > + (cinsn 6 (set (reg:V2DI <3>) (reg:V2DI <2>))) > > + (cinsn 7 (set (reg:V2DI xmm0) (reg:V2DI <3>))) > > + (edge-to exit (flags "FALLTHRU")) > > + ) > > + ) > > + (crtl (return_rtx (reg/i:V2DI xmm0))) > > +) > > +} > > + > > +/* { dg-final { scan-assembler-times "vpcmpeq" 3 } } */ > > -- > > 2.47.1 > > -- H.J.