On Sat, Nov 30, 2024 at 11:00 PM H.J. Lu <hjl.to...@gmail.com> wrote:
>
> Add pcmpeq splitters to split
>
> (insn 5 3 7 2 (set (reg:V4SI 100)
>         (eq:V4SI (reg:V4SI 98)
>             (reg:V4SI 98))) 7910 {*sse2_eqv4si3}
>      (expr_list:REG_DEAD (reg:V4SI 98)
>         (expr_list:REG_EQUAL (eq:V4SI (const_vector:V4SI [
>                         (const_int -1 [0xffffffffffffffff]) repeated x4
>                     ])
>                 (const_vector:V4SI [
>                         (const_int -1 [0xffffffffffffffff]) repeated x4
>                     ]))
>             (nil))))
>
> to
>
> (insn 8 3 7 2 (set (reg:V4SI 100)
>         (const_vector:V4SI [
>                 (const_int -1 [0xffffffffffffffff]) repeated x4
>             ])) -1
>      (nil))

IMO, the middle-end should handle these cases, and I'm surprised that it
doesn't. These RTXes are ordinary comparisons, not unspecs, so nothing
hides their semantics from the generic RTL simplifiers.

OTOH, the splitters should handle only nonmemory operands. Memory operands
can be volatile, and we shouldn't remove such memory accesses at will.

Uros.

> gcc/
>
>         PR target/117863
>         * config/i386/sse.md: Add pcmpeq splitters.
>
> gcc/testsuite/
>
>         PR target/117863
>         * gcc.dg/rtl/i386/vector_eq-2.c: New test.
>
> Signed-off-by: H.J. Lu <hjl.to...@gmail.com>
> ---
>  gcc/config/i386/sse.md                      | 33 ++++++++++
>  gcc/testsuite/gcc.dg/rtl/i386/vector_eq-2.c | 71 +++++++++++++++++++++
>  2 files changed, 104 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.dg/rtl/i386/vector_eq-2.c
>
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 498a42d6e1e..e2ce0781cb4 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -17943,6 +17943,17 @@ (define_insn "*avx2_eq<mode>3"
>     (set_attr "prefix" "vex")
>     (set_attr "mode" "OI")])
>
> +(define_split
> +  [(set (match_operand:VI_256 0 "register_operand")
> +       (eq:VI_256
> +         (match_operand:VI_256 1 "nonimmediate_operand")
> +         (match_operand:VI_256 2 "nonimmediate_operand")))]
> +  "TARGET_AVX2 && rtx_equal_p (operands[1], operands[2])"
> +  [(set (match_dup 0) (match_dup 1))]
> +{
> +  operands[1] = CONSTM1_RTX (<MODE>mode);
> +})
> +
>  (define_insn_and_split "*avx2_pcmp<mode>3_1"
>   [(set (match_operand:VI_128_256  0 "register_operand")
>         (vec_merge:VI_128_256
> @@ -18227,6 +18238,17 @@ (define_insn "*sse4_1_eqv2di3"
>     (set_attr "prefix" "orig,orig,vex")
>     (set_attr "mode" "TI")])
>
> +(define_split
> +  [(set (match_operand:V2DI 0 "register_operand")
> +       (eq:V2DI
> +         (match_operand:V2DI 1 "vector_operand")
> +         (match_operand:V2DI 2 "vector_operand")))]
> +  "TARGET_SSE4_1 && rtx_equal_p (operands[1], operands[2])"
> +  [(set (match_dup 0) (match_dup 1))]
> +{
> +  operands[1] = CONSTM1_RTX (V2DImode);
> +})
> +
>  (define_insn "*sse2_eq<mode>3"
>    [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
>         (eq:VI124_128
> @@ -18243,6 +18265,17 @@ (define_insn "*sse2_eq<mode>3"
>     (set_attr "prefix" "orig,vex")
>     (set_attr "mode" "TI")])
>
> +(define_split
> +  [(set (match_operand:VI124_128 0 "register_operand")
> +       (eq:VI124_128
> +         (match_operand:VI124_128 1 "vector_operand")
> +         (match_operand:VI124_128 2 "vector_operand")))]
> +  "TARGET_SSE2 && rtx_equal_p (operands[1], operands[2])"
> +  [(set (match_dup 0) (match_dup 1))]
> +{
> +  operands[1] = CONSTM1_RTX (<MODE>mode);
> +})
> +
>  (define_insn "sse4_2_gtv2di3"
>    [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
>         (gt:V2DI
> diff --git a/gcc/testsuite/gcc.dg/rtl/i386/vector_eq-2.c b/gcc/testsuite/gcc.dg/rtl/i386/vector_eq-2.c
> new file mode 100644
> index 00000000000..871d489b730
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/rtl/i386/vector_eq-2.c
> @@ -0,0 +1,71 @@
> +/* { dg-do compile { target { i?86-*-* x86_64-*-* } } } */
> +/* { dg-additional-options "-O2 -march=x86-64-v3" } */
> +
> +typedef int v4si __attribute__((vector_size(16)));
> +typedef int v8si __attribute__((vector_size(32)));
> +typedef int v2di __attribute__((vector_size(16)));
> +
> +v4si __RTL (startwith ("vregs1")) foo1 (void)
> +{
> +(function "foo1"
> +  (insn-chain
> +    (block 2
> +      (edge-from entry (flags "FALLTHRU"))
> +      (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK)
> +      (cnote 2 NOTE_INSN_FUNCTION_BEG)
> +      (cinsn 3 (set (reg:V4SI <0>) (const_vector:V4SI [(const_int -1) (const_int -1) (const_int -1) (const_int -1)])))
> +      (cinsn 4 (set (reg:V4SI <1>) (const_vector:V4SI [(const_int -1) (const_int -1) (const_int -1) (const_int -1)])))
> +      (cinsn 5 (set (reg:V4SI <2>)
> +                   (eq:V4SI (reg:V4SI <0>) (reg:V4SI <1>))))
> +      (cinsn 6 (set (reg:V4SI <3>) (reg:V4SI <2>)))
> +      (cinsn 7 (set (reg:V4SI xmm0) (reg:V4SI <3>)))
> +      (edge-to exit (flags "FALLTHRU"))
> +    )
> +  )
> + (crtl (return_rtx (reg/i:V4SI xmm0)))
> +)
> +}
> +
> +v8si __RTL (startwith ("vregs1")) foo2 (void)
> +{
> +(function "foo2"
> +  (insn-chain
> +    (block 2
> +      (edge-from entry (flags "FALLTHRU"))
> +      (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK)
> +      (cnote 2 NOTE_INSN_FUNCTION_BEG)
> +      (cinsn 3 (set (reg:V8SI <0>) (const_vector:V8SI [(const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1)])))
> +      (cinsn 4 (set (reg:V8SI <1>) (const_vector:V8SI [(const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1)])))
> +      (cinsn 5 (set (reg:V8SI <2>)
> +                   (eq:V8SI (reg:V8SI <0>) (reg:V8SI <1>))))
> +      (cinsn 6 (set (reg:V8SI <3>) (reg:V8SI <2>)))
> +      (cinsn 7 (set (reg:V8SI xmm0) (reg:V8SI <3>)))
> +      (edge-to exit (flags "FALLTHRU"))
> +    )
> +  )
> + (crtl (return_rtx (reg/i:V8SI xmm0)))
> +)
> +}
> +
> +v2di __RTL (startwith ("vregs1")) foo3 (void)
> +{
> +(function "foo3"
> +  (insn-chain
> +    (block 2
> +      (edge-from entry (flags "FALLTHRU"))
> +      (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK)
> +      (cnote 2 NOTE_INSN_FUNCTION_BEG)
> +      (cinsn 3 (set (reg:V2DI <0>) (const_vector:V2DI [(const_int -1) (const_int -1)])))
> +      (cinsn 4 (set (reg:V2DI <1>) (const_vector:V2DI [(const_int -1) (const_int -1)])))
> +      (cinsn 5 (set (reg:V2DI <2>)
> +                   (eq:V2DI (reg:V2DI <0>) (reg:V2DI <1>))))
> +      (cinsn 6 (set (reg:V2DI <3>) (reg:V2DI <2>)))
> +      (cinsn 7 (set (reg:V2DI xmm0) (reg:V2DI <3>)))
> +      (edge-to exit (flags "FALLTHRU"))
> +    )
> +  )
> + (crtl (return_rtx (reg/i:V2DI xmm0)))
> +)
> +}
> +
> +/* { dg-final { scan-assembler-times "vpcmpeq" 3 } } */
> --
> 2.47.1
>

Reply via email to