-(define_split - [(set (match_operand:V2HI 0 "register_operand") - (eq:V2HI - (eq:V2HI - (us_minus:V2HI - (match_operand:V2HI 1 "register_operand") - (match_operand:V2HI 2 "register_operand")) - (match_operand:V2HI 3 "const0_operand")) - (match_operand:V2HI 4 "const0_operand")))] - "TARGET_SSE4_1" - [(set (match_dup 0) - (umin:V2HI (match_dup 1) (match_dup 2))) - (set (match_dup 0) - (eq:V2HI (match_dup 0) (match_dup 2)))])
The splitter is wrong when op1 == op2 (the original pattern returns 0; after the split, it returns 1), so remove the splitter. Also change another define_split to define_insn_and_split to handle the pattern below: 494(set (reg:V4QI 112) 495 (unspec:V4QI [ 496 (subreg:V4QI (reg:V2HF 111 [ bf ]) 0) 497 (subreg:V4QI (reg:V2HF 110 [ af ]) 0) 498 (subreg:V4QI (eq:V2HI (eq:V2HI (reg:V2HI 105) 499 (const_vector:V2HI [ 500 (const_int 0 [0]) repeated x2 501 ])) 502 (const_vector:V2HI [ 503 (const_int 0 [0]) repeated x2 504 ])) 0) 505 ] UNSPEC_BLENDV)) A define_split doesn't work here since pass_combine assumes a split produces at most 2 insns, but here it produces 3 because const0_rtx (V2HImode) must first be moved into a register. The move insn can be eliminated later. Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}. Ready to push to trunk. gcc/ChangeLog: PR target/112276 * config/i386/mmx.md (*mmx_pblendvb_v8qi_1): Change define_split to define_insn_and_split to handle immediate_operand for comparison. (*mmx_pblendvb_v8qi_2): Ditto. (*mmx_pblendvb_<mode>_1): Ditto. (*mmx_pblendvb_v4qi_2): Ditto. (<code><mode>3): Remove define_split after it. (<code>v8qi3): Ditto. (<code><mode>3): Ditto. (<code>v2hi3): Ditto. gcc/testsuite/ChangeLog: * g++.target/i386/part-vect-vcondhf.C: Adjust testcase. * gcc.target/i386/pr112276.c: New test. 
--- gcc/config/i386/mmx.md | 112 ++++++------------ .../g++.target/i386/part-vect-vcondhf.C | 1 - gcc/testsuite/gcc.target/i386/pr112276.c | 36 ++++++ 3 files changed, 70 insertions(+), 79 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr112276.c diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index e3d0fb5b107..2b97bb8fa98 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -3360,21 +3360,6 @@ (define_insn "<code><mode>3" (set_attr "prefix" "orig,orig,vex") (set_attr "mode" "TI")]) -(define_split - [(set (match_operand:V4HI 0 "register_operand") - (eq:V4HI - (eq:V4HI - (us_minus:V4HI - (match_operand:V4HI 1 "register_operand") - (match_operand:V4HI 2 "register_operand")) - (match_operand:V4HI 3 "const0_operand")) - (match_operand:V4HI 4 "const0_operand")))] - "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE" - [(set (match_dup 0) - (umin:V4HI (match_dup 1) (match_dup 2))) - (set (match_dup 0) - (eq:V4HI (match_dup 0) (match_dup 2)))]) - (define_expand "mmx_<code>v8qi3" [(set (match_operand:V8QI 0 "register_operand") (umaxmin:V8QI @@ -3408,21 +3393,6 @@ (define_expand "<code>v8qi3" (match_operand:V8QI 2 "register_operand")))] "TARGET_MMX_WITH_SSE") -(define_split - [(set (match_operand:V8QI 0 "register_operand") - (eq:V8QI - (eq:V8QI - (us_minus:V8QI - (match_operand:V8QI 1 "register_operand") - (match_operand:V8QI 2 "register_operand")) - (match_operand:V8QI 3 "const0_operand")) - (match_operand:V8QI 4 "const0_operand")))] - "TARGET_MMX_WITH_SSE" - [(set (match_dup 0) - (umin:V8QI (match_dup 1) (match_dup 2))) - (set (match_dup 0) - (eq:V8QI (match_dup 0) (match_dup 2)))]) - (define_insn "<code><mode>3" [(set (match_operand:VI1_16_32 0 "register_operand" "=x,Yw") (umaxmin:VI1_16_32 @@ -3436,21 +3406,6 @@ (define_insn "<code><mode>3" (set_attr "type" "sseiadd") (set_attr "mode" "TI")]) -(define_split - [(set (match_operand:V4QI 0 "register_operand") - (eq:V4QI - (eq:V4QI - (us_minus:V4QI - (match_operand:V4QI 1 "register_operand") - 
(match_operand:V4QI 2 "register_operand")) - (match_operand:V4QI 3 "const0_operand")) - (match_operand:V4QI 4 "const0_operand")))] - "TARGET_SSE2" - [(set (match_dup 0) - (umin:V4QI (match_dup 1) (match_dup 2))) - (set (match_dup 0) - (eq:V4QI (match_dup 0) (match_dup 2)))]) - (define_insn "<code>v2hi3" [(set (match_operand:V2HI 0 "register_operand" "=Yr,*x,Yv") (umaxmin:V2HI @@ -3467,21 +3422,6 @@ (define_insn "<code>v2hi3" (set_attr "prefix" "orig,orig,vex") (set_attr "mode" "TI")]) -(define_split - [(set (match_operand:V2HI 0 "register_operand") - (eq:V2HI - (eq:V2HI - (us_minus:V2HI - (match_operand:V2HI 1 "register_operand") - (match_operand:V2HI 2 "register_operand")) - (match_operand:V2HI 3 "const0_operand")) - (match_operand:V2HI 4 "const0_operand")))] - "TARGET_SSE4_1" - [(set (match_dup 0) - (umin:V2HI (match_dup 1) (match_dup 2))) - (set (match_dup 0) - (eq:V2HI (match_dup 0) (match_dup 2)))]) - (define_insn "ssse3_abs<mode>2" [(set (match_operand:MMXMODEI 0 "register_operand" "=y,Yv") (abs:MMXMODEI @@ -3954,7 +3894,7 @@ (define_insn "mmx_pblendvb_v8qi" (set_attr "btver2_decode" "vector") (set_attr "mode" "TI")]) -(define_split +(define_insn_and_split "*mmx_pblendvb_v8qi_1" [(set (match_operand:V8QI 0 "register_operand") (unspec:V8QI [(match_operand:V8QI 1 "register_operand") @@ -3962,21 +3902,26 @@ (define_split (eq:V8QI (eq:V8QI (match_operand:V8QI 3 "register_operand") - (match_operand:V8QI 4 "register_operand")) + (match_operand:V8QI 4 "nonmemory_operand")) (match_operand:V8QI 5 "const0_operand"))] UNSPEC_BLENDV))] - "TARGET_MMX_WITH_SSE" + "TARGET_MMX_WITH_SSE && ix86_pre_reload_split ()" + "#" + "&& 1" [(set (match_dup 6) - (eq:V8QI (match_dup 3) (match_dup 4))) + (eq:V8QI (match_dup 3) (match_dup 7))) (set (match_dup 0) (unspec:V8QI [(match_dup 2) (match_dup 1) (match_dup 6)] UNSPEC_BLENDV))] - "operands[6] = gen_reg_rtx (V8QImode);") +{ + operands[6] = gen_reg_rtx (V8QImode); + operands[7] = force_reg (V8QImode, operands[4]); +}) -(define_split 
+(define_insn_and_split "*mmx_pblendvb_v8qi_2" [(set (match_operand:V8QI 0 "register_operand") (unspec:V8QI [(match_operand:V8QI 1 "register_operand") @@ -3985,12 +3930,14 @@ (define_split (eq:MMXMODE24 (eq:MMXMODE24 (match_operand:MMXMODE24 3 "register_operand") - (match_operand:MMXMODE24 4 "register_operand")) + (match_operand:MMXMODE24 4 "nonmemory_operand")) (match_operand:MMXMODE24 5 "const0_operand")) 0)] UNSPEC_BLENDV))] - "TARGET_MMX_WITH_SSE" + "TARGET_MMX_WITH_SSE && ix86_pre_reload_split ()" + "#" + "&& 1" [(set (match_dup 6) - (eq:MMXMODE24 (match_dup 3) (match_dup 4))) + (eq:MMXMODE24 (match_dup 3) (match_dup 8))) (set (match_dup 0) (unspec:V8QI [(match_dup 2) @@ -4000,6 +3947,7 @@ (define_split { operands[6] = gen_reg_rtx (<MODE>mode); operands[7] = lowpart_subreg (V8QImode, operands[6], <MODE>mode); + operands[8] = force_reg (<MODE>mode, operands[4]); }) (define_insn "mmx_pblendvb_<mode>" @@ -4022,7 +3970,7 @@ (define_insn "mmx_pblendvb_<mode>" (set_attr "btver2_decode" "vector") (set_attr "mode" "TI")]) -(define_split +(define_insn_and_split "*mmx_pblendvb_<mode>_1" [(set (match_operand:VI_16_32 0 "register_operand") (unspec:VI_16_32 [(match_operand:VI_16_32 1 "register_operand") @@ -4030,21 +3978,26 @@ (define_split (eq:VI_16_32 (eq:VI_16_32 (match_operand:VI_16_32 3 "register_operand") - (match_operand:VI_16_32 4 "register_operand")) + (match_operand:VI_16_32 4 "nonmemory_operand")) (match_operand:VI_16_32 5 "const0_operand"))] UNSPEC_BLENDV))] - "TARGET_SSE2" + "TARGET_SSE2 && ix86_pre_reload_split ()" + "#" + "&& 1" [(set (match_dup 6) - (eq:VI_16_32 (match_dup 3) (match_dup 4))) + (eq:VI_16_32 (match_dup 3) (match_dup 7))) (set (match_dup 0) (unspec:VI_16_32 [(match_dup 2) (match_dup 1) (match_dup 6)] UNSPEC_BLENDV))] - "operands[6] = gen_reg_rtx (<MODE>mode);") +{ + operands[6] = gen_reg_rtx (<MODE>mode); + operands[7] = force_reg (<MODE>mode, operands[4]); +}) -(define_split +(define_insn_and_split "*mmx_pblendvb_v4qi_2" [(set 
(match_operand:V4QI 0 "register_operand") (unspec:V4QI [(match_operand:V4QI 1 "register_operand") @@ -4053,12 +4006,14 @@ (define_split (eq:V2HI (eq:V2HI (match_operand:V2HI 3 "register_operand") - (match_operand:V2HI 4 "register_operand")) + (match_operand:V2HI 4 "nonmemory_operand")) (match_operand:V2HI 5 "const0_operand")) 0)] UNSPEC_BLENDV))] - "TARGET_SSE2" + "TARGET_SSE2 && ix86_pre_reload_split ()" + "#" + "&& 1" [(set (match_dup 6) - (eq:V2HI (match_dup 3) (match_dup 4))) + (eq:V2HI (match_dup 3) (match_dup 8))) (set (match_dup 0) (unspec:V4QI [(match_dup 2) @@ -4068,6 +4023,7 @@ (define_split { operands[6] = gen_reg_rtx (V2HImode); operands[7] = lowpart_subreg (V4QImode, operands[6], V2HImode); + operands[8] = force_reg (V2HImode, operands[4]); }) ;; XOP parallel XMM conditional moves diff --git a/gcc/testsuite/g++.target/i386/part-vect-vcondhf.C b/gcc/testsuite/g++.target/i386/part-vect-vcondhf.C index f19727816cf..e623e6cde79 100644 --- a/gcc/testsuite/g++.target/i386/part-vect-vcondhf.C +++ b/gcc/testsuite/g++.target/i386/part-vect-vcondhf.C @@ -3,7 +3,6 @@ /* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */ /* { dg-final { scan-assembler-times "vpcmpeqw" 6 } } */ /* { dg-final { scan-assembler-times "vpcmpgtw" 2 } } */ -/* { dg-final { scan-assembler-times "vpminuw" 2 } } */ /* { dg-final { scan-assembler-times "vcmpph" 8 } } */ /* { dg-final { scan-assembler-times "vpblendvb" 8 } } */ typedef unsigned short __attribute__((__vector_size__ (4))) __v2hu; diff --git a/gcc/testsuite/gcc.target/i386/pr112276.c b/gcc/testsuite/gcc.target/i386/pr112276.c new file mode 100644 index 00000000000..5365313f4c2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr112276.c @@ -0,0 +1,36 @@ +/* { dg-do run { target { ! 
ia32 } } } */ +/* { dg-options "-O2 -msse4.1" } */ +/* { dg-require-effective-target sse4 } */ + +#include "sse4_1-check.h" + +typedef unsigned short __attribute__((__vector_size__ (8))) U4; +typedef unsigned short __attribute__((__vector_size__ (4))) U2; + +U4 +__attribute__((noipa)) +foo4 (U4 a, U4 b) +{ + return a > b; +} + +U2 +__attribute__((noipa)) +foo2 (U2 a, U2 b) +{ + return a > b; +} + +static void +sse4_1_test () +{ + U4 a = __extension__(U4) {1, 1, 1, 1}; + U4 b = foo4 (a, a); + if (b[0] || b[1] || b[2] || b[3]) __builtin_abort(); + + U2 c = __extension__(U2) {1, 1}; + U2 d = foo2 (c, c); + if (d[0] || d[1]) __builtin_abort(); + + return; +} -- 2.31.1