2021-05-21 Uroš Bizjak <ubiz...@gmail.com> gcc/ PR target/100637 * config/i386/i386-expand.c (ix86_expand_sse_movcc): Handle V4QI and V2HI modes. (ix86_expand_int_sse_cmp): Ditto. * config/i386/mmx.md (*<sat_plusminus:insn><VI_32:mode>3): New instruction pattern. (*eq<VI_32:mode>3): Ditto. (*gt<VI_32:mode>3): Ditto. (*xop_pcmov_<VI_32:mode>): Ditto. (mmx_pblendvb32): Ditto. (mmx_pblendvb64): Rename from mmx_pblendvb. (vec_cmp<VI_32:mode><VI_32:mode>): New expander. (vec_cmpu<VI_32:mode><VI_32:mode>): Ditto. (vcond<VI_32:mode><VI_32:mode>): Ditto. (vcondu<VI_32:mode><VI_32:mode>): Ditto. (vcond_mask_<VI_32:mode><VI_32:mode>): Ditto.
gcc/testsuite/ PR target/100637 * g++.target/i386/pr100637-1b.C: New test. * g++.target/i386/pr100637-1w.C: Ditto. * gcc.target/i386/pr100637-2b.c: Ditto. * gcc.target/i386/pr100637-2w.c: Ditto. Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}. Pushed to master. Uros.
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index 9f3d41955a2..931b3362144 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -3721,7 +3721,7 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) { op_true = force_reg (mode, op_true); - gen = gen_mmx_pblendvb; + gen = gen_mmx_pblendvb64; if (mode != V8QImode) d = gen_reg_rtx (V8QImode); op_false = gen_lowpart (V8QImode, op_false); @@ -3729,6 +3729,20 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) cmp = gen_lowpart (V8QImode, cmp); } break; + case E_V4QImode: + case E_V2HImode: + if (TARGET_SSE4_1) + { + op_true = force_reg (mode, op_true); + + gen = gen_mmx_pblendvb32; + if (mode != V4QImode) + d = gen_reg_rtx (V4QImode); + op_false = gen_lowpart (V4QImode, op_false); + op_true = gen_lowpart (V4QImode, op_true); + cmp = gen_lowpart (V4QImode, cmp); + } + break; case E_V16QImode: case E_V8HImode: case E_V4SImode: @@ -4241,6 +4255,12 @@ ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1, else if (code == GT && TARGET_SSE4_1) gen = gen_sminv8qi3; break; + case E_V4QImode: + if (code == GTU && TARGET_SSE2) + gen = gen_uminv4qi3; + else if (code == GT && TARGET_SSE4_1) + gen = gen_sminv4qi3; + break; case E_V8HImode: if (code == GTU && TARGET_SSE4_1) gen = gen_uminv8hi3; @@ -4253,6 +4273,12 @@ ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1, else if (code == GT && TARGET_SSE2) gen = gen_sminv4hi3; break; + case E_V2HImode: + if (code == GTU && TARGET_SSE4_1) + gen = gen_uminv2hi3; + else if (code == GT && TARGET_SSE2) + gen = gen_sminv2hi3; + break; case E_V4SImode: if (TARGET_SSE4_1) gen = (code == GTU) ? 
gen_uminv4si3 : gen_sminv4si3; @@ -4327,8 +4353,10 @@ ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1, case E_V16HImode: case E_V16QImode: case E_V8QImode: + case E_V4QImode: case E_V8HImode: case E_V4HImode: + case E_V2HImode: /* Perform a parallel unsigned saturating subtraction. */ x = gen_reg_rtx (mode); emit_insn (gen_rtx_SET diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 5e92be34545..4c42e6d93dc 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -1403,6 +1403,20 @@ (define_insn "*mmx_<insn><mode>3" (set_attr "type" "mmxadd,sseadd,sseadd") (set_attr "mode" "DI,TI,TI")]) +(define_insn "*<insn><mode>3" + [(set (match_operand:VI_32 0 "register_operand" "=x,Yw") + (sat_plusminus:VI_32 + (match_operand:VI_32 1 "register_operand" "<comm>0,Yw") + (match_operand:VI_32 2 "register_operand" "x,Yw")))] + "TARGET_SSE2 + && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" + "@ + p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2} + vp<plusminus_mnemonic><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseadd") + (set_attr "mode" "TI")]) + (define_expand "mmx_mulv4hi3" [(set (match_operand:V4HI 0 "register_operand") (mult:V4HI (match_operand:V4HI 1 "register_mmxmem_operand") @@ -2032,6 +2046,20 @@ (define_insn "*mmx_eq<mode>3" (set_attr "type" "mmxcmp,ssecmp,ssecmp") (set_attr "mode" "DI,TI,TI")]) +(define_insn "*eq<mode>3" + [(set (match_operand:VI_32 0 "register_operand" "=x,x") + (eq:VI_32 + (match_operand:VI_32 1 "register_operand" "%0,x") + (match_operand:VI_32 2 "register_operand" "x,x")))] + "TARGET_SSE2 + && ix86_binary_operator_ok (EQ, <MODE>mode, operands)" + "@ + pcmpeq<mmxvecsize>\t{%2, %0|%0, %2} + vpcmpeq<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "ssecmp") + (set_attr "mode" "TI")]) + (define_insn "mmx_gt<mode>3" [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,x") (gt:MMXMODEI @@ -2047,6 +2075,19 
@@ (define_insn "mmx_gt<mode>3" (set_attr "type" "mmxcmp,ssecmp,ssecmp") (set_attr "mode" "DI,TI,TI")]) +(define_insn "*gt<mode>3" + [(set (match_operand:VI_32 0 "register_operand" "=x,x") + (gt:VI_32 + (match_operand:VI_32 1 "register_operand" "0,x") + (match_operand:VI_32 2 "register_operand" "x,x")))] + "TARGET_SSE2" + "@ + pcmpgt<mmxvecsize>\t{%2, %0|%0, %2} + vpcmpgt<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "ssecmp") + (set_attr "mode" "TI")]) + (define_expand "vec_cmp<mode><mode>" [(set (match_operand:MMXMODEI 0 "register_operand") (match_operator:MMXMODEI 1 "" @@ -2059,6 +2100,18 @@ (define_expand "vec_cmp<mode><mode>" DONE; }) +(define_expand "vec_cmp<mode><mode>" + [(set (match_operand:VI_32 0 "register_operand") + (match_operator:VI_32 1 "" + [(match_operand:VI_32 2 "register_operand") + (match_operand:VI_32 3 "register_operand")]))] + "TARGET_SSE2" +{ + bool ok = ix86_expand_int_vec_cmp (operands); + gcc_assert (ok); + DONE; +}) + (define_expand "vec_cmpu<mode><mode>" [(set (match_operand:MMXMODEI 0 "register_operand") (match_operator:MMXMODEI 1 "" @@ -2071,6 +2124,18 @@ (define_expand "vec_cmpu<mode><mode>" DONE; }) +(define_expand "vec_cmpu<mode><mode>" + [(set (match_operand:VI_32 0 "register_operand") + (match_operator:VI_32 1 "" + [(match_operand:VI_32 2 "register_operand") + (match_operand:VI_32 3 "register_operand")]))] + "TARGET_SSE2" +{ + bool ok = ix86_expand_int_vec_cmp (operands); + gcc_assert (ok); + DONE; +}) + (define_expand "vcond<MMXMODE124:mode><MMXMODEI:mode>" [(set (match_operand:MMXMODE124 0 "register_operand") (if_then_else:MMXMODE124 @@ -2088,6 +2153,21 @@ (define_expand "vcond<MMXMODE124:mode><MMXMODEI:mode>" DONE; }) +(define_expand "vcond<mode><mode>" + [(set (match_operand:VI_32 0 "register_operand") + (if_then_else:VI_32 + (match_operator 3 "" + [(match_operand:VI_32 4 "register_operand") + (match_operand:VI_32 5 "register_operand")]) + (match_operand:VI_32 1) + 
(match_operand:VI_32 2)))] + "TARGET_SSE2" +{ + bool ok = ix86_expand_int_vcond (operands); + gcc_assert (ok); + DONE; +}) + (define_expand "vcondu<MMXMODE124:mode><MMXMODEI:mode>" [(set (match_operand:MMXMODE124 0 "register_operand") (if_then_else:MMXMODE124 @@ -2105,6 +2185,21 @@ (define_expand "vcondu<MMXMODE124:mode><MMXMODEI:mode>" DONE; }) +(define_expand "vcondu<mode><mode>" + [(set (match_operand:VI_32 0 "register_operand") + (if_then_else:VI_32 + (match_operator 3 "" + [(match_operand:VI_32 4 "register_operand") + (match_operand:VI_32 5 "register_operand")]) + (match_operand:VI_32 1) + (match_operand:VI_32 2)))] + "TARGET_SSE2" +{ + bool ok = ix86_expand_int_vcond (operands); + gcc_assert (ok); + DONE; +}) + (define_expand "vcond_mask_<mode><mmxintvecmodelower>" [(set (match_operand:MMXMODE124 0 "register_operand") (vec_merge:MMXMODE124 @@ -2118,7 +2213,20 @@ (define_expand "vcond_mask_<mode><mmxintvecmodelower>" DONE; }) -(define_insn "mmx_pblendvb" +(define_expand "vcond_mask_<mode><mode>" + [(set (match_operand:VI_32 0 "register_operand") + (vec_merge:VI_32 + (match_operand:VI_32 1 "register_operand") + (match_operand:VI_32 2 "register_operand") + (match_operand:VI_32 3 "register_operand")))] + "TARGET_SSE2" +{ + ix86_expand_sse_movcc (operands[0], operands[3], + operands[1], operands[2]); + DONE; +}) + +(define_insn "mmx_pblendvb64" [(set (match_operand:V8QI 0 "register_operand" "=Yr,*x,x") (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,x") @@ -2138,6 +2246,26 @@ (define_insn "mmx_pblendvb" (set_attr "btver2_decode" "vector") (set_attr "mode" "TI")]) +(define_insn "mmx_pblendvb32" + [(set (match_operand:V4QI 0 "register_operand" "=Yr,*x,x") + (unspec:V4QI + [(match_operand:V4QI 1 "register_operand" "0,0,x") + (match_operand:V4QI 2 "register_operand" "Yr,*x,x") + (match_operand:V4QI 3 "register_operand" "Yz,Yz,x")] + UNSPEC_BLENDV))] + "TARGET_SSE4_1" + "@ + pblendvb\t{%3, %2, %0|%0, %2, %3} + pblendvb\t{%3, %2, %0|%0, %2, %3} + 
vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "*,*,1") + (set_attr "prefix" "orig,orig,vex") + (set_attr "btver2_decode" "vector") + (set_attr "mode" "TI")]) + ;; XOP parallel XMM conditional moves (define_insn "*xop_pcmov_<mode>" [(set (match_operand:MMXMODE124 0 "register_operand" "=x") @@ -2149,6 +2277,16 @@ (define_insn "*xop_pcmov_<mode>" "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "sse4arg")]) +(define_insn "*xop_pcmov_<mode>" + [(set (match_operand:VI_32 0 "register_operand" "=x") + (if_then_else:VI_32 + (match_operand:VI_32 3 "register_operand" "x") + (match_operand:VI_32 1 "register_operand" "x") + (match_operand:VI_32 2 "register_operand" "x")))] + "TARGET_XOP" + "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "sse4arg")]) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Parallel integral logical operations diff --git a/gcc/testsuite/g++.target/i386/pr100637-1b.C b/gcc/testsuite/g++.target/i386/pr100637-1b.C new file mode 100644 index 00000000000..35b5df7c9dd --- /dev/null +++ b/gcc/testsuite/g++.target/i386/pr100637-1b.C @@ -0,0 +1,17 @@ +/* PR target/100637 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse2" } */ + +typedef unsigned char __attribute__((__vector_size__ (4))) __v4qu; +typedef char __attribute__((__vector_size__ (4))) __v4qi; + +__v4qu au, bu; +__v4qi as, bs; + +__v4qu uu (__v4qu a, __v4qu b) { return (a > b) ? au : bu; } +__v4qu us (__v4qi a, __v4qi b) { return (a > b) ? au : bu; } +__v4qi su (__v4qu a, __v4qu b) { return (a > b) ? as : bs; } +__v4qi ss (__v4qi a, __v4qi b) { return (a > b) ? 
as : bs; } + +/* { dg-final { scan-assembler-times "pcmpeqb" 2 } } */ +/* { dg-final { scan-assembler-times "pcmpgtb" 2 } } */ diff --git a/gcc/testsuite/g++.target/i386/pr100637-1w.C b/gcc/testsuite/g++.target/i386/pr100637-1w.C new file mode 100644 index 00000000000..a3ed06fddee --- /dev/null +++ b/gcc/testsuite/g++.target/i386/pr100637-1w.C @@ -0,0 +1,17 @@ +/* PR target/100637 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse2" } */ + +typedef unsigned short __attribute__((__vector_size__ (4))) __v2hu; +typedef short __attribute__((__vector_size__ (4))) __v2hi; + +__v2hu au, bu; +__v2hi as, bs; + +__v2hu uu (__v2hu a, __v2hu b) { return (a > b) ? au : bu; } +__v2hu us (__v2hi a, __v2hi b) { return (a > b) ? au : bu; } +__v2hi su (__v2hu a, __v2hu b) { return (a > b) ? as : bs; } +__v2hi ss (__v2hi a, __v2hi b) { return (a > b) ? as : bs; } + +/* { dg-final { scan-assembler-times "pcmpeqw" 2 } } */ +/* { dg-final { scan-assembler-times "pcmpgtw" 2 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr100637-2b.c b/gcc/testsuite/gcc.target/i386/pr100637-2b.c new file mode 100644 index 00000000000..04480bb5a9b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr100637-2b.c @@ -0,0 +1,21 @@ +/* PR target/100637 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse2" } */ + +typedef char vec __attribute__((vector_size(4))); + +vec lt (vec a, vec b) { return a < b; } +vec le (vec a, vec b) { return a <= b; } +vec eq (vec a, vec b) { return a == b; } +vec ne (vec a, vec b) { return a != b; } +vec ge (vec a, vec b) { return a >= b; } +vec gt (vec a, vec b) { return a > b; } + +typedef unsigned char uvec __attribute__((vector_size(4))); + +vec ltu (uvec a, uvec b) { return a < b; } +vec leu (uvec a, uvec b) { return a <= b; } +vec geu (uvec a, uvec b) { return a >= b; } +vec gtu (uvec a, uvec b) { return a > b; } + +/* { dg-final { scan-assembler-not "cmpb" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr100637-2w.c 
b/gcc/testsuite/gcc.target/i386/pr100637-2w.c new file mode 100644 index 00000000000..fbbaac983f6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr100637-2w.c @@ -0,0 +1,21 @@ +/* PR target/100637 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse2" } */ + +typedef short vec __attribute__((vector_size(4))); + +vec lt (vec a, vec b) { return a < b; } +vec le (vec a, vec b) { return a <= b; } +vec eq (vec a, vec b) { return a == b; } +vec ne (vec a, vec b) { return a != b; } +vec ge (vec a, vec b) { return a >= b; } +vec gt (vec a, vec b) { return a > b; } + +typedef unsigned short uvec __attribute__((vector_size(4))); + +vec ltu (uvec a, uvec b) { return a < b; } +vec leu (uvec a, uvec b) { return a <= b; } +vec geu (uvec a, uvec b) { return a >= b; } +vec gtu (uvec a, uvec b) { return a > b; } + +/* { dg-final { scan-assembler-not "cmpw" } } */