Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31 and x < 0 ? 1 : 0 into (unsigned) x >> 31.
Add define_insn_and_split for the optimization done in ix86_expand_int_vcond. gcc/ChangeLog: PR target/115517 * config/i386/sse.md ("*ashr<mode>3_1"): New define_insn_and_split. (*avx512_ashr<mode>3_1): Ditto. (*avx2_lshr<mode>3_1): Ditto. (*avx2_lshr<mode>3_2): Ditto and add 2 combine splitters after it. * config/i386/mmx.md (mmxscalarsize): New mode attribute. (*mmx_ashr<mode>3_1): New define_insn_and_split. ("mmx_<insn><mode>3"): Add a combine splitter after it. (*mmx_ashrv2hi3_1): New define_insn_and_split, also add a combine splitter after it. gcc/testsuite/ChangeLog: * gcc.target/i386/avx2-pr115517.c: New test. * gcc.target/i386/avx512-pr115517.c: New test. * g++.target/i386/avx2-pr115517.C: New test. * g++.target/i386/avx512-pr115517.C: New test. * gcc.target/i386/pr111023-2.c: Adjust testcase. * gcc.target/i386/vect-div-1.c: Ditto. --- gcc/config/i386/mmx.md | 52 ++++++++++++ gcc/config/i386/sse.md | 83 +++++++++++++++++++ gcc/testsuite/g++.target/i386/avx2-pr115517.C | 60 ++++++++++++++ .../g++.target/i386/avx512-pr115517.C | 70 ++++++++++++++++ gcc/testsuite/gcc.target/i386/avx2-pr115517.c | 33 ++++++++ .../gcc.target/i386/avx512-pr115517.c | 70 ++++++++++++++++ gcc/testsuite/gcc.target/i386/pr111023-2.c | 4 +- gcc/testsuite/gcc.target/i386/vect-div-1.c | 3 +- 8 files changed, 372 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/g++.target/i386/avx2-pr115517.C create mode 100644 gcc/testsuite/g++.target/i386/avx512-pr115517.C create mode 100644 gcc/testsuite/gcc.target/i386/avx2-pr115517.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512-pr115517.c diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index ea53f516cbb..7262bf146c2 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -135,6 +135,14 @@ (define_mode_attr mmxscalarmodelower (V4HI "hi") (V2HI "hi") (V8QI "qi")]) +(define_mode_attr mmxscalarsize + [(V1DI "64") + (V2SI "32") (V2SF "32") + (V4HF "16") (V4BF "16") + (V2HF "16") (V2BF "16") + (V4HI "16") 
(V2HI "16") + (V8QI "8")]) + (define_mode_attr Yv_Yw [(V8QI "Yw") (V4HI "Yw") (V2SI "Yv") (V1DI "Yv") (V2SF "Yv")]) @@ -3608,6 +3616,17 @@ (define_insn "mmx_ashr<mode>3" (const_string "0"))) (set_attr "mode" "DI,TI,TI")]) +(define_insn_and_split "*mmx_ashr<mode>3_1" + [(set (match_operand:MMXMODE24 0 "register_operand") + (lt:MMXMODE24 + (match_operand:MMXMODE24 1 "register_operand") + (match_operand:MMXMODE24 2 "const0_operand")))] + "TARGET_MMX_WITH_SSE && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 0) (ashiftrt:MMXMODE24 (match_dup 1) (match_dup 3)))] + "operands[3] = gen_int_mode (<mmxscalarsize> - 1, DImode);") + (define_expand "ashr<mode>3" [(set (match_operand:MMXMODE24 0 "register_operand") (ashiftrt:MMXMODE24 @@ -3634,6 +3653,17 @@ (define_insn "mmx_<insn><mode>3" (const_string "0"))) (set_attr "mode" "DI,TI,TI")]) +(define_split + [(set (match_operand:MMXMODE248 0 "register_operand") + (and:MMXMODE248 + (lt:MMXMODE248 + (match_operand:MMXMODE248 1 "register_operand") + (match_operand:MMXMODE248 2 "const0_operand")) + (match_operand:MMXMODE248 3 "const1_operand")))] + "TARGET_MMX_WITH_SSE && ix86_pre_reload_split ()" + [(set (match_dup 0) (lshiftrt:MMXMODE248 (match_dup 1) (match_dup 4)))] + "operands[4] = gen_int_mode (<mmxscalarsize> - 1, DImode);") + (define_expand "<insn><mode>3" [(set (match_operand:MMXMODE24 0 "register_operand") (any_lshift:MMXMODE24 @@ -3675,6 +3705,28 @@ (define_insn "<insn>v2hi3" (const_string "0"))) (set_attr "mode" "TI")]) +(define_insn_and_split "*mmx_ashrv2hi3_1" + [(set (match_operand:V2HI 0 "register_operand") + (lt:V2HI + (match_operand:V2HI 1 "register_operand") + (match_operand:V2HI 2 "const0_operand")))] + "TARGET_SSE2 && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 0) (ashiftrt:V2HI (match_dup 1) (match_dup 3)))] + "operands[3] = gen_int_mode (15, DImode);") + +(define_split + [(set (match_operand:V2HI 0 "register_operand") + (and:V2HI + (lt:V2HI + (match_operand:V2HI 1 
"register_operand") + (match_operand:V2HI 2 "const0_operand")) + (match_operand:V2HI 3 "const1_operand")))] + "TARGET_SSE2 && ix86_pre_reload_split ()" + [(set (match_dup 0) (lshiftrt:V2HI (match_dup 1) (match_dup 4)))] + "operands[4] = gen_int_mode (15, DImode);") + (define_expand "<insn>v8qi3" [(set (match_operand:V8QI 0 "register_operand") (any_shift:V8QI (match_operand:V8QI 1 "register_operand") diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 5996ad99606..d86b6fa81c0 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -16860,6 +16860,17 @@ (define_insn "ashr<mode>3" (set_attr "prefix" "orig,vex") (set_attr "mode" "<sseinsnmode>")]) +(define_insn_and_split "*ashr<mode>3_1" + [(set (match_operand:VI24_AVX2 0 "register_operand") + (lt:VI24_AVX2 + (match_operand:VI24_AVX2 1 "register_operand") + (match_operand:VI24_AVX2 2 "const0_operand")))] + "TARGET_SSE2 && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 0) (ashiftrt:VI24_AVX2 (match_dup 1) (match_dup 3)))] + "operands[3] = gen_int_mode (<ssescalarsize> - 1, DImode);") + (define_insn "<mask_codefor>ashr<mode>3<mask_name>" [(set (match_operand:VI248_AVX512BW_AVX512VL 0 "register_operand" "=v,v") (ashiftrt:VI248_AVX512BW_AVX512VL @@ -16874,6 +16885,23 @@ (define_insn "<mask_codefor>ashr<mode>3<mask_name>" (const_string "0"))) (set_attr "mode" "<sseinsnmode>")]) +(define_insn_and_split "*avx512_ashr<mode>3_1" + [(set (match_operand:VI248_AVX512VLBW 0 "register_operand") + (vec_merge:VI248_AVX512VLBW + (match_operand:VI248_AVX512VLBW 1 "vector_all_ones_operand") + (match_operand:VI248_AVX512VLBW 2 "const0_operand") + (unspec:<avx512fmaskmode> + [(match_operand:VI248_AVX512VLBW 3 "nonimmediate_operand") + (match_operand:VI248_AVX512VLBW 4 "const0_operand") + (const_int 1)] + UNSPEC_PCMP)))] + "TARGET_AVX512F && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 0) + (ashiftrt:VI248_AVX512VLBW (match_dup 3) (match_dup 5)))] + "operands[5] = gen_int_mode 
(<ssescalarsize> - 1, DImode);") + (define_expand "ashr<mode>3" [(set (match_operand:VI248_AVX512BW 0 "register_operand") (ashiftrt:VI248_AVX512BW @@ -17028,6 +17056,61 @@ (define_insn "<insn><mode>3" (set_attr "prefix" "orig,vex") (set_attr "mode" "<sseinsnmode>")]) +(define_insn_and_split "*avx2_lshr<mode>3_1" + [(set (match_operand:VI8_AVX2 0 "register_operand") + (and:VI8_AVX2 + (gt:VI8_AVX2 + (match_operand:VI8_AVX2 1 "register_operand") + (match_operand:VI8_AVX2 2 "register_operand")) + (match_operand:VI8_AVX2 3 "const1_operand")))] + "TARGET_SSE4_2 && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 5) (gt:VI8_AVX2 (match_dup 1) (match_dup 2))) + (set (match_dup 0) (lshiftrt:VI8_AVX2 (match_dup 5) (match_dup 4)))] +{ + operands[4] = gen_int_mode (<ssescalarsize> - 1, DImode); + operands[5] = gen_reg_rtx (<MODE>mode); +}) + +(define_insn_and_split "*avx2_lshr<mode>3_2" + [(set (match_operand:VI8_AVX2 0 "register_operand") + (and:VI8_AVX2 + (lt:VI8_AVX2 + (match_operand:VI8_AVX2 1 "register_operand") + (match_operand:VI8_AVX2 2 "const0_operand")) + (match_operand:VI8_AVX2 3 "const1_operand")))] + "TARGET_SSE2 && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 0) (lshiftrt:VI8_AVX2 (match_dup 1) (const_int 63)))]) + +(define_split + [(set (match_operand:VI248_AVX2 0 "register_operand") + (and:VI248_AVX2 + (lt:VI248_AVX2 + (match_operand:VI248_AVX2 1 "register_operand") + (match_operand:VI248_AVX2 2 "const0_operand")) + (match_operand:VI248_AVX2 3 "const1_operand")))] + "TARGET_SSE2 && ix86_pre_reload_split ()" + [(set (match_dup 0) (lshiftrt:VI248_AVX2 (match_dup 1) (match_dup 4)))] + "operands[4] = gen_int_mode (<ssescalarsize> - 1, DImode);") + +(define_split + [(set (match_operand:VI248_AVX512VLBW 0 "register_operand") + (vec_merge:VI248_AVX512VLBW + (match_operand:VI248_AVX512VLBW 1 "const1_operand") + (match_operand:VI248_AVX512VLBW 2 "const0_operand") + (unspec:<avx512fmaskmode> + [(match_operand:VI248_AVX512VLBW 3 
"nonimmediate_operand") + (match_operand:VI248_AVX512VLBW 4 "const0_operand") + (const_int 1)] + UNSPEC_PCMP)))] + "TARGET_AVX512F && ix86_pre_reload_split ()" + [(set (match_dup 0) + (lshiftrt:VI248_AVX512VLBW (match_dup 3) (match_dup 5)))] + "operands[5] = gen_int_mode (<ssescalarsize> - 1, DImode);") + (define_insn "<insn><mode>3<mask_name>" [(set (match_operand:VI248_AVX512BW 0 "register_operand" "=v,v") (any_lshift:VI248_AVX512BW diff --git a/gcc/testsuite/g++.target/i386/avx2-pr115517.C b/gcc/testsuite/g++.target/i386/avx2-pr115517.C new file mode 100644 index 00000000000..ec000c57542 --- /dev/null +++ b/gcc/testsuite/g++.target/i386/avx2-pr115517.C @@ -0,0 +1,60 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx2 -O2" } */ +/* { dg-final { scan-assembler-times "vpsrlq" 2 } } */ +/* { dg-final { scan-assembler-times "vpsrld" 2 } } */ +/* { dg-final { scan-assembler-times "vpsrlw" 2 } } */ + +typedef short v8hi __attribute__((vector_size(16))); +typedef short v16hi __attribute__((vector_size(32))); +typedef int v4si __attribute__((vector_size(16))); +typedef int v8si __attribute__((vector_size(32))); +typedef long long v2di __attribute__((vector_size(16))); +typedef long long v4di __attribute__((vector_size(32))); + +v8hi +foo (v8hi a) +{ + v8hi const1_op = __extension__(v8hi){1,1,1,1,1,1,1,1}; + v8hi const0_op = __extension__(v8hi){0,0,0,0,0,0,0,0}; + return a < const0_op ? const1_op : const0_op; +} + +v16hi +foo2 (v16hi a) +{ + v16hi const1_op = __extension__(v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}; + v16hi const0_op = __extension__(v16hi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; + return a < const0_op ? const1_op : const0_op; +} + +v4si +foo3 (v4si a) +{ + v4si const1_op = __extension__(v4si){1,1,1,1}; + v4si const0_op = __extension__(v4si){0,0,0,0}; + return a < const0_op ? const1_op : const0_op; +} + +v8si +foo4 (v8si a) +{ + v8si const1_op = __extension__(v8si){1,1,1,1,1,1,1,1}; + v8si const0_op = __extension__(v8si){0,0,0,0,0,0,0,0}; + return a < const0_op ? 
const1_op : const0_op; +} + +v2di +foo3 (v2di a) +{ + v2di const1_op = __extension__(v2di){1,1}; + v2di const0_op = __extension__(v2di){0,0}; + return a < const0_op ? const1_op : const0_op; +} + +v4di +foo4 (v4di a) +{ + v4di const1_op = __extension__(v4di){1,1,1,1}; + v4di const0_op = __extension__(v4di){0,0,0,0}; + return a < const0_op ? const1_op : const0_op; +} diff --git a/gcc/testsuite/g++.target/i386/avx512-pr115517.C b/gcc/testsuite/g++.target/i386/avx512-pr115517.C new file mode 100644 index 00000000000..22df41bbdc9 --- /dev/null +++ b/gcc/testsuite/g++.target/i386/avx512-pr115517.C @@ -0,0 +1,70 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512bw -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-times "vpsrad" 3 } } */ +/* { dg-final { scan-assembler-times "vpsraw" 3 } } */ +/* { dg-final { scan-assembler-times "vpsraq" 3 } } */ + +typedef short v8hi __attribute__((vector_size(16))); +typedef short v16hi __attribute__((vector_size(32))); +typedef short v32hi __attribute__((vector_size(64))); +typedef int v4si __attribute__((vector_size(16))); +typedef int v8si __attribute__((vector_size(32))); +typedef int v16si __attribute__((vector_size(64))); +typedef long long v2di __attribute__((vector_size(16))); +typedef long long v4di __attribute__((vector_size(32))); +typedef long long v8di __attribute__((vector_size(64))); + +v8hi +foo (v8hi a) +{ + return a < __extension__(v8hi) { 0, 0, 0, 0, 0, 0, 0, 0}; +} + +v16hi +foo2 (v16hi a) +{ + return a < __extension__(v16hi) { 0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0, 0}; +} + +v32hi +foo3 (v32hi a) +{ + return a < __extension__(v32hi) { 0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0, 0}; +} + +v4si +foo4 (v4si a) +{ + return a < __extension__(v4si) { 0, 0, 0, 0}; +} + +v8si +foo5 (v8si a) +{ + return a < __extension__(v8si) { 0, 0, 0, 0, 0, 0, 0, 0}; +} + +v16si +foo6 (v16si a) +{ + return a < __extension__(v16si) { 0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0, 
0}; +} + +v2di +foo7 (v2di a) +{ + return a < __extension__(v2di) { 0, 0}; +} + +v4di +foo8 (v4di a) +{ + return a < __extension__(v4di) { 0, 0, 0, 0}; +} + +v8di +foo9 (v8di a) +{ + return a < __extension__(v8di) { 0, 0, 0, 0, 0, 0, 0, 0}; +} diff --git a/gcc/testsuite/gcc.target/i386/avx2-pr115517.c b/gcc/testsuite/gcc.target/i386/avx2-pr115517.c new file mode 100644 index 00000000000..5b2620b0dc1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx2-pr115517.c @@ -0,0 +1,33 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx2 -O2" } */ +/* { dg-final { scan-assembler-times "vpsrad" 2 } } */ +/* { dg-final { scan-assembler-times "vpsraw" 2 } } */ + +typedef short v8hi __attribute__((vector_size(16))); +typedef short v16hi __attribute__((vector_size(32))); +typedef int v4si __attribute__((vector_size(16))); +typedef int v8si __attribute__((vector_size(32))); + +v8hi +foo (v8hi a) +{ + return a < __extension__(v8hi) { 0, 0, 0, 0, 0, 0, 0, 0}; +} + +v16hi +foo2 (v16hi a) +{ + return a < __extension__(v16hi) { 0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0, 0}; +} + +v4si +foo3 (v4si a) +{ + return a < __extension__(v4si) { 0, 0, 0, 0}; +} + +v8si +foo4 (v8si a) +{ + return a < __extension__(v8si) { 0, 0, 0, 0, 0, 0, 0, 0}; +} diff --git a/gcc/testsuite/gcc.target/i386/avx512-pr115517.c b/gcc/testsuite/gcc.target/i386/avx512-pr115517.c new file mode 100644 index 00000000000..22df41bbdc9 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512-pr115517.c @@ -0,0 +1,70 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512bw -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-times "vpsrad" 3 } } */ +/* { dg-final { scan-assembler-times "vpsraw" 3 } } */ +/* { dg-final { scan-assembler-times "vpsraq" 3 } } */ + +typedef short v8hi __attribute__((vector_size(16))); +typedef short v16hi __attribute__((vector_size(32))); +typedef short v32hi __attribute__((vector_size(64))); +typedef int v4si __attribute__((vector_size(16))); +typedef int v8si 
__attribute__((vector_size(32))); +typedef int v16si __attribute__((vector_size(64))); +typedef long long v2di __attribute__((vector_size(16))); +typedef long long v4di __attribute__((vector_size(32))); +typedef long long v8di __attribute__((vector_size(64))); + +v8hi +foo (v8hi a) +{ + return a < __extension__(v8hi) { 0, 0, 0, 0, 0, 0, 0, 0}; +} + +v16hi +foo2 (v16hi a) +{ + return a < __extension__(v16hi) { 0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0, 0}; +} + +v32hi +foo3 (v32hi a) +{ + return a < __extension__(v32hi) { 0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0, 0}; +} + +v4si +foo4 (v4si a) +{ + return a < __extension__(v4si) { 0, 0, 0, 0}; +} + +v8si +foo5 (v8si a) +{ + return a < __extension__(v8si) { 0, 0, 0, 0, 0, 0, 0, 0}; +} + +v16si +foo6 (v16si a) +{ + return a < __extension__(v16si) { 0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0, 0}; +} + +v2di +foo7 (v2di a) +{ + return a < __extension__(v2di) { 0, 0}; +} + +v4di +foo8 (v4di a) +{ + return a < __extension__(v4di) { 0, 0, 0, 0}; +} + +v8di +foo9 (v8di a) +{ + return a < __extension__(v8di) { 0, 0, 0, 0, 0, 0, 0, 0}; +} diff --git a/gcc/testsuite/gcc.target/i386/pr111023-2.c b/gcc/testsuite/gcc.target/i386/pr111023-2.c index 6c69f947544..ba52959b357 100644 --- a/gcc/testsuite/gcc.target/i386/pr111023-2.c +++ b/gcc/testsuite/gcc.target/i386/pr111023-2.c @@ -36,7 +36,7 @@ v4si_v4hi (v4si *dst, v8hi src) dst[0] = *(v4si *) tem; } -/* { dg-final { scan-assembler "pcmpgtw" } } */ +/* { dg-final { scan-assembler "(?:pcmpgtw|psraw)" } } */ /* { dg-final { scan-assembler "punpcklwd" } } */ void @@ -48,5 +48,5 @@ v2di_v2si (v2di *dst, v4si src) dst[0] = *(v2di *) tem; } -/* { dg-final { scan-assembler "pcmpgtd" } } */ +/* { dg-final { scan-assembler "(?:pcmpgtd|psrad)" } } */ /* { dg-final { scan-assembler "punpckldq" } } */ diff --git a/gcc/testsuite/gcc.target/i386/vect-div-1.c b/gcc/testsuite/gcc.target/i386/vect-div-1.c index f611088d8df..6d911290e06 100644 --- 
a/gcc/testsuite/gcc.target/i386/vect-div-1.c +++ b/gcc/testsuite/gcc.target/i386/vect-div-1.c @@ -40,4 +40,5 @@ f4 (int x) is always non-negative, so there is no need to do >> 31 shift etc. to check if it is. And in f3 and f4, VRP can prove it is always negative. */ -/* { dg-final { scan-assembler-not "psrad\[^\n\r\]*\\\$31" } } */ +/* Now that (lt:v4si op1 const0_operand) is optimized to psrad, there are 20 of them. */ +/* { dg-final { scan-assembler-times "psrad\[^\n\r\]*\\\$31" 20 } } */ -- 2.31.1