r15-974-gbf7745f887c765e06f2e75508f263debb60aeb2e optimized cmp + jcc/setcc on mask registers to use kortest, but missed the movcc case. This patch adds the same optimization for movcc.
Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}. Ready to push to trunk. gcc/ChangeLog: PR target/117232 * config/i386/sse.md (*kortest_cmp<SWI1248_AVX512BWDQ_64:mode>_movqicc): New define_insn_and_split. (*kortest_cmp<SWI1248_AVX512BWDQ_64:mode>_mov<SWI248:mode>cc): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/pr117232-1.c: New test. * gcc.target/i386/pr117232-apx-1.c: New test. --- gcc/config/i386/sse.md | 85 +++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr117232-1.c | 47 ++++++++++ .../gcc.target/i386/pr117232-apx-1.c | 48 +++++++++++ 3 files changed, 180 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/pr117232-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr117232-apx-1.c diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 6c28b74ac3f..2345015db1b 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -2423,6 +2423,91 @@ (define_insn_and_split "*kortest_cmp<mode>_jcc" DONE; }) +;; Optimize cmp + movcc with mask register by kortest + movcc. 
+(define_insn_and_split "*kortest_cmp<SWI1248_AVX512BWDQ_64:mode>_movqicc" + [(set (match_operand:QI 0 "register_operand" "=r,r,r,r,r,r") + (if_then_else:QI + (match_operator 1 "bt_comparison_operator" + [(match_operand:SWI1248_AVX512BWDQ_64 4 "register_operand" + "?k,<SWI1248_AVX512BWDQ_64:r>,?k, <SWI1248_AVX512BWDQ_64:r>,?k,r") + (const_int -1)]) + (match_operand:QI 2 "register_operand" "r,r,0,0,r,r") + (match_operand:QI 3 "register_operand" " 0,0,r,r,r,r"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_AVX512BW && TARGET_CMOVE && !TARGET_PARTIAL_REG_STALL" + "#" + "&& reload_completed" + [(set (match_dup 0) + (if_then_else:SI + (match_dup 5) + (match_dup 2) + (match_dup 3)))] +{ + rtx flag_reg; + if (MASK_REGNO_P (REGNO (operands[4]))) + { + emit_insn (gen_kortest<SWI1248_AVX512BWDQ_64:mode>_ccc (operands[4], operands[4])); + flag_reg = gen_rtx_REG (CCCmode, FLAGS_REG); + } + else + { + flag_reg = gen_rtx_REG (CCZmode, FLAGS_REG); + emit_insn (gen_rtx_SET (flag_reg, + gen_rtx_COMPARE (CCZmode, + operands[4], + constm1_rtx))); + } + operands[5] = gen_rtx_fmt_ee (GET_CODE (operands[1]), VOIDmode, + flag_reg,const0_rtx); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[2] = gen_lowpart (SImode, operands[2]); + operands[3] = gen_lowpart (SImode, operands[3]); +} + [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd") + (set_attr "type" "icmov") + (set_attr "mode" "QI")]) + +(define_insn_and_split "*kortest_cmp<SWI1248_AVX512BWDQ_64:mode>_mov<SWI248:mode>cc" + [(set (match_operand:SWI248 0 "register_operand" "=r,r,r,r,r,r,r,r") + (if_then_else:SWI248 + (match_operator 1 "bt_comparison_operator" + [(match_operand:SWI1248_AVX512BWDQ_64 4 "register_operand" + "?k,<SWI1248_AVX512BWDQ_64:r>,?k, <SWI1248_AVX512BWDQ_64:r>,?k,r,?k, r") + (const_int -1)]) + (match_operand:SWI248 2 "nonimmediate_operand" "rm,rm, 0, 0,rm,rm, r, r") + (match_operand:SWI248 3 "nonimmediate_operand" " 0, 0,rm,rm, r, r,rm,rm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_AVX512BW && TARGET_CMOVE + 
&& !(MEM_P (operands[2]) && MEM_P (operands[3]))" + "#" + "&& reload_completed" + [(set (match_dup 0) + (if_then_else:SWI248 + (match_dup 5) + (match_dup 2) + (match_dup 3)))] +{ + rtx flag_reg; + if (MASK_REGNO_P (REGNO (operands[4]))) + { + emit_insn (gen_kortest<SWI1248_AVX512BWDQ_64:mode>_ccc (operands[4], operands[4])); + flag_reg = gen_rtx_REG (CCCmode, FLAGS_REG); + } + else + { + flag_reg = gen_rtx_REG (CCZmode, FLAGS_REG); + emit_insn (gen_rtx_SET (flag_reg, + gen_rtx_COMPARE (CCZmode, + operands[4], + constm1_rtx))); + } + operands[5] = gen_rtx_fmt_ee (GET_CODE (operands[1]), VOIDmode, + flag_reg,const0_rtx); +} + [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd,apx_ndd,apx_ndd") + (set_attr "type" "icmov") + (set_attr "mode" "<SWI248:MODE>")]) + (define_insn "kunpckhi" [(set (match_operand:HI 0 "register_operand" "=k") (ior:HI diff --git a/gcc/testsuite/gcc.target/i386/pr117232-1.c b/gcc/testsuite/gcc.target/i386/pr117232-1.c new file mode 100644 index 00000000000..cd7f5d112a7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr117232-1.c @@ -0,0 +1,47 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512bw -mavx512vl -mavx512dq -O2" } */ +/* { dg-final { scan-assembler-times {(?n)kortest[bwqd]} 7 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times {(?n)cmovn?c} 7 { target { ! ia32 } } } } */ + +#include <immintrin.h> +int +foo (__m512i a, __m512i b, int c, int d) { + __mmask64 k = _mm512_cmpeq_epi8_mask (a, b); + return k == (__mmask64) -1 ? c : d; +} + +int +foo1 (__m512i a, __m512i b, int c, int d) { + __mmask32 k = _mm512_cmpeq_epi16_mask (a, b); + return k == (__mmask32) -1 ? c : d; +} + +int +foo2 (__m512i a, __m512i b, int c, int d) { + __mmask16 k = _mm512_cmpeq_epi32_mask (a, b); + return k == (__mmask16) -1 ? c : d; +} + +int +foo3 (__m512i a, __m512i b, int c, int d) { + __mmask8 k = _mm512_cmpeq_epi64_mask (a, b); + return k == (__mmask8) -1 ? 
c : d; +} + +short +foo4 (__m512i a, __m512i b, short c, short d) { + __mmask8 k = _mm512_cmpeq_epi64_mask (a, b); + return k == (__mmask8) -1 ? c : d; +} + +char +foo5 (__m512i a, __m512i b, char c, char d) { + __mmask64 k = _mm512_cmpeq_epi8_mask (a, b); + return k == (__mmask64) -1 ? c : d; +} + +long long +foo6 (__m512i a, __m512i b, long long c, long long d) { + __mmask16 k = _mm512_cmpeq_epi32_mask (a, b); + return k == (__mmask16) -1 ? c : d; +} diff --git a/gcc/testsuite/gcc.target/i386/pr117232-apx-1.c b/gcc/testsuite/gcc.target/i386/pr117232-apx-1.c new file mode 100644 index 00000000000..e3571adf6dd --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr117232-apx-1.c @@ -0,0 +1,48 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-mavx512bw -mavx512vl -mavx512dq -mapxf -O2" } */ +/* { dg-final { scan-assembler-times {(?n)kortest[bwqd]} 7 } } */ +/* { dg-final { scan-assembler-times {(?n)cmovn?c} 7 } } */ + +#include <immintrin.h> + +int +foo (__m512i a, __m512i b, int c, int d) { + __mmask64 k = _mm512_cmpeq_epi8_mask (a, b); + return k == (__mmask64) -1 ? c : d; +} + +int +foo1 (__m512i a, __m512i b, int c, int d) { + __mmask32 k = _mm512_cmpeq_epi16_mask (a, b); + return k == (__mmask32) -1 ? c : d; +} + +int +foo2 (__m512i a, __m512i b, int c, int d) { + __mmask16 k = _mm512_cmpeq_epi32_mask (a, b); + return k == (__mmask16) -1 ? c : d; +} + +int +foo3 (__m512i a, __m512i b, int c, int d) { + __mmask8 k = _mm512_cmpeq_epi64_mask (a, b); + return k == (__mmask8) -1 ? c : d; +} + +short +foo4 (__m512i a, __m512i b, short c, short d) { + __mmask8 k = _mm512_cmpeq_epi64_mask (a, b); + return k == (__mmask8) -1 ? c : d; +} + +char +foo5 (__m512i a, __m512i b, char c, char d) { + __mmask64 k = _mm512_cmpeq_epi8_mask (a, b); + return k == (__mmask64) -1 ? c : d; +} + +long long +foo6 (__m512i a, __m512i b, long long c, long long d) { + __mmask16 k = _mm512_cmpeq_epi32_mask (a, b); + return k == (__mmask16) -1 ? c : d; +} -- 2.34.1