r15-974-gbf7745f887c765e06f2e75508f263debb60aeb2e optimized cmp + jcc/setcc
on mask registers to use kortest, but missed movcc.
This patch adds the same optimization for movcc.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ready to push to trunk.

gcc/ChangeLog:

        PR target/117232
        * config/i386/sse.md (*kortest_cmp<SWI1248_AVX512BWDQ_64:mode>_movqicc):
        New define_insn_and_split.
        (*kortest_cmp<SWI1248_AVX512BWDQ_64:mode>_mov<SWI248:mode>cc):
        Ditto.

gcc/testsuite/ChangeLog:

        * gcc.target/i386/pr117232-1.c: New test.
        * gcc.target/i386/pr117232-apx-1.c: New test.
---
 gcc/config/i386/sse.md                        | 85 +++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr117232-1.c    | 47 ++++++++++
 .../gcc.target/i386/pr117232-apx-1.c          | 48 +++++++++++
 3 files changed, 180 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr117232-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr117232-apx-1.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 6c28b74ac3f..2345015db1b 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -2423,6 +2423,91 @@ (define_insn_and_split "*kortest_cmp<mode>_jcc"
   DONE;
 })
 
+;; Optimize cmp + movcc with mask register by kortest + movcc.
+(define_insn_and_split "*kortest_cmp<SWI1248_AVX512BWDQ_64:mode>_movqicc"
+   [(set (match_operand:QI 0 "register_operand" "=r,r,r,r,r,r")
+      (if_then_else:QI
+       (match_operator 1 "bt_comparison_operator"
+         [(match_operand:SWI1248_AVX512BWDQ_64 4 "register_operand"
+         "?k,<SWI1248_AVX512BWDQ_64:r>,?k, <SWI1248_AVX512BWDQ_64:r>,?k,r")
+          (const_int -1)])
+       (match_operand:QI 2 "register_operand"  "r,r,0,0,r,r")
+       (match_operand:QI 3 "register_operand" " 0,0,r,r,r,r")))
+    (clobber (reg:CC FLAGS_REG))]
+  "TARGET_AVX512BW && TARGET_CMOVE && !TARGET_PARTIAL_REG_STALL"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+       (if_then_else:SI
+         (match_dup 5)
+         (match_dup 2)
+         (match_dup 3)))]
+{
+  rtx flag_reg;
+  if (MASK_REGNO_P (REGNO (operands[4])))
+    {
+      emit_insn (gen_kortest<SWI1248_AVX512BWDQ_64:mode>_ccc (operands[4], operands[4]));
+      flag_reg = gen_rtx_REG (CCCmode, FLAGS_REG);
+    }
+  else
+    {
+      flag_reg = gen_rtx_REG (CCZmode, FLAGS_REG);
+      emit_insn (gen_rtx_SET (flag_reg,
+                             gen_rtx_COMPARE (CCZmode,
+                                              operands[4],
+                                              constm1_rtx)));
+    }
+  operands[5] = gen_rtx_fmt_ee (GET_CODE (operands[1]), VOIDmode,
+                               flag_reg,const0_rtx);
+  operands[0] = gen_lowpart (SImode, operands[0]);
+  operands[2] = gen_lowpart (SImode, operands[2]);
+  operands[3] = gen_lowpart (SImode, operands[3]);
+}
+  [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd")
+   (set_attr "type" "icmov")
+   (set_attr "mode" "QI")])
+
+(define_insn_and_split "*kortest_cmp<SWI1248_AVX512BWDQ_64:mode>_mov<SWI248:mode>cc"
+   [(set (match_operand:SWI248 0 "register_operand" "=r,r,r,r,r,r,r,r")
+      (if_then_else:SWI248
+       (match_operator 1 "bt_comparison_operator"
+         [(match_operand:SWI1248_AVX512BWDQ_64 4 "register_operand"
+         "?k,<SWI1248_AVX512BWDQ_64:r>,?k, <SWI1248_AVX512BWDQ_64:r>,?k,r,?k, r")
+          (const_int -1)])
+       (match_operand:SWI248 2 "nonimmediate_operand" "rm,rm, 0, 0,rm,rm, r, r")
+       (match_operand:SWI248 3 "nonimmediate_operand" " 0, 0,rm,rm, r, r,rm,rm")))
+    (clobber (reg:CC FLAGS_REG))]
+  "TARGET_AVX512BW && TARGET_CMOVE
+   && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+       (if_then_else:SWI248
+         (match_dup 5)
+         (match_dup 2)
+         (match_dup 3)))]
+{
+  rtx flag_reg;
+  if (MASK_REGNO_P (REGNO (operands[4])))
+    {
+      emit_insn (gen_kortest<SWI1248_AVX512BWDQ_64:mode>_ccc (operands[4], operands[4]));
+      flag_reg = gen_rtx_REG (CCCmode, FLAGS_REG);
+    }
+  else
+    {
+      flag_reg = gen_rtx_REG (CCZmode, FLAGS_REG);
+      emit_insn (gen_rtx_SET (flag_reg,
+                             gen_rtx_COMPARE (CCZmode,
+                                              operands[4],
+                                              constm1_rtx)));
+    }
+  operands[5] = gen_rtx_fmt_ee (GET_CODE (operands[1]), VOIDmode,
+                               flag_reg,const0_rtx);
+}
+  [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd,apx_ndd,apx_ndd")
+   (set_attr "type" "icmov")
+   (set_attr "mode" "<SWI248:MODE>")])
+
 (define_insn "kunpckhi"
   [(set (match_operand:HI 0 "register_operand" "=k")
        (ior:HI
diff --git a/gcc/testsuite/gcc.target/i386/pr117232-1.c b/gcc/testsuite/gcc.target/i386/pr117232-1.c
new file mode 100644
index 00000000000..cd7f5d112a7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr117232-1.c
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -mavx512dq -O2" } */
+/* { dg-final { scan-assembler-times {(?n)kortest[bwqd]} 7 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times {(?n)cmovn?c} 7 { target { ! ia32 } } } } */
+
+#include <immintrin.h>
+int
+foo (__m512i a, __m512i b, int c, int d) {
+  __mmask64 k = _mm512_cmpeq_epi8_mask (a, b);
+  return k == (__mmask64) -1 ? c : d;
+}
+
+int
+foo1 (__m512i a, __m512i b, int c, int d) {
+  __mmask32 k = _mm512_cmpeq_epi16_mask (a, b);
+  return k == (__mmask32) -1 ? c : d;
+}
+
+int
+foo2 (__m512i a, __m512i b, int c, int d) {
+  __mmask16 k = _mm512_cmpeq_epi32_mask (a, b);
+  return k == (__mmask16) -1 ? c : d;
+}
+
+int
+foo3 (__m512i a, __m512i b, int c, int d) {
+  __mmask8 k = _mm512_cmpeq_epi64_mask (a, b);
+  return k == (__mmask8) -1 ? c : d;
+}
+
+short
+foo4 (__m512i a, __m512i b, short c, short d) {
+  __mmask8 k = _mm512_cmpeq_epi64_mask (a, b);
+  return k == (__mmask8) -1 ? c : d;
+}
+
+char
+foo5 (__m512i a, __m512i b, char c, char d) {
+  __mmask64 k = _mm512_cmpeq_epi8_mask (a, b);
+  return k == (__mmask64) -1 ? c : d;
+}
+
+long long
+foo6 (__m512i a, __m512i b, long long c, long long d) {
+  __mmask16 k = _mm512_cmpeq_epi32_mask (a, b);
+  return k == (__mmask16) -1 ? c : d;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr117232-apx-1.c b/gcc/testsuite/gcc.target/i386/pr117232-apx-1.c
new file mode 100644
index 00000000000..e3571adf6dd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr117232-apx-1.c
@@ -0,0 +1,48 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mavx512bw -mavx512vl -mavx512dq -mapxf -O2" } */
+/* { dg-final { scan-assembler-times {(?n)kortest[bwqd]} 7 } } */
+/* { dg-final { scan-assembler-times {(?n)cmovn?c} 7 } } */
+
+#include <immintrin.h>
+
+int
+foo (__m512i a, __m512i b, int c, int d) {
+  __mmask64 k = _mm512_cmpeq_epi8_mask (a, b);
+  return k == (__mmask64) -1 ? c : d;
+}
+
+int
+foo1 (__m512i a, __m512i b, int c, int d) {
+  __mmask32 k = _mm512_cmpeq_epi16_mask (a, b);
+  return k == (__mmask32) -1 ? c : d;
+}
+
+int
+foo2 (__m512i a, __m512i b, int c, int d) {
+  __mmask16 k = _mm512_cmpeq_epi32_mask (a, b);
+  return k == (__mmask16) -1 ? c : d;
+}
+
+int
+foo3 (__m512i a, __m512i b, int c, int d) {
+  __mmask8 k = _mm512_cmpeq_epi64_mask (a, b);
+  return k == (__mmask8) -1 ? c : d;
+}
+
+short
+foo4 (__m512i a, __m512i b, short c, short d) {
+  __mmask8 k = _mm512_cmpeq_epi64_mask (a, b);
+  return k == (__mmask8) -1 ? c : d;
+}
+
+char
+foo5 (__m512i a, __m512i b, char c, char d) {
+  __mmask64 k = _mm512_cmpeq_epi8_mask (a, b);
+  return k == (__mmask64) -1 ? c : d;
+}
+
+long long
+foo6 (__m512i a, __m512i b, long long c, long long d) {
+  __mmask16 k = _mm512_cmpeq_epi32_mask (a, b);
+  return k == (__mmask16) -1 ? c : d;
+}
-- 
2.34.1

Reply via email to