https://gcc.gnu.org/g:ee7e77e9c121f5a6f27c92b6b24b2abf9cd66a4d

commit r15-4560-gee7e77e9c121f5a6f27c92b6b24b2abf9cd66a4d
Author: liuhongt <hongtao....@intel.com>
Date:   Mon Oct 21 02:22:08 2024 -0700

    i386: Optimize EQ/NE comparison between avx512 kmask and -1.
    
    r15-974-gbf7745f887c765e06f2e75508f263debb60aeb2e optimized this
    comparison for jcc/setcc, but missed movcc.
    This patch adds support for movcc.
    
    gcc/ChangeLog:
    
            PR target/117232
            * config/i386/sse.md
            (*kortest_cmp<SWI1248_AVX512BWDQ_64:mode>_movqicc):
            New define_insn_and_split.
            (*kortest_cmp<SWI1248_AVX512BWDQ_64:mode>_mov<SWI248:mode>cc):
            Ditto.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/i386/pr117232-1.c: New test.
            * gcc.target/i386/pr117232-apx-1.c: New test.

Diff:
---
 gcc/config/i386/sse.md                         | 85 ++++++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr117232-1.c     | 47 ++++++++++++++
 gcc/testsuite/gcc.target/i386/pr117232-apx-1.c | 48 +++++++++++++++
 3 files changed, 180 insertions(+)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 6c28b74ac3f2..2345015db1b3 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -2423,6 +2423,91 @@
   DONE;
 })
 
+;; Optimize cmp + movcc with mask register by kortest + movcc.
+(define_insn_and_split "*kortest_cmp<SWI1248_AVX512BWDQ_64:mode>_movqicc"
+   [(set (match_operand:QI 0 "register_operand" "=r,r,r,r,r,r")
+      (if_then_else:QI
+       (match_operator 1 "bt_comparison_operator"
+         [(match_operand:SWI1248_AVX512BWDQ_64 4 "register_operand"
+         "?k,<SWI1248_AVX512BWDQ_64:r>,?k, <SWI1248_AVX512BWDQ_64:r>,?k,r")
+          (const_int -1)])
+       (match_operand:QI 2 "register_operand"  "r,r,0,0,r,r")
+       (match_operand:QI 3 "register_operand" " 0,0,r,r,r,r")))
+    (clobber (reg:CC FLAGS_REG))]
+  "TARGET_AVX512BW && TARGET_CMOVE && !TARGET_PARTIAL_REG_STALL"
+  "#" ;; No assembly of its own; split once reload has completed.
+  "&& reload_completed"
+  [(set (match_dup 0)
+       (if_then_else:SI
+         (match_dup 5)
+         (match_dup 2)
+         (match_dup 3)))]
+{
+  rtx flag_reg;
+  if (MASK_REGNO_P (REGNO (operands[4]))) /* Operand 4 is in a mask register.  */
+    {
+      emit_insn (gen_kortest<SWI1248_AVX512BWDQ_64:mode>_ccc (operands[4], operands[4]));
+      flag_reg = gen_rtx_REG (CCCmode, FLAGS_REG); /* kortest result is read in CCCmode.  */
+    }
+  else /* Otherwise emit an explicit compare of operand 4 with -1 in CCZmode.  */
+    {
+      flag_reg = gen_rtx_REG (CCZmode, FLAGS_REG);
+      emit_insn (gen_rtx_SET (flag_reg,
+                             gen_rtx_COMPARE (CCZmode,
+                                              operands[4],
+                                              constm1_rtx)));
+    }
+  operands[5] = gen_rtx_fmt_ee (GET_CODE (operands[1]), VOIDmode,
+                               flag_reg,const0_rtx); /* Same comparison code as operand 1, now on the flags.  */
+  operands[0] = gen_lowpart (SImode, operands[0]); /* The split emits an SImode if_then_else.  */
+  operands[2] = gen_lowpart (SImode, operands[2]);
+  operands[3] = gen_lowpart (SImode, operands[3]);
+}
+  [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd")
+   (set_attr "type" "icmov")
+   (set_attr "mode" "QI")])
+
+(define_insn_and_split "*kortest_cmp<SWI1248_AVX512BWDQ_64:mode>_mov<SWI248:mode>cc"
+   [(set (match_operand:SWI248 0 "register_operand" "=r,r,r,r,r,r,r,r")
+      (if_then_else:SWI248
+       (match_operator 1 "bt_comparison_operator"
+         [(match_operand:SWI1248_AVX512BWDQ_64 4 "register_operand"
+         "?k,<SWI1248_AVX512BWDQ_64:r>,?k, <SWI1248_AVX512BWDQ_64:r>,?k,r,?k, r")
+          (const_int -1)])
+       (match_operand:SWI248 2 "nonimmediate_operand" "rm,rm, 0, 0,rm,rm, r, r")
+       (match_operand:SWI248 3 "nonimmediate_operand" " 0, 0,rm,rm, r, r,rm,rm")))
+    (clobber (reg:CC FLAGS_REG))]
+  "TARGET_AVX512BW && TARGET_CMOVE
+   && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
+  "#" ;; No assembly of its own; split once reload has completed.
+  "&& reload_completed"
+  [(set (match_dup 0)
+       (if_then_else:SWI248
+         (match_dup 5)
+         (match_dup 2)
+         (match_dup 3)))]
+{
+  rtx flag_reg;
+  if (MASK_REGNO_P (REGNO (operands[4]))) /* Operand 4 is in a mask register.  */
+    {
+      emit_insn (gen_kortest<SWI1248_AVX512BWDQ_64:mode>_ccc (operands[4], operands[4]));
+      flag_reg = gen_rtx_REG (CCCmode, FLAGS_REG); /* kortest result is read in CCCmode.  */
+    }
+  else /* Otherwise emit an explicit compare of operand 4 with -1 in CCZmode.  */
+    {
+      flag_reg = gen_rtx_REG (CCZmode, FLAGS_REG);
+      emit_insn (gen_rtx_SET (flag_reg,
+                             gen_rtx_COMPARE (CCZmode,
+                                              operands[4],
+                                              constm1_rtx)));
+    }
+  operands[5] = gen_rtx_fmt_ee (GET_CODE (operands[1]), VOIDmode,
+                               flag_reg,const0_rtx); /* Same comparison code as operand 1, now on the flags.  */
+}
+  [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd,apx_ndd,apx_ndd")
+   (set_attr "type" "icmov")
+   (set_attr "mode" "<SWI248:MODE>")])
+
 (define_insn "kunpckhi"
   [(set (match_operand:HI 0 "register_operand" "=k")
        (ior:HI
diff --git a/gcc/testsuite/gcc.target/i386/pr117232-1.c b/gcc/testsuite/gcc.target/i386/pr117232-1.c
new file mode 100644
index 000000000000..cd7f5d112a79
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr117232-1.c
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -mavx512dq -O2" } */
+/* { dg-final { scan-assembler-times {(?n)kortest[bwqd]} 7 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times {(?n)cmovn?c} 7 { target { ! ia32 } } } } */
+
+#include <immintrin.h>
+int
+foo (__m512i a, __m512i b, int c, int d) {
+  __mmask64 k = _mm512_cmpeq_epi8_mask (a, b); /* __mmask64 result of the epi8 equality compare of a and b */
+  return k == (__mmask64) -1 ? c : d; /* int select: c when k equals (__mmask64) -1, otherwise d */
+}
+
+int
+foo1 (__m512i a, __m512i b, int c, int d) {
+  __mmask32 k = _mm512_cmpeq_epi16_mask (a, b); /* __mmask32 result of the epi16 equality compare of a and b */
+  return k == (__mmask32) -1 ? c : d; /* int select: c when k equals (__mmask32) -1, otherwise d */
+}
+
+int
+foo2 (__m512i a, __m512i b, int c, int d) {
+  __mmask16 k = _mm512_cmpeq_epi32_mask (a, b); /* __mmask16 result of the epi32 equality compare of a and b */
+  return k == (__mmask16) -1 ? c : d; /* int select: c when k equals (__mmask16) -1, otherwise d */
+}
+
+int
+foo3 (__m512i a, __m512i b, int c, int d) {
+  __mmask8 k = _mm512_cmpeq_epi64_mask (a, b); /* __mmask8 result of the epi64 equality compare of a and b */
+  return k == (__mmask8) -1 ? c : d; /* int select: c when k equals (__mmask8) -1, otherwise d */
+}
+
+short
+foo4 (__m512i a, __m512i b, short c, short d) {
+  __mmask8 k = _mm512_cmpeq_epi64_mask (a, b); /* __mmask8 result of the epi64 equality compare of a and b */
+  return k == (__mmask8) -1 ? c : d; /* short select (presumably the HImode movcc case): c when k equals (__mmask8) -1, otherwise d */
+}
+
+char
+foo5 (__m512i a, __m512i b, char c, char d) {
+  __mmask64 k = _mm512_cmpeq_epi8_mask (a, b); /* __mmask64 result of the epi8 equality compare of a and b */
+  return k == (__mmask64) -1 ? c : d; /* char select (presumably the QImode movcc case): c when k equals (__mmask64) -1, otherwise d */
+}
+
+long long
+foo6 (__m512i a, __m512i b, long long c, long long d) {
+  __mmask16 k = _mm512_cmpeq_epi32_mask (a, b); /* __mmask16 result of the epi32 equality compare of a and b */
+  return k == (__mmask16) -1 ? c : d; /* long long select (presumably the DImode movcc case): c when k equals (__mmask16) -1, otherwise d */
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr117232-apx-1.c b/gcc/testsuite/gcc.target/i386/pr117232-apx-1.c
new file mode 100644
index 000000000000..e3571adf6dd2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr117232-apx-1.c
@@ -0,0 +1,48 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mavx512bw -mavx512vl -mavx512dq -mapxf -O2" } */
+/* { dg-final { scan-assembler-times {(?n)kortest[bwqd]} 7 } } */
+/* { dg-final { scan-assembler-times {(?n)cmovn?c} 7 } } */
+
+#include <immintrin.h>
+
+int
+foo (__m512i a, __m512i b, int c, int d) {
+  __mmask64 k = _mm512_cmpeq_epi8_mask (a, b); /* __mmask64 result of the epi8 equality compare of a and b */
+  return k == (__mmask64) -1 ? c : d; /* int select: c when k equals (__mmask64) -1, otherwise d */
+}
+
+int
+foo1 (__m512i a, __m512i b, int c, int d) {
+  __mmask32 k = _mm512_cmpeq_epi16_mask (a, b); /* __mmask32 result of the epi16 equality compare of a and b */
+  return k == (__mmask32) -1 ? c : d; /* int select: c when k equals (__mmask32) -1, otherwise d */
+}
+
+int
+foo2 (__m512i a, __m512i b, int c, int d) {
+  __mmask16 k = _mm512_cmpeq_epi32_mask (a, b); /* __mmask16 result of the epi32 equality compare of a and b */
+  return k == (__mmask16) -1 ? c : d; /* int select: c when k equals (__mmask16) -1, otherwise d */
+}
+
+int
+foo3 (__m512i a, __m512i b, int c, int d) {
+  __mmask8 k = _mm512_cmpeq_epi64_mask (a, b); /* __mmask8 result of the epi64 equality compare of a and b */
+  return k == (__mmask8) -1 ? c : d; /* int select: c when k equals (__mmask8) -1, otherwise d */
+}
+
+short
+foo4 (__m512i a, __m512i b, short c, short d) {
+  __mmask8 k = _mm512_cmpeq_epi64_mask (a, b); /* __mmask8 result of the epi64 equality compare of a and b */
+  return k == (__mmask8) -1 ? c : d; /* short select (presumably the HImode movcc case): c when k equals (__mmask8) -1, otherwise d */
+}
+
+char
+foo5 (__m512i a, __m512i b, char c, char d) {
+  __mmask64 k = _mm512_cmpeq_epi8_mask (a, b); /* __mmask64 result of the epi8 equality compare of a and b */
+  return k == (__mmask64) -1 ? c : d; /* char select (presumably the QImode movcc case): c when k equals (__mmask64) -1, otherwise d */
+}
+
+long long
+foo6 (__m512i a, __m512i b, long long c, long long d) {
+  __mmask16 k = _mm512_cmpeq_epi32_mask (a, b); /* __mmask16 result of the epi32 equality compare of a and b */
+  return k == (__mmask16) -1 ? c : d; /* long long select (presumably the DImode movcc case): c when k equals (__mmask16) -1, otherwise d */
+}

Reply via email to