For an instruction sequence like
kmovb %k0, %edx
kmovb %k1, %ecx
orb %cl, %dl
je .L5
if only the zero flag (CCZ) is consumed, it can be optimized to
kortestb %k1, %k0
je .L5
Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32}.
Ready to push to trunk.
gcc/ChangeLog:
* config/i386/i386.md (*ior<mode>_ccz_1): New define_insn.
gcc/testsuite/ChangeLog:
* gcc.target/i386/kortest_ccz-1.c: New test.
---
gcc/config/i386/i386.md | 16 ++++++++++++++++
gcc/testsuite/gcc.target/i386/kortest_ccz-1.c | 13 +++++++++++++
2 files changed, 29 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/i386/kortest_ccz-1.c
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 3ea2439526b..ae26ef02c22 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -14203,6 +14203,22 @@ (define_insn "*<code>si_2_zext_imm"
(set_attr "isa" "*,apx_ndd")
(set_attr "mode" "SI")])
+;; It must be put before *<code><mode>_3, the one below.
+(define_insn "*ior<mode>_ccz_1" ; IOR compared with zero, CCZ result only
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ
+ (ior:SWI1248_AVX512BWDQ_64
+ (match_operand:SWI1248_AVX512BWDQ_64 1 "nonimmediate_operand" "%0,?k")
+ (match_operand:SWI1248_AVX512BWDQ_64 2 "<general_operand>" "<g>, k"))
+ (const_int 0)))
+ (clobber (match_scratch:SWI1248_AVX512BWDQ_64 0 "=<r>, X"))] ; kortest alt never uses %0
+ "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ or{<imodesuffix>}\t{%2, %0|%0, %2}
+ kortest<mskmodesuffix>\t{%1, %2|%2, %1}"
+ [(set_attr "type" "alu,msklog")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "*<code><mode>_3"
[(set (reg FLAGS_REG)
(compare (any_or:SWI
diff --git a/gcc/testsuite/gcc.target/i386/kortest_ccz-1.c b/gcc/testsuite/gcc.target/i386/kortest_ccz-1.c
new file mode 100644
index 00000000000..0f8d4bf9f22
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/kortest_ccz-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64-v4 -O3" } */
+/* { dg-final { scan-assembler-not "kmov" } } */
+/* { dg-final { scan-assembler "kortest" } } */
+/* Early exit on a[i] > b || d[i] > c; expect kortest and no kmov in the output.  */
+int
+foo (int *__restrict a, int* __restrict d, int b, int c, int n)
+{
+ for (int i = 0; i != 10000; i++)
+ if (a[i] > b || d[i] > c)
+ return 1;
+ return 0;
+}
--
2.34.1