Hi all,
This patch aims to achieve EQ/NE comparison between an AVX512 kmask and -1
by using kortest and checking CF.
Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,-m64}. Ok for trunk?
BRs,
Lin
gcc/ChangeLog:
PR target/113609
* config/i386/sse.md
(*kortest_cmp<mode>_setcc): New define_insn_and_split.
(*kortest_cmp<mode>_jcc): Ditto.
gcc/testsuite/ChangeLog:
PR target/113609
* gcc.target/i386/pr113609-1.c: New test.
* gcc.target/i386/pr113609-2.c: Ditto.
---
gcc/config/i386/sse.md | 67 +++++++
gcc/testsuite/gcc.target/i386/pr113609-1.c | 194 +++++++++++++++++++++
gcc/testsuite/gcc.target/i386/pr113609-2.c | 161 +++++++++++++++++
3 files changed, 422 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/i386/pr113609-1.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr113609-2.c
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index b59c988fc31..34fd2e4afac 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -2201,6 +2201,73 @@ (define_expand "kortest<mode>"
UNSPEC_KORTEST))]
"TARGET_AVX512F")
+;; Optimize cmp + setcc with mask register by kortest + setcc.
+(define_insn_and_split "*kortest_cmp<mode>_setcc"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm, qm")
+ (match_operator:QI 1 "bt_comparison_operator"
+ [(match_operand:SWI1248_AVX512BWDQ_64 2 "register_operand" "?k, <r>")
+ (const_int -1)]))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_AVX512BW"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ if (MASK_REGNO_P (REGNO (operands[2])))
+ {
+ emit_insn (gen_kortest<mode>_ccc (operands[2], operands[2]));
+ operands[4] = gen_rtx_REG (CCCmode, FLAGS_REG);
+ }
+ else
+ {
+ operands[4] = gen_rtx_REG (CCZmode, FLAGS_REG);
+ emit_insn (gen_rtx_SET (operands[4],
+ gen_rtx_COMPARE (CCZmode,
+ operands[2],
+ constm1_rtx)));
+ }
+ ix86_expand_setcc (operands[0],
+ GET_CODE (operands[1]),
+ operands[4],
+ const0_rtx);
+ DONE;
+})
+
+;; Optimize cmp + jcc with mask register by kortest + jcc.
+(define_insn_and_split "*kortest_cmp<mode>_jcc"
+ [(set (pc)
+ (if_then_else
+ (match_operator 0 "bt_comparison_operator"
+ [(match_operand:SWI1248_AVX512BWDQ_64 1 "register_operand" "?k, <r>")
+ (const_int -1)])
+ (label_ref (match_operand 2))
+ (pc)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_AVX512BW"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ if (MASK_REGNO_P (REGNO (operands[1])))
+ {
+ emit_insn (gen_kortest<mode>_ccc (operands[1], operands[1]));
+ operands[4] = gen_rtx_REG (CCCmode, FLAGS_REG);
+ }
+ else
+ {
+ operands[4] = gen_rtx_REG (CCZmode, FLAGS_REG);
+ emit_insn (gen_rtx_SET (operands[4],
+ gen_rtx_COMPARE (CCZmode,
+ operands[1],
+ constm1_rtx)));
+ }
+ ix86_expand_branch (GET_CODE (operands[0]),
+ operands[4],
+ const0_rtx,
+ operands[2]);
+ DONE;
+})
+
(define_insn "kunpckhi"
[(set (match_operand:HI 0 "register_operand" "=k")
(ior:HI
diff --git a/gcc/testsuite/gcc.target/i386/pr113609-1.c b/gcc/testsuite/gcc.target/i386/pr113609-1.c
new file mode 100644
index 00000000000..f0639b8500a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr113609-1.c
@@ -0,0 +1,194 @@
+/* PR target/113609 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v4" } */
+/* { dg-final { scan-assembler-not "^cmp" } } */
+/* { dg-final { scan-assembler-not "\[ \\t\]+sete" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-not "\[ \\t\]+setne" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-not "\[ \\t\]+je" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-not "\[ \\t\]+jne" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "\[ \\t\]+sete" 1 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "\[ \\t\]+setne" 1 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "\[ \\t\]+je" 1 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "\[ \\t\]+jne" 2 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "kortest" 12 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "kortest" 17 { target { ! ia32 } } } } */
+
+#include <immintrin.h>
+
+unsigned int
+cmp_vector_sete_mask8(__m128i a, __m128i b)
+{
+ __mmask8 k = _mm_cmpeq_epi16_mask (a, b);
+ if (k == (__mmask8) -1)
+ return 1;
+ else
+ return 0;
+}
+
+unsigned int
+cmp_vector_sete_mask16(__m128i a, __m128i b)
+{
+ __mmask16 k = _mm_cmpeq_epi8_mask (a, b);
+ if (k == (__mmask16) -1)
+ return 1;
+ else
+ return 0;
+}
+
+unsigned int
+cmp_vector_sete_mask32(__m256i a, __m256i b)
+{
+ __mmask32 k = _mm256_cmpeq_epi8_mask (a, b);
+ if (k == (__mmask32) -1)
+ return 1;
+ else
+ return 0;
+}
+
+unsigned int
+cmp_vector_sete_mask64(__m512i a, __m512i b)
+{
+ __mmask64 k = _mm512_cmpeq_epi8_mask (a, b);
+ if (k == (__mmask64) -1)
+ return 1;
+ else
+ return 0;
+}
+
+unsigned int
+cmp_vector_setne_mask8(__m128i a, __m128i b)
+{
+ __mmask8 k = _mm_cmpeq_epi16_mask (a, b);
+ if (k != (__mmask8) -1)
+ return 1;
+ else
+ return 0;
+}
+
+unsigned int
+cmp_vector_setne_mask16(__m128i a, __m128i b)
+{
+ __mmask16 k = _mm_cmpeq_epi8_mask (a, b);
+ if (k != (__mmask16) -1)
+ return 1;
+ else
+ return 0;
+}
+
+unsigned int
+cmp_vector_setne_mask32(__m256i a, __m256i b)
+{
+ __mmask32 k = _mm256_cmpeq_epi8_mask (a, b);
+ if (k != (__mmask32) -1)
+ return 1;
+ else
+ return 0;
+}
+
+unsigned int
+cmp_vector_setne_mask64(__m512i a, __m512i b)
+{
+ __mmask64 k = _mm512_cmpeq_epi8_mask (a, b);
+ if (k != (__mmask64) -1)
+ return 1;
+ else
+ return 0;
+}
+
+__m128i
+cmp_vector_je_mask8(__m128i a, __m128i b) {
+ __mmask8 k = _mm_cmpeq_epi16_mask (a, b);
+ if (k == (__mmask8) -1) {
+ a[0] = a[0] + 1;
+ }
+ else {
+ a[0] = a[0] - 1;
+ }
+ return a;
+}
+
+__m128i
+cmp_vector_je_mask16(__m128i a, __m128i b) {
+ __mmask16 k = _mm_cmpeq_epi8_mask (a, b);
+ if (k == (__mmask16) -1) {
+ a[0] = a[0] + 1;
+ }
+ else {
+ a[0] = a[0] - 1;
+ }
+ return a;
+}
+
+__m256i
+cmp_vector_je_mask32(__m256i a, __m256i b) {
+ __mmask32 k = _mm256_cmpeq_epi8_mask (a, b);
+ if (k == (__mmask32) -1) {
+ a[0] = a[0] + 1;
+ }
+ else {
+ a[0] = a[0] - 1;
+ }
+ return a;
+}
+
+__m512i
+cmp_vector_je_mask64(__m512i a, __m512i b) {
+ __mmask64 k = _mm512_cmpeq_epi8_mask (a, b);
+ if (k == (__mmask64) -1) {
+ a[0] = a[0] + 1;
+ }
+ else {
+ a[0] = a[0] - 5;
+ }
+ return a;
+}
+
+__m128i
+cmp_vector_jne_mask8(__m128i a, __m128i b) {
+ __mmask8 k = _mm_cmpeq_epi16_mask (a, b);
+ if (k == (__mmask8) -1) {
+ a[0] = a[0] + 1;
+ }
+ a[0] = a[0] - 4;
+ return a;
+}
+
+__m128i
+cmp_vector_jne_mask16(__m128i a, __m128i b) {
+ __mmask16 k = _mm_cmpeq_epi8_mask (a, b);
+ if (k == (__mmask16) -1) {
+ a[0] = a[0] + 1;
+ }
+ a[0] = a[0] - 4;
+ return a;
+}
+
+__m256i
+cmp_vector_jne_mask32(__m256i a, __m256i b) {
+ __mmask32 k = _mm256_cmpeq_epi8_mask (a, b);
+ if (k == (__mmask32) -1) {
+ a[0] = a[0] + 1;
+ }
+ a[0] = a[0] - 4;
+ return a;
+}
+
+__m512i
+cmp_vector_jne_mask64(__m512i a, __m512i b) {
+ __mmask64 k = _mm512_cmpeq_epi8_mask (a, b);
+ if (k == (__mmask64) -1) {
+ a[0] = a[0] + 1;
+ }
+ a[0] = a[0] - 4;
+ return a;
+}
+
+__m512i
+mask_cmp_vector_jne_mask64(__m512i a, __m512i b) {
+ __mmask64 k = _mm512_mask_cmpeq_epi8_mask ((__mmask64)0xffffffefffffffff, a, b);
+ if (k == (__mmask64) -1) {
+ a[0] = a[0] + 1;
+ }
+ a[0] = a[0] - 4;
+ return a;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr113609-2.c b/gcc/testsuite/gcc.target/i386/pr113609-2.c
new file mode 100644
index 00000000000..e9503f51538
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr113609-2.c
@@ -0,0 +1,161 @@
+/* PR target/113609 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v4" } */
+/* { dg-final { scan-assembler-times "\[ \\t\]+sete" 4 } } */
+/* { dg-final { scan-assembler-times "\[ \\t\]+setne" 4 } } */
+/* { dg-final { scan-assembler-times "\[ \\t\]+je" 4 } } */
+/* { dg-final { scan-assembler-times "\[ \\t\]+jne" 4 } } */
+
+#include <immintrin.h>
+
+unsigned int
+cmp_pi8_setcc(char a)
+{
+ if (a == -1)
+ return 1;
+ else
+ return 0;
+}
+
+unsigned int
+cmp_pi16_setcc(short a)
+{
+ if (a == -1)
+ return 1;
+ else
+ return 0;
+}
+
+unsigned int
+cmp_pi32_setcc(int a)
+{
+ if (a == -1)
+ return 1;
+ else
+ return 0;
+}
+
+unsigned int
+cmp_pi64_setcc(long long a)
+{
+ if (a == -1)
+ return 1;
+ else
+ return 0;
+}
+
+unsigned int
+cmp_pi8_setne(char a)
+{
+ if (a != -1)
+ return 1;
+ else
+ return 0;
+}
+
+unsigned int
+cmp_pi16_setne(short a)
+{
+ if (a != -1)
+ return 1;
+ else
+ return 0;
+}
+
+unsigned int
+cmp_pi32_setne(int a)
+{
+ if (a != -1)
+ return 1;
+ else
+ return 0;
+}
+
+unsigned int
+cmp_pi64_setne(long long a)
+{
+ if (a != -1)
+ return 1;
+ else
+ return 0;
+}
+
+__m128i
+cmp_pi8_je(__m128i a, char b) {
+ if (b == -1) {
+ a[0] = a[0] + 1;
+ }
+ else {
+ a[0] = a[0] - 1;
+ }
+ return a;
+}
+
+__m128i
+cmp_pi16_je(__m128i a, short b) {
+ if (b == -1) {
+ a[0] = a[0] + 1;
+ }
+ else {
+ a[0] = a[0] - 1;
+ }
+ return a;
+}
+
+__m128i
+cmp_pi32_je(__m128i a, int b) {
+ if (b == -1) {
+ a[0] = a[0] + 1;
+ }
+ else {
+ a[0] = a[0] - 1;
+ }
+ return a;
+}
+
+__m128i
+cmp_pi64_je(__m128i a, long long b) {
+ if (b == -1) {
+ a[0] = a[0] + 1;
+ }
+ else {
+ a[0] = a[0] - 1;
+ }
+ return a;
+}
+
+__m128i
+cmp_pi8_jne(__m128i a, char b) {
+ if (b == -1) {
+ a[0] = a[0] + 1;
+ }
+ a[0] = a[0] - 4;
+ return a;
+}
+
+__m128i
+cmp_pi16_jne(__m128i a, short b) {
+ if (b == -1) {
+ a[0] = a[0] + 1;
+ }
+ a[0] = a[0] - 4;
+ return a;
+}
+
+__m128i
+cmp_pi32_jne(__m128i a, int b) {
+ if (b == -1) {
+ a[0] = a[0] + 1;
+ }
+ a[0] = a[0] - 4;
+ return a;
+}
+
+__m128i
+cmp_pi64_jne(__m128i a, long long b) {
+ if (b == -1) {
+ a[0] = a[0] + 1;
+ }
+ a[0] = a[0] - 4;
+ return a;
+}
--
2.31.1