For the comparisons NEQ/LT/NLE, the result is simplified to 0.
For the comparisons LE/EQ/NLT, it is simplified to (1u << nelts) - 1.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ready to push to trunk.

gcc/ChangeLog:

        PR target/122320
        * config/i386/sse.md (*<avx512>_cmp<mode>3_dup_op): New
        define_insn_and_split.

gcc/testsuite/ChangeLog:

        * gcc.target/i386/pr122320-mask16.c: New test.
        * gcc.target/i386/pr122320-mask2.c: New test.
        * gcc.target/i386/pr122320-mask32.c: New test.
        * gcc.target/i386/pr122320-mask4.c: New test.
        * gcc.target/i386/pr122320-mask64.c: New test.
        * gcc.target/i386/pr122320-mask8.c: New test.
---
 gcc/config/i386/sse.md                        | 27 ++++++++++++++++
 .../gcc.target/i386/pr122320-mask16.c         | 32 +++++++++++++++++++
 .../gcc.target/i386/pr122320-mask2.c          | 32 +++++++++++++++++++
 .../gcc.target/i386/pr122320-mask32.c         | 32 +++++++++++++++++++
 .../gcc.target/i386/pr122320-mask4.c          | 32 +++++++++++++++++++
 .../gcc.target/i386/pr122320-mask64.c         | 32 +++++++++++++++++++
 .../gcc.target/i386/pr122320-mask8.c          | 32 +++++++++++++++++++
 7 files changed, 219 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr122320-mask16.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr122320-mask2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr122320-mask32.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr122320-mask4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr122320-mask64.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr122320-mask8.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 8b28c8edb19..4ad17f67b9d 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -4632,6 +4632,33 @@ (define_insn_and_split "*<avx512>_cmp<mode>3"
           UNSPEC_PCMP_ITER))]
   "operands[4] = GEN_INT (INTVAL (operands[3]) ^ 4);")
 
+(define_insn_and_split "*<avx512>_cmp<mode>3_dup_op"
+  [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
+       (unspec:<avx512fmaskmode>
+         [(match_operand:VI1248_AVX512VLBW 1 "general_operand")
+          (match_operand:VI1248_AVX512VLBW 2 "general_operand")
+          (match_operand:SI 3 "<cmp_imm_predicate>")]
+         UNSPEC_PCMP_ITER))]
+  "TARGET_AVX512F && ix86_pre_reload_split ()
+   && rtx_equal_p (operands[1], operands[2])"
+  "#"
+  "&& 1"
+  [(set (match_dup 0) (match_dup 4))]
+{
+  int cmp_imm = INTVAL (operands[3]);
+  rtx res = CONST0_RTX (<avx512fmaskmode>mode);
+  /* With identical operands EQ (0), LE (2), NLT (5) and TRUE (7) hold for
+     every element; LT (1), FALSE (3), NE (4) and NLE (6) hold for none.  */
+  if (cmp_imm == 0 || cmp_imm == 2 || cmp_imm == 5 || cmp_imm == 7)
+    {
+      int nelts = GET_MODE_NUNITS (<MODE>mode);
+      /* Below 8 elements only the low NELTS mask bits are set.  */
+      res = (nelts >= 8 ? CONSTM1_RTX (<avx512fmaskmode>mode)
+             : gen_int_mode ((1u << nelts) - 1, QImode));
+    }
+  operands[4] = res;
+})
+
 (define_insn "*<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
   [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k,k")
        (unspec:<avx512fmaskmode>
diff --git a/gcc/testsuite/gcc.target/i386/pr122320-mask16.c b/gcc/testsuite/gcc.target/i386/pr122320-mask16.c
new file mode 100644
index 00000000000..2796d748d46
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr122320-mask16.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64-v4 -O2" } */
+/* { dg-final { scan-assembler-not "vpcmp" } } */
+
+#include <immintrin.h>
+
+__mmask16 dumpy_eq (__m512i vx){
+  return _mm512_cmp_epi32_mask (vx, vx, 0);
+}
+
+__mmask16 dumpy_lt (__m512i vx)
+{
+  return _mm512_cmp_epi32_mask (vx, vx, 1);
+}
+
+__mmask16 dumpy_le (__m512i vx){
+  return _mm512_cmp_epi32_mask (vx, vx, 2);
+}
+
+__mmask16 dumpy_ne (__m512i vx)
+{
+  return _mm512_cmp_epi32_mask (vx, vx, 4);
+}
+
+__mmask16 dumpy_nlt (__m512i vx)
+{
+  return _mm512_cmp_epi32_mask (vx, vx, 5);
+}
+
+__mmask16 dumpy_nle (__m512i vx){
+  return _mm512_cmp_epi32_mask (vx, vx, 6);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr122320-mask2.c b/gcc/testsuite/gcc.target/i386/pr122320-mask2.c
new file mode 100644
index 00000000000..bcbc47aef5c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr122320-mask2.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64-v4 -O2" } */
+/* { dg-final { scan-assembler-not "vpcmp" } } */
+
+#include <immintrin.h>
+
+__mmask8 dumpy_eq (__m128i vx){
+  return _mm_cmp_epi64_mask (vx, vx, 0);
+}
+
+__mmask8 dumpy_lt (__m128i vx)
+{
+  return _mm_cmp_epi64_mask (vx, vx, 1);
+}
+
+__mmask8 dumpy_le (__m128i vx){
+  return _mm_cmp_epi64_mask (vx, vx, 2);
+}
+
+__mmask8 dumpy_ne (__m128i vx)
+{
+  return _mm_cmp_epi64_mask (vx, vx, 4);
+}
+
+__mmask8 dumpy_nlt (__m128i vx)
+{
+  return _mm_cmp_epi64_mask (vx, vx, 5);
+}
+
+__mmask8 dumpy_nle (__m128i vx){
+  return _mm_cmp_epi64_mask (vx, vx, 6);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr122320-mask32.c b/gcc/testsuite/gcc.target/i386/pr122320-mask32.c
new file mode 100644
index 00000000000..d75c8b0dfac
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr122320-mask32.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64-v4 -O2" } */
+/* { dg-final { scan-assembler-not "vpcmp" } } */
+
+#include <immintrin.h>
+
+__mmask32 dumpy_eq (__m512i vx){
+  return _mm512_cmp_epi16_mask (vx, vx, 0);
+}
+
+__mmask32 dumpy_lt (__m512i vx)
+{
+  return _mm512_cmp_epi16_mask (vx, vx, 1);
+}
+
+__mmask32 dumpy_le (__m512i vx){
+  return _mm512_cmp_epi16_mask (vx, vx, 2);
+}
+
+__mmask32 dumpy_ne (__m512i vx)
+{
+  return _mm512_cmp_epi16_mask (vx, vx, 4);
+}
+
+__mmask32 dumpy_nlt (__m512i vx)
+{
+  return _mm512_cmp_epi16_mask (vx, vx, 5);  
+}
+
+__mmask32 dumpy_nle (__m512i vx){
+  return _mm512_cmp_epi16_mask (vx, vx, 6);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr122320-mask4.c b/gcc/testsuite/gcc.target/i386/pr122320-mask4.c
new file mode 100644
index 00000000000..7f2ec7d5f22
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr122320-mask4.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64-v4 -O2" } */
+/* { dg-final { scan-assembler-not "vpcmp" } } */
+
+#include <immintrin.h>
+
+__mmask8 dumpy_eq (__m256i vx){
+  return _mm256_cmp_epi64_mask (vx, vx, 0);
+}
+
+__mmask8 dumpy_lt (__m256i vx)
+{
+  return _mm256_cmp_epi64_mask (vx, vx, 1);
+}
+
+__mmask8 dumpy_le (__m256i vx){
+  return _mm256_cmp_epi64_mask (vx, vx, 2);
+}
+
+__mmask8 dumpy_ne (__m256i vx)
+{
+  return _mm256_cmp_epi64_mask (vx, vx, 4);
+}
+
+__mmask8 dumpy_nlt (__m256i vx)
+{
+  return _mm256_cmp_epi64_mask (vx, vx, 5);  
+}
+
+__mmask8 dumpy_nle (__m256i vx){
+  return _mm256_cmp_epi64_mask (vx, vx, 6);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr122320-mask64.c b/gcc/testsuite/gcc.target/i386/pr122320-mask64.c
new file mode 100644
index 00000000000..6a7ce5112c3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr122320-mask64.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64-v4 -O2" } */
+/* { dg-final { scan-assembler-not "vpcmp" } } */
+
+#include <immintrin.h>
+
+__mmask64 dumpy_eq (__m512i vx){
+  return _mm512_cmp_epi8_mask (vx, vx, 0);
+}
+
+__mmask64 dumpy_lt (__m512i vx)
+{
+  return _mm512_cmp_epi8_mask (vx, vx, 1);
+}
+
+__mmask64 dumpy_le (__m512i vx){
+  return _mm512_cmp_epi8_mask (vx, vx, 2);
+}
+
+__mmask64 dumpy_ne (__m512i vx)
+{
+  return _mm512_cmp_epi8_mask (vx, vx, 4);
+}
+
+__mmask64 dumpy_nlt (__m512i vx)
+{
+  return _mm512_cmp_epi8_mask (vx, vx, 5);
+}
+
+__mmask64 dumpy_nle (__m512i vx){
+  return _mm512_cmp_epi8_mask (vx, vx, 6);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr122320-mask8.c b/gcc/testsuite/gcc.target/i386/pr122320-mask8.c
new file mode 100644
index 00000000000..e724a68e7eb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr122320-mask8.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64-v4 -O2" } */
+/* { dg-final { scan-assembler-not "vpcmp" } } */
+
+#include <immintrin.h>
+
+__mmask8 dumpy_eq (__m512i vx){
+  return _mm512_cmp_epi64_mask (vx, vx, 0);
+}
+
+__mmask8 dumpy_lt (__m512i vx)
+{
+  return _mm512_cmp_epi64_mask (vx, vx, 1);
+}
+
+__mmask8 dumpy_le (__m512i vx){
+  return _mm512_cmp_epi64_mask (vx, vx, 2);
+}
+
+__mmask8 dumpy_ne (__m512i vx)
+{
+  return _mm512_cmp_epi64_mask (vx, vx, 4);
+}
+
+__mmask8 dumpy_nlt (__m512i vx)
+{
+  return _mm512_cmp_epi64_mask (vx, vx, 5);
+}
+
+__mmask8 dumpy_nle (__m512i vx){
+  return _mm512_cmp_epi64_mask (vx, vx, 6);
+}
-- 
2.34.1

Reply via email to