Hi!

As mentioned in the PR, vptestnm* instructions with the same input operand used
twice perform the same comparison as vpcmpeq* against zero vector (vptestm*
would compute the inverse, x != 0), with the advantage that a register holding
CONST0_RTX (mode) is not needed.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2018-05-23  Jakub Jelinek  <ja...@redhat.com>

        PR target/85832
        * config/i386/sse.md (<avx512>_eq<mode>3<mask_scalar_merge_name>_1):
        Add (=Yk,v,C) variant using vptestnm insn.  Use TARGET_AVX512BW
        in test instead of TARGET_AVX512F for VI12_AVX512VL iterator.

        * gcc.target/i386/avx512f-pr85832.c: New test.
        * gcc.target/i386/avx512vl-pr85832.c: New test.
        * gcc.target/i386/avx512bw-pr85832.c: New test.
        * gcc.target/i386/avx512vlbw-pr85832.c: New test.

--- gcc/config/i386/sse.md.jj   2018-05-21 13:15:43.478581765 +0200
+++ gcc/config/i386/sse.md      2018-05-21 14:15:00.523635533 +0200
@@ -11210,26 +11210,30 @@ (define_expand "<avx512>_eq<mode>3<mask_
   "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
 
 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
-  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
+  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk,Yk")
        (unspec:<avx512fmaskmode>
-         [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "%v")
-          (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
+         [(match_operand:VI12_AVX512VL 1 "vector_move_operand" "%v,v")
+          (match_operand:VI12_AVX512VL 2 "vector_move_operand" "vm,C")]
          UNSPEC_MASKED_EQ))]
-  "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
-  "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
+  "TARGET_AVX512BW && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "@
+   vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}
+   vptestnm<ssemodesuffix>\t{%1, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %1}"
   [(set_attr "type" "ssecmp")
    (set_attr "prefix_extra" "1")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
-  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
+  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk,Yk")
        (unspec:<avx512fmaskmode>
-         [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "%v")
-          (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
+         [(match_operand:VI48_AVX512VL 1 "vector_move_operand" "%v,v")
+          (match_operand:VI48_AVX512VL 2 "vector_move_operand" "vm,C")]
          UNSPEC_MASKED_EQ))]
   "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
-  "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
+  "@
+   vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}
+   vptestnm<ssemodesuffix>\t{%1, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %1}"
   [(set_attr "type" "ssecmp")
    (set_attr "prefix_extra" "1")
    (set_attr "prefix" "evex")
--- gcc/testsuite/gcc.target/i386/avx512f-pr85832.c.jj  2018-05-21 14:59:24.612544725 +0200
+++ gcc/testsuite/gcc.target/i386/avx512f-pr85832.c     2018-05-21 14:59:03.448530016 +0200
@@ -0,0 +1,19 @@
+/* PR target/85832 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -mno-avx512vl -mno-avx512bw -masm=att" } */
+/* { dg-final { scan-assembler-times {\mvptestnmd\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvptestnmq\M} 1 } } */
+
+#include <x86intrin.h>
+
+int
+f1 (__m512i x)
+{
+  return _mm512_cmpeq_epi32_mask (x, _mm512_setzero_si512 ());
+}
+
+int
+f2 (__m512i x)
+{
+  return _mm512_cmpeq_epi64_mask (x, _mm512_setzero_si512 ());
+}
--- gcc/testsuite/gcc.target/i386/avx512vl-pr85832.c.jj 2018-05-21 15:00:31.785591412 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-pr85832.c    2018-05-21 15:00:17.872581739 +0200
@@ -0,0 +1,31 @@
+/* PR target/85832 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl -mno-avx512bw -masm=att" } */
+/* { dg-final { scan-assembler-times {\mvptestnmd\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mvptestnmq\M} 2 } } */
+
+#include <x86intrin.h>
+
+int
+f1 (__m256i x)
+{
+  return _mm256_cmpeq_epi32_mask (x, _mm256_setzero_si256 ());
+}
+
+int
+f2 (__m256i x)
+{
+  return _mm256_cmpeq_epi64_mask (x, _mm256_setzero_si256 ());
+}
+
+int
+f3 (__m128i x)
+{
+  return _mm_cmpeq_epi32_mask (x, _mm_setzero_si128 ());
+}
+
+int
+f4 (__m128i x)
+{
+  return _mm_cmpeq_epi64_mask (x, _mm_setzero_si128 ());
+}
--- gcc/testsuite/gcc.target/i386/avx512bw-pr85832.c.jj 2018-05-21 15:11:46.587102827 +0200
+++ gcc/testsuite/gcc.target/i386/avx512bw-pr85832.c    2018-05-21 15:11:31.184090604 +0200
@@ -0,0 +1,19 @@
+/* PR target/85832 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bw -mno-avx512vl -masm=att" } */
+/* { dg-final { scan-assembler-times {\mvptestnmb\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvptestnmw\M} 1 } } */
+
+#include <x86intrin.h>
+
+int
+f1 (__m512i x)
+{
+  return _mm512_cmpeq_epi8_mask (x, _mm512_setzero_si512 ());
+}
+
+int
+f2 (__m512i x)
+{
+  return _mm512_cmpeq_epi16_mask (x, _mm512_setzero_si512 ());
+}
--- gcc/testsuite/gcc.target/i386/avx512vlbw-pr85832.c.jj       2018-05-21 15:12:41.021143390 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vlbw-pr85832.c  2018-05-21 15:12:18.026126562 +0200
@@ -0,0 +1,31 @@
+/* PR target/85832 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl -mavx512bw -masm=att" } */
+/* { dg-final { scan-assembler-times {\mvptestnmb\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mvptestnmw\M} 2 } } */
+
+#include <x86intrin.h>
+
+int
+f1 (__m256i x)
+{
+  return _mm256_cmpeq_epi8_mask (x, _mm256_setzero_si256 ());
+}
+
+int
+f2 (__m256i x)
+{
+  return _mm256_cmpeq_epi16_mask (x, _mm256_setzero_si256 ());
+}
+
+int
+f3 (__m128i x)
+{
+  return _mm_cmpeq_epi8_mask (x, _mm_setzero_si128 ());
+}
+
+int
+f4 (__m128i x)
+{
+  return _mm_cmpeq_epi16_mask (x, _mm_setzero_si128 ());
+}

        Jakub

Reply via email to