Hi,

According to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=97770, the x86
backend needs a popcount<mode>2 expander so that __builtin_popcount can be
auto-vectorized for AVX512BITALG/AVX512VPOPCNTDQ targets.

For DImode the middle-end vectorizer cannot yet generate the expected code,
and for QI/HImode there is no corresponding IFN, so the assembler scans for
those cases are marked xfail in the tests.

Bootstrap and regression tests for the x86 backend are OK.

OK for master?

gcc/ChangeLog

    PR target/97770
    * config/i386/sse.md (popcount<mode>2): New expander
    for SI/DI vector modes.
    (popcount<mode>2): Likewise for QI/HI vector modes.

gcc/testsuite/ChangeLog

    PR target/97770
    * gcc.target/i386/avx512bitalg-pr97770-1.c: New test.
    * gcc.target/i386/avx512vpopcntdq-pr97770-1.c: Likewise.
    * gcc.target/i386/avx512vpopcntdq-pr97770-2.c: Likewise.
    * gcc.target/i386/avx512vpopcntdqvl-pr97770-1.c: Likewise.

-- 
Regards,

Hongyu, Wang
From b809052b0bab5d80dd0a1b1ffbd55faa8179a416 Mon Sep 17 00:00:00 2001
From: Hongyu Wang <hongyu.w...@intel.com>
Date: Wed, 11 Nov 2020 09:41:13 +0800
Subject: [PATCH] Add popcount<mode>2 expander to enable popcount auto
 vectorization under AVX512BITALG/AVX512VPOPCNTDQ targets.

gcc/ChangeLog

	PR target/97770
	* config/i386/sse.md (popcount<mode>2): New expander
	for SI/DI vector modes.
	(popcount<mode>2): Likewise for QI/HI vector modes.

gcc/testsuite/ChangeLog

	PR target/97770
	* gcc.target/i386/avx512bitalg-pr97770-1.c: New test.
	* gcc.target/i386/avx512vpopcntdq-pr97770-1.c: Likewise.
	* gcc.target/i386/avx512vpopcntdq-pr97770-2.c: Likewise.
	* gcc.target/i386/avx512vpopcntdqvl-pr97770-1.c: Likewise.
---
 gcc/config/i386/sse.md                        | 12 ++++
 .../gcc.target/i386/avx512bitalg-pr97770-1.c  | 60 ++++++++++++++++++
 .../i386/avx512vpopcntdq-pr97770-1.c          | 63 +++++++++++++++++++
 .../i386/avx512vpopcntdq-pr97770-2.c          | 39 ++++++++++++
 .../i386/avx512vpopcntdqvl-pr97770-1.c        | 14 +++++
 5 files changed, 188 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512bitalg-pr97770-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512vpopcntdq-pr97770-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512vpopcntdq-pr97770-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512vpopcntdqvl-pr97770-1.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 8437ad27087..8566b2ccda2 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -22678,6 +22678,12 @@ (define_insn "avx5124vnniw_vp4dpwssds_maskz"
     (set_attr ("prefix") ("evex"))
     (set_attr ("mode") ("TI"))])
 
+(define_expand "popcount<mode>2"
+  [(set (match_operand:VI48_AVX512VL 0 "register_operand")
+	(popcount:VI48_AVX512VL
+	  (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")))]
+  "TARGET_AVX512VPOPCNTDQ")
+
 (define_insn "vpopcount<mode><mask_name>"
   [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
 	(popcount:VI48_AVX512VL
@@ -22722,6 +22728,12 @@ (define_insn "*restore_multiple_leave_return<mode>"
   "TARGET_SSE && TARGET_64BIT"
   "jmp\t%P1")
 
+(define_expand "popcount<mode>2"
+  [(set (match_operand:VI12_AVX512VL 0 "register_operand")
+	(popcount:VI12_AVX512VL
+	  (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")))]
+  "TARGET_AVX512BITALG")
+
 (define_insn "vpopcount<mode><mask_name>"
   [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
 	(popcount:VI12_AVX512VL
diff --git a/gcc/testsuite/gcc.target/i386/avx512bitalg-pr97770-1.c b/gcc/testsuite/gcc.target/i386/avx512bitalg-pr97770-1.c
new file mode 100644
index 00000000000..c83a477045c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bitalg-pr97770-1.c
@@ -0,0 +1,60 @@
+/* PR target/97770 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bitalg -mavx512vl -mprefer-vector-width=512" } */
+/* Add xfail since there is no IFN for QI/HImode popcount.  */
+/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\\n\\r\]*xmm" 1 {xfail *-*-*} } } */
+/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\\n\\r\]*xmm" 1 {xfail *-*-*} } } */
+/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\\n\\r\]*ymm" 1 {xfail *-*-*} } } */
+/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\\n\\r\]*ymm" 1 {xfail *-*-*} } } */
+/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\\n\\r\]*zmm" 1 {xfail *-*-*} } } */
+/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\\n\\r\]*zmm" 1 {xfail *-*-*} } } */
+
+#include <immintrin.h>
+
+void
+__attribute__ ((noipa, optimize("-O3")))
+popcountb_128 (char * __restrict dest, char* src)
+{
+  for (int i = 0; i != 16; i++)
+    dest[i] = __builtin_popcount (src[i]);
+}
+
+void
+__attribute__ ((noipa, optimize("-O3")))
+popcountw_128 (short* __restrict dest, short* src)
+{
+  for (int i = 0; i != 8; i++)
+    dest[i] = __builtin_popcount (src[i]);
+}
+
+void
+__attribute__ ((noipa, optimize("-O3")))
+popcountb_256 (char * __restrict dest, char* src)
+{
+  for (int i = 0; i != 32; i++)
+    dest[i] = __builtin_popcount (src[i]);
+}
+
+void
+__attribute__ ((noipa, optimize("-O3")))
+popcountw_256 (short* __restrict dest, short* src)
+{
+  for (int i = 0; i != 16; i++)
+    dest[i] = __builtin_popcount (src[i]);
+}
+
+void
+__attribute__ ((noipa, optimize("-O3")))
+popcountb_512 (char * __restrict dest, char* src)
+{
+  for (int i = 0; i != 64; i++)
+    dest[i] = __builtin_popcount (src[i]);
+}
+
+void
+__attribute__ ((noipa, optimize("-O3")))
+popcountw_512 (short* __restrict dest, short* src)
+{
+  for (int i = 0; i != 32; i++)
+    dest[i] = __builtin_popcount (src[i]);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-pr97770-1.c b/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-pr97770-1.c
new file mode 100644
index 00000000000..63bb00d9b4a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-pr97770-1.c
@@ -0,0 +1,63 @@
+/* PR target/97770 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vpopcntdq -mavx512vl -mprefer-vector-width=512" } */
+/* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\\n\\r\]*xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\\n\\r\]*ymm" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\\n\\r\]*zmm" 1 } } */
+/* Add xfail since the current vectorizer cannot generate the expected code for DImode popcount.  */
+/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\\n\\r\]*xmm" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\\n\\r\]*ymm" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\\n\\r\]*zmm" 1 { xfail *-*-* } } } */
+#ifndef AVX512VPOPCNTQ_H_INCLUDED
+#define AVX512VPOPCNTQ_H_INCLUDED
+
+#include <immintrin.h>
+
+void
+__attribute__ ((noipa, optimize("-O3")))
+popcountd_128 (int* __restrict dest, int* src)
+{
+  for (int i = 0; i != 4; i++)
+    dest[i] = __builtin_popcount (src[i]);
+}
+
+void
+__attribute__ ((noipa, optimize("-O3")))
+popcountq_128 (long long* __restrict dest, long long* src)
+{
+  for (int i = 0; i != 2; i++)
+    dest[i] = __builtin_popcountll (src[i]);
+}
+
+void
+__attribute__ ((noipa, optimize("-O3")))
+popcountd_256 (int* __restrict dest, int* src)
+{
+  for (int i = 0; i != 8; i++)
+    dest[i] = __builtin_popcount (src[i]);
+}
+
+void
+__attribute__ ((noipa, optimize("-O3")))
+popcountq_256 (long long* __restrict dest, long long* src)
+{
+  for (int i = 0; i != 4; i++)
+    dest[i] = __builtin_popcountll (src[i]);
+}
+
+void
+__attribute__ ((noipa, optimize("-O3")))
+popcountd_512 (int* __restrict dest, int* src)
+{
+  for (int i = 0; i != 16; i++)
+    dest[i] = __builtin_popcount (src[i]);
+}
+
+void
+__attribute__ ((noipa, optimize("-O3")))
+popcountq_512 (long long* __restrict dest, long long* src)
+{
+  for (int i = 0; i != 8; i++)
+    dest[i] = __builtin_popcountll (src[i]);
+}
+#endif
diff --git a/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-pr97770-2.c b/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-pr97770-2.c
new file mode 100644
index 00000000000..339dc29023b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-pr97770-2.c
@@ -0,0 +1,39 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vpopcntdq" } */
+
+#define AVX512VPOPCNTDQ
+
+#include "avx512f-helper.h"
+#include "avx512vpopcntdq-pr97770-1.c"
+
+#define SIZE_D AVX512F_LEN / 32
+#define SIZE_Q AVX512F_LEN / 64
+
+
+#define RTEST(TYPE, LEN, SIZE, MODE)			\
+  do							\
+    {							\
+      TYPE res[SIZE], src[SIZE], res_ref[SIZE], v;	\
+      int i, j, ret;					\
+      for (i = 0; i < SIZE; i++)			\
+	{						\
+	  v = src[i] = i * 2 + 3;			\
+	  ret = 0;					\
+	  for (j = 0; j < sizeof(v) * 8; j++)		\
+	    if ((v & ((TYPE)1 << (TYPE) j)))		\
+	      ret++;					\
+	  res_ref[i] = ret;				\
+	}						\
+      EVAL(popcount, MODE, LEN) (res, src);		\
+	for (i = 0; i < SIZE; i++)			\
+	  if (res[i] != res_ref[i])			\
+	    abort ();					\
+    }							\
+  while (0)
+
+void
+TEST (void)
+{
+  RTEST (long long, AVX512F_LEN, SIZE_Q, q_);
+  RTEST (int, AVX512F_LEN, SIZE_D, d_);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vpopcntdqvl-pr97770-1.c b/gcc/testsuite/gcc.target/i386/avx512vpopcntdqvl-pr97770-1.c
new file mode 100644
index 00000000000..7a34f152d01
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vpopcntdqvl-pr97770-1.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -mavx512vpopcntdq -mavx512vl" } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512vpopcntdq-pr97770-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512vpopcntdq-pr97770-2.c"
-- 
2.20.1

Reply via email to