On 8/11/21 9:10 PM, Kewen.Lin wrote:
Hi Bill,

Thanks for your prompt review!

on 2021/8/12 上午12:34, Bill Schmidt wrote:
Hi Kewen,

FWIW, it's easier on reviewers if you include the patch inline instead of as an 
attachment.

On 8/11/21 1:56 AM, Kewen.Lin wrote:
Hi,

This patch is to add the support to make vectorizer able to
vectorize scalar version of some built-in functions with its
corresponding vector version with Power10 support.

Bootstrapped & regtested on powerpc64le-linux-gnu {P9,P10}
and powerpc64-linux-gnu P8.

Is it ok for trunk?

BR,
Kewen
-----
gcc/ChangeLog:

        * config/rs6000/rs6000.c (rs6000_builtin_md_vectorized_function): Add
        support for some built-in functions vectorized on Power10.

gcc/testsuite/ChangeLog:

        * gcc.target/powerpc/dive-vectorize-1.c: New test.
        * gcc.target/powerpc/dive-vectorize-1.h: New test.
        * gcc.target/powerpc/dive-vectorize-2.c: New test.
        * gcc.target/powerpc/dive-vectorize-2.h: New test.
        * gcc.target/powerpc/dive-vectorize-run-1.c: New test.
        * gcc.target/powerpc/dive-vectorize-run-2.c: New test.
        * gcc.target/powerpc/p10-bifs-vectorize-1.c: New test.
        * gcc.target/powerpc/p10-bifs-vectorize-1.h: New test.
        * gcc.target/powerpc/p10-bifs-vectorize-run-1.c: New test.
---
  gcc/config/rs6000/rs6000.c                    | 55 +++++++++++++++++++
  .../gcc.target/powerpc/dive-vectorize-1.c     | 11 ++++
  .../gcc.target/powerpc/dive-vectorize-1.h     | 22 ++++++++
  .../gcc.target/powerpc/dive-vectorize-2.c     | 12 ++++
  .../gcc.target/powerpc/dive-vectorize-2.h     | 22 ++++++++
  .../gcc.target/powerpc/dive-vectorize-run-1.c | 52 ++++++++++++++++++
  .../gcc.target/powerpc/dive-vectorize-run-2.c | 53 ++++++++++++++++++
  .../gcc.target/powerpc/p10-bifs-vectorize-1.c | 15 +++++
  .../gcc.target/powerpc/p10-bifs-vectorize-1.h | 40 ++++++++++++++
  .../powerpc/p10-bifs-vectorize-run-1.c        | 45 +++++++++++++++
  10 files changed, 327 insertions(+)
  create mode 100644 gcc/testsuite/gcc.target/powerpc/dive-vectorize-1.c
  create mode 100644 gcc/testsuite/gcc.target/powerpc/dive-vectorize-1.h
  create mode 100644 gcc/testsuite/gcc.target/powerpc/dive-vectorize-2.c
  create mode 100644 gcc/testsuite/gcc.target/powerpc/dive-vectorize-2.h
  create mode 100644 gcc/testsuite/gcc.target/powerpc/dive-vectorize-run-1.c
  create mode 100644 gcc/testsuite/gcc.target/powerpc/dive-vectorize-run-2.c
  create mode 100644 gcc/testsuite/gcc.target/powerpc/p10-bifs-vectorize-1.c
  create mode 100644 gcc/testsuite/gcc.target/powerpc/p10-bifs-vectorize-1.h
  create mode 100644 gcc/testsuite/gcc.target/powerpc/p10-bifs-vectorize-run-1.c

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 279f00cc648..3eac1d05101 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -5785,6 +5785,61 @@ rs6000_builtin_md_vectorized_function (tree fndecl, tree 
type_out,
      default:
        break;
      }
+
+  machine_mode in_vmode = TYPE_MODE (type_in);
+  machine_mode out_vmode = TYPE_MODE (type_out);
+
+  /* Power10 supported vectorized built-in functions.  */
+  if (TARGET_POWER10
+      && in_vmode == out_vmode
+      && VECTOR_UNIT_ALTIVEC_OR_VSX_P (in_vmode))
+    {
+      machine_mode exp_mode = DImode;
+      machine_mode exp_vmode = V2DImode;
+      enum rs6000_builtins vname = RS6000_BUILTIN_COUNT;

Using this as a flag value looks unnecessary.  Is this just being done to 
silence a warning?

Good question!  I didn't notice there is a warning or not, just get used to 
initializing variable
with one suitable value if possible.  If you don't mind, may I still keep it?  
Since if some
future codes use vname in a path where it's not assigned, one explicitly wrong 
enum (bif) seems
better than a random one.  Or will this mentioned possibility definitely never 
happen since the
current uninitialized variables detection and warning scheme is robust and 
should not worry about
that completely?


I don't feel strongly about this either way; up to you and the maintainers. :)

Bill


+      switch (fn)
+       {
+       case MISC_BUILTIN_DIVWE:
+       case MISC_BUILTIN_DIVWEU:
+         exp_mode = SImode;
+         exp_vmode = V4SImode;
+         if (fn == MISC_BUILTIN_DIVWE)
+           vname = P10V_BUILTIN_DIVES_V4SI;
+         else
+           vname = P10V_BUILTIN_DIVEU_V4SI;
+         break;
+       case MISC_BUILTIN_DIVDE:
+       case MISC_BUILTIN_DIVDEU:
+         if (fn == MISC_BUILTIN_DIVDE)
+           vname = P10V_BUILTIN_DIVES_V2DI;
+         else
+           vname = P10V_BUILTIN_DIVEU_V2DI;
+         break;
+       case P10_BUILTIN_CFUGED:
+         vname = P10V_BUILTIN_VCFUGED;
+         break;
+       case P10_BUILTIN_CNTLZDM:
+         vname = P10V_BUILTIN_VCLZDM;
+         break;
+       case P10_BUILTIN_CNTTZDM:
+         vname = P10V_BUILTIN_VCTZDM;
+         break;
+       case P10_BUILTIN_PDEPD:
+         vname = P10V_BUILTIN_VPDEPD;
+         break;
+       case P10_BUILTIN_PEXTD:
+         vname = P10V_BUILTIN_VPEXTD;
+         break;
+       default:
+         return NULL_TREE;
+       }
+
+      if (vname != RS6000_BUILTIN_COUNT

Check is not necessary, as you will have returned by now in that case.

Thanks for catching, I put break for "default" initially, didn't noticed the 
following condition
need an adjustment after updating it to early return.  Will fix it.

BR,
Kewen

Otherwise this patch LGTM.  Thanks!  Still needs maintainer approval, of course.

Bill

+         && in_mode == exp_mode
+         && in_vmode == exp_vmode)
+       return rs6000_builtin_decls[vname];
+    }
+
    return NULL_TREE;
  }
  
diff --git a/gcc/testsuite/gcc.target/powerpc/dive-vectorize-1.c 
b/gcc/testsuite/gcc.target/powerpc/dive-vectorize-1.c
new file mode 100644
index 00000000000..84f1b0a88f2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/dive-vectorize-1.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -ftree-vectorize -fno-vect-cost-model 
-fno-unroll-loops -fdump-tree-vect-details" } */
+
+/* Test if signed/unsigned int extended divisions get vectorized.  */
+
+#include "dive-vectorize-1.h"
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
+/* { dg-final { scan-assembler-times {\mvdivesw\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvdiveuw\M} 1 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/dive-vectorize-1.h 
b/gcc/testsuite/gcc.target/powerpc/dive-vectorize-1.h
new file mode 100644
index 00000000000..119f637b46b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/dive-vectorize-1.h
@@ -0,0 +1,22 @@
+#define N 128
+
+typedef signed int si;
+typedef unsigned int ui;
+
+si si_a[N], si_b[N], si_c[N];
+ui ui_a[N], ui_b[N], ui_c[N];
+
+__attribute__ ((noipa)) void
+test_divwe ()
+{
+  for (int i = 0; i < N; i++)
+    si_c[i] = __builtin_divwe (si_a[i], si_b[i]);
+}
+
+__attribute__ ((noipa)) void
+test_divweu ()
+{
+  for (int i = 0; i < N; i++)
+    ui_c[i] = __builtin_divweu (ui_a[i], ui_b[i]);
+}
+
diff --git a/gcc/testsuite/gcc.target/powerpc/dive-vectorize-2.c 
b/gcc/testsuite/gcc.target/powerpc/dive-vectorize-2.c
new file mode 100644
index 00000000000..4db0085562f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/dive-vectorize-2.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -ftree-vectorize -fno-vect-cost-model 
-fno-unroll-loops -fdump-tree-vect-details" } */
+
+/* Test if signed/unsigned long long extended divisions get vectorized.  */
+
+#include "dive-vectorize-2.h"
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
+/* { dg-final { scan-assembler-times {\mvdivesd\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvdiveud\M} 1 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/dive-vectorize-2.h 
b/gcc/testsuite/gcc.target/powerpc/dive-vectorize-2.h
new file mode 100644
index 00000000000..1cab56b2e0b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/dive-vectorize-2.h
@@ -0,0 +1,22 @@
+#define N 128
+
+typedef signed long long sLL;
+typedef unsigned long long uLL;
+
+sLL sll_a[N], sll_b[N], sll_c[N];
+uLL ull_a[N], ull_b[N], ull_c[N];
+
+__attribute__ ((noipa)) void
+test_divde ()
+{
+  for (int i = 0; i < N; i++)
+    sll_c[i] = __builtin_divde (sll_a[i], sll_b[i]);
+}
+
+__attribute__ ((noipa)) void
+test_divdeu ()
+{
+  for (int i = 0; i < N; i++)
+    ull_c[i] = __builtin_divdeu (ull_a[i], ull_b[i]);
+}
+
diff --git a/gcc/testsuite/gcc.target/powerpc/dive-vectorize-run-1.c 
b/gcc/testsuite/gcc.target/powerpc/dive-vectorize-run-1.c
new file mode 100644
index 00000000000..1d5cbaa9f9b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/dive-vectorize-run-1.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-require-effective-target power10_hw } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -ftree-vectorize 
-fno-vect-cost-model" } */
+
+#include "dive-vectorize-1.h"
+
+/* Check if test cases with signed/unsigned int extended division
+   vectorization run successfully.  */
+
+__attribute__ ((optimize (1))) void
+check_divwe ()
+{
+  test_divwe ();
+  for (int i = 0; i < N; i++)
+    {
+      si exp = __builtin_divwe (si_a[i], si_b[i]);
+      if (exp != si_c[i])
+       __builtin_abort ();
+    }
+}
+
+__attribute__ ((optimize (1))) void
+check_divweu ()
+{
+  test_divweu ();
+  for (int i = 0; i < N; i++)
+    {
+      ui exp = __builtin_divweu (ui_a[i], ui_b[i]);
+      if (exp != ui_c[i])
+       __builtin_abort ();
+    }
+}
+
+int
+main ()
+{
+  for (int i = 0; i < N; i++)
+    {
+      si_a[i] = 0x10 * (i * 3 + 2);
+      si_b[i] = 0x7890 * (i * 3 + 1);
+      ui_a[i] = 0x234 * (i * 11 + 3) - 0xcd * (i * 5 - 7);
+      ui_b[i] = 0x6078 * (i * 7 + 3) + 0xef * (i * 7 - 11);
+      if (si_b[i] == 0 || ui_b[i] == 0)
+       __builtin_abort ();
+    }
+
+  check_divwe ();
+  check_divweu ();
+
+  return 0;
+}
+
diff --git a/gcc/testsuite/gcc.target/powerpc/dive-vectorize-run-2.c 
b/gcc/testsuite/gcc.target/powerpc/dive-vectorize-run-2.c
new file mode 100644
index 00000000000..921b07b3f1b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/dive-vectorize-run-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target power10_hw } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -ftree-vectorize 
-fno-vect-cost-model" } */
+
+#include "dive-vectorize-2.h"
+
+/* Check if test cases with signed/unsigned int extended division
+   vectorization run successfully.  */
+
+__attribute__ ((optimize (1))) void
+check_divde ()
+{
+  test_divde ();
+  for (int i = 0; i < N; i++)
+    {
+      sLL exp = __builtin_divde (sll_a[i], sll_b[i]);
+      if (exp != sll_c[i])
+       __builtin_abort ();
+    }
+}
+
+__attribute__ ((optimize (1))) void
+check_divdeu ()
+{
+  test_divdeu ();
+  for (int i = 0; i < N; i++)
+    {
+      uLL exp = __builtin_divdeu (ull_a[i], ull_b[i]);
+      if (exp != ull_c[i])
+       __builtin_abort ();
+    }
+}
+
+int
+main ()
+{
+  for (int i = 0; i < N; i++)
+    {
+      sll_a[i] = 0x102 * (i * 3 + 2);
+      sll_b[i] = 0x789ab * (i * 3 + 1);
+      ull_a[i] = 0x2345 * (i * 11 + 3) - 0xcd1 * (i * 5 - 7);
+      ull_b[i] = 0x6078e * (i * 7 + 3) + 0xefa * (i * 7 - 11);
+      if (sll_b[i] == 0 || ull_b[i] == 0)
+       __builtin_abort ();
+    }
+
+  check_divde ();
+  check_divdeu ();
+
+  return 0;
+}
+
diff --git a/gcc/testsuite/gcc.target/powerpc/p10-bifs-vectorize-1.c 
b/gcc/testsuite/gcc.target/powerpc/p10-bifs-vectorize-1.c
new file mode 100644
index 00000000000..9b8b3642ead
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/p10-bifs-vectorize-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -ftree-vectorize -fno-vect-cost-model 
-fno-unroll-loops -fdump-tree-vect-details" } */
+
+/* Test if some Power10 built-in functions get vectorized.  */
+
+#include "p10-bifs-vectorize-1.h"
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 5 "vect" } } */
+/* { dg-final { scan-assembler-times {\mvcfuged\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvclzdm\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvctzdm\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvpdepd\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvpextd\M} 1 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/p10-bifs-vectorize-1.h 
b/gcc/testsuite/gcc.target/powerpc/p10-bifs-vectorize-1.h
new file mode 100644
index 00000000000..80b7aacf810
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/p10-bifs-vectorize-1.h
@@ -0,0 +1,40 @@
+#define N 32
+
+typedef unsigned long long uLL;
+uLL ull_a[N], ull_b[N], ull_c[N];
+
+__attribute__ ((noipa)) void
+test_cfuged ()
+{
+  for (int i = 0; i < N; i++)
+    ull_c[i] = __builtin_cfuged (ull_a[i], ull_b[i]);
+}
+
+__attribute__ ((noipa)) void
+test_cntlzdm ()
+{
+  for (int i = 0; i < N; i++)
+    ull_c[i] = __builtin_cntlzdm (ull_a[i], ull_b[i]);
+}
+
+__attribute__ ((noipa)) void
+test_cnttzdm ()
+{
+  for (int i = 0; i < N; i++)
+    ull_c[i] = __builtin_cnttzdm (ull_a[i], ull_b[i]);
+}
+
+__attribute__ ((noipa)) void
+test_pdepd ()
+{
+  for (int i = 0; i < N; i++)
+    ull_c[i] = __builtin_pdepd (ull_a[i], ull_b[i]);
+}
+
+__attribute__ ((noipa)) void
+test_pextd ()
+{
+  for (int i = 0; i < N; i++)
+    ull_c[i] = __builtin_pextd (ull_a[i], ull_b[i]);
+}
+
diff --git a/gcc/testsuite/gcc.target/powerpc/p10-bifs-vectorize-run-1.c 
b/gcc/testsuite/gcc.target/powerpc/p10-bifs-vectorize-run-1.c
new file mode 100644
index 00000000000..4b3b1165663
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/p10-bifs-vectorize-run-1.c
@@ -0,0 +1,45 @@
+/* { dg-do run } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target power10_hw } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -ftree-vectorize 
-fno-vect-cost-model" } */
+
+#include "p10-bifs-vectorize-1.h"
+
+/* Check if vectorized built-in functions run expectedly.  */
+
+#define CHECK(name)                                                           \
+  __attribute__ ((optimize (1))) void check_##name ()                         \
+  {                                                                           \
+    test_##name ();                                                           \
+    for (int i = 0; i < N; i++)                                               \
+      {                                                                       \
+       uLL exp = __builtin_##name (ull_a[i], ull_b[i]);                      \
+       if (exp != ull_c[i])                                                  \
+         __builtin_abort ();                                                 \
+      }                                                                       \
+  }
+
+CHECK (cfuged)
+CHECK (cntlzdm)
+CHECK (cnttzdm)
+CHECK (pdepd)
+CHECK (pextd)
+
+int
+main ()
+{
+  for (int i = 0; i < N; i++)
+    {
+      ull_a[i] = 0x789a * (i * 11 - 5) - 0xcd1 * (i * 5 - 7);
+      ull_b[i] = 0xfedc * (i * 7 + 3) + 0x467 * (i * 7 - 11);
+    }
+
+  check_cfuged ();
+  check_cntlzdm ();
+  check_cnttzdm ();
+  check_pdepd ();
+  check_pextd ();
+
+  return 0;
+}
+
--
2.17.1



BR,
Kewen

Reply via email to