https://gcc.gnu.org/g:0c5c0c959c2e592b84739f19ca771fa69eb8dfee

commit r15-2192-g0c5c0c959c2e592b84739f19ca771fa69eb8dfee
Author: Tamar Christina <tamar.christ...@arm.com>
Date:   Mon Jul 22 10:28:19 2024 +0100

    AArch64: implement TARGET_VECTORIZE_CONDITIONAL_OPERATION_IS_EXPENSIVE [PR115531].
    
    This implements the new target hook indicating that for AArch64 when possible
    we prefer masked operations for any type vs doing LOAD + SELECT or
    SELECT + STORE.
    
    Thanks,
    Tamar
    
    gcc/ChangeLog:
    
            PR tree-optimization/115531
            * config/aarch64/aarch64.cc
            (aarch64_conditional_operation_is_expensive): New.
            (TARGET_VECTORIZE_CONDITIONAL_OPERATION_IS_EXPENSIVE): New.
    
    gcc/testsuite/ChangeLog:
    
            PR tree-optimization/115531
            * gcc.dg/vect/vect-conditional_store_1.c: New test.
            * gcc.dg/vect/vect-conditional_store_2.c: New test.
            * gcc.dg/vect/vect-conditional_store_3.c: New test.
            * gcc.dg/vect/vect-conditional_store_4.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64.cc                      | 12 ++++++++++
 .../gcc.dg/vect/vect-conditional_store_1.c         | 24 +++++++++++++++++++
 .../gcc.dg/vect/vect-conditional_store_2.c         | 24 +++++++++++++++++++
 .../gcc.dg/vect/vect-conditional_store_3.c         | 24 +++++++++++++++++++
 .../gcc.dg/vect/vect-conditional_store_4.c         | 28 ++++++++++++++++++++++
 5 files changed, 112 insertions(+)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 0d41a193ec18..89eb66348f77 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -28211,6 +28211,15 @@ aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
   return true;
 }
 
+/* Implement TARGET_VECTORIZE_CONDITIONAL_OPERATION_IS_EXPENSIVE.  Assume that
+   predicated operations when available are beneficial.  */
+
+static bool
+aarch64_conditional_operation_is_expensive (unsigned)
+{
+  return false;
+}
+
 /* Implement TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE.  Assume for now that
    it isn't worth branching around empty masked ops (including masked
    stores).  */
@@ -30898,6 +30907,9 @@ aarch64_libgcc_floating_mode_supported_p
 #define TARGET_VECTORIZE_RELATED_MODE aarch64_vectorize_related_mode
 #undef TARGET_VECTORIZE_GET_MASK_MODE
 #define TARGET_VECTORIZE_GET_MASK_MODE aarch64_get_mask_mode
+#undef TARGET_VECTORIZE_CONDITIONAL_OPERATION_IS_EXPENSIVE
+#define TARGET_VECTORIZE_CONDITIONAL_OPERATION_IS_EXPENSIVE \
+  aarch64_conditional_operation_is_expensive
 #undef TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE
 #define TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE \
   aarch64_empty_mask_is_expensive
diff --git a/gcc/testsuite/gcc.dg/vect/vect-conditional_store_1.c b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_1.c
new file mode 100644
index 000000000000..03128b1f19b2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_masked_store } */
+
+/* { dg-additional-options "-mavx2" { target avx2 } } */
+/* { dg-additional-options "-march=armv9-a" { target aarch64-*-* } } */
+
+void foo1 (char *restrict a, int *restrict b, int *restrict c, int n, int stride)
+{
+  if (stride <= 1)
+    return;
+
+  for (int i = 0; i < n; i++)
+    {
+      int res = c[i];
+      int t = b[i+stride];
+      if (a[i] != 0)
+        res = t;
+      c[i] = res;
+    }
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump-not "VEC_COND_EXPR " "vect" { target aarch64-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-conditional_store_2.c b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_2.c
new file mode 100644
index 000000000000..a03898793c0b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_2.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_masked_store } */
+
+/* { dg-additional-options "-mavx2" { target avx2 } } */
+/* { dg-additional-options "-march=armv9-a" { target aarch64-*-* } } */
+
+void foo2 (char *restrict a, int *restrict b, int *restrict c, int n, int stride)
+{
+  if (stride <= 1)
+    return;
+
+  for (int i = 0; i < n; i++)
+    {
+      int res = c[i];
+      int t = b[i+stride];
+      if (a[i] != 0)
+        t = res;
+      c[i] = t;
+    }
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump-not "VEC_COND_EXPR " "vect" { target aarch64-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-conditional_store_3.c b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_3.c
new file mode 100644
index 000000000000..8a898755c1ca
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_3.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_masked_store } */
+
+/* { dg-additional-options "-mavx2" { target avx2 } } */
+/* { dg-additional-options "-march=armv9-a" { target aarch64-*-* } } */
+
+void foo3 (float *restrict a, int *restrict b, int *restrict c, int n, int stride)
+{
+  if (stride <= 1)
+    return;
+
+  for (int i = 0; i < n; i++)
+    {
+      int res = c[i];
+      int t = b[i+stride];
+      if (a[i] >= 0)
+        t = res;
+      c[i] = t;
+    }
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump-not "VEC_COND_EXPR " "vect" { target aarch64-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-conditional_store_4.c b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_4.c
new file mode 100644
index 000000000000..18ef42e6ad34
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_4.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_masked_store } */
+
+/* { dg-additional-options "-mavx2" { target avx2 } } */
+/* { dg-additional-options "-march=armv9-a" { target aarch64-*-* } } */
+
+void foo4 (signed char *restrict a, int *restrict b, int *restrict c, int *restrict d, int n, int stride)
+{
+  if (stride <= 1)
+    return;
+
+  for (int i = 0; i < n; i++)
+    {
+      int res1 = c[i];
+      int res2 = d[i];
+      int t = b[i+stride];
+      if (a[i] > 0)
+        t = res1;
+      else if (a[i] < 0)
+        t = res2 * 2;
+
+      c[i] = t;
+    }
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump-times "VEC_COND_EXPR " "vect" 1 { target aarch64-*-* } } } */

Reply via email to