https://gcc.gnu.org/g:90e38c4ffad086a82635e8ea9bf0e7e9e02f1ff7

commit r15-4590-g90e38c4ffad086a82635e8ea9bf0e7e9e02f1ff7
Author: Jennifer Schmitz <jschm...@nvidia.com>
Date:   Tue Oct 15 07:58:14 2024 -0700

    SVE intrinsics: Add constant folding for svindex.
    
    This patch folds svindex with constant arguments into a vector series.
    We implemented this in svindex_impl::fold using the function
    build_vec_series.
    For example,
    svuint64_t f1 ()
    {
      return svindex_u64 (10, 3);
    }
    compiled with -O2 -march=armv8.2-a+sve, is folded to {10, 13, 16, ...}
    in the gimple pass lower.
    This optimization benefits cases where svindex is used in combination with
    other gimple-level optimizations.
    For example,
    svuint64_t f2 ()
    {
        return svmul_x (svptrue_b64 (), svindex_u64 (10, 3), 5);
    }
    has previously been compiled to
    f2:
            index   z0.d, #10, #3
            mul     z0.d, z0.d, #5
            ret
    Now, it is compiled to
    f2:
            mov     x0, 50
            index   z0.d, x0, #15
            ret
    
    We added test cases checking
    - the application of the transform during gimple for constant arguments,
    - the interaction with another gimple-level optimization.
    
    The patch was bootstrapped and regtested on aarch64-linux-gnu, no
    regression.
    OK for mainline?
    
    Signed-off-by: Jennifer Schmitz <jschm...@nvidia.com>
    
    gcc/
            * config/aarch64/aarch64-sve-builtins-base.cc
            (svindex_impl::fold): Add constant folding.
    
    gcc/testsuite/
            * gcc.target/aarch64/sve/index_const_fold.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64-sve-builtins-base.cc    | 14 +++++++++
 .../gcc.target/aarch64/sve/index_const_fold.c      | 35 ++++++++++++++++++++++
 2 files changed, 49 insertions(+)

diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index 70bd83005d7c..e47acb67aeea 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -1301,6 +1301,20 @@ public:
 
 class svindex_impl : public function_base
 {
+public:
+  gimple *
+  fold (gimple_folder &f) const override
+  {
+    /* Apply constant folding if base and step are integer constants.  */
+    tree vec_type = TREE_TYPE (f.lhs);
+    tree base = gimple_call_arg (f.call, 0);
+    tree step = gimple_call_arg (f.call, 1);
+    if (TREE_CODE (base) != INTEGER_CST || TREE_CODE (step) != INTEGER_CST)
+      return NULL;
+    return gimple_build_assign (f.lhs,
+                               build_vec_series (vec_type, base, step));
+  }
+
 public:
   rtx
   expand (function_expander &e) const override
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/index_const_fold.c b/gcc/testsuite/gcc.target/aarch64/sve/index_const_fold.c
new file mode 100644
index 000000000000..7abb803f58ba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/index_const_fold.c
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+#include <arm_sve.h>
+#include <stdint.h>
+
+#define INDEX_CONST(TYPE, TY)                          \
+  sv##TYPE f_##TY##_index_const ()                     \
+  {                                                    \
+    return svindex_##TY (10, 3);                       \
+  }
+
+#define MULT_INDEX(TYPE, TY)                           \
+  sv##TYPE f_##TY##_mult_index ()                      \
+  {                                                    \
+    return svmul_x (svptrue_b8 (),                     \
+                   svindex_##TY (10, 3),               \
+                   5);                                 \
+  }
+
+#define ALL_TESTS(TYPE, TY)                            \
+  INDEX_CONST (TYPE, TY)                               \
+  MULT_INDEX (TYPE, TY)
+
+ALL_TESTS (uint8_t, u8)
+ALL_TESTS (uint16_t, u16)
+ALL_TESTS (uint32_t, u32)
+ALL_TESTS (uint64_t, u64)
+ALL_TESTS (int8_t, s8)
+ALL_TESTS (int16_t, s16)
+ALL_TESTS (int32_t, s32)
+ALL_TESTS (int64_t, s64)
+
+/* { dg-final { scan-tree-dump-times "return \\{ 10, 13, 16, ... \\}" 8 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "return \\{ 50, 65, 80, ... \\}" 8 "optimized" } } */

Reply via email to