https://gcc.gnu.org/g:6b1cf59e90d3d6391d61b2a8f77856b5aa044014

commit r15-3397-g6b1cf59e90d3d6391d61b2a8f77856b5aa044014
Author: Jennifer Schmitz <jschm...@nvidia.com>
Date:   Fri Aug 30 07:16:43 2024 -0700

    SVE intrinsics: Fold constant operands for svmul.
    
    This patch implements constant folding for svmul by calling
    gimple_folder::fold_const_binary with tree_code MULT_EXPR.
    Tests were added to check the produced assembly for different
    predicates, signed and unsigned integers, and the svmul_n_* case.
    
    The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression.
    OK for mainline?
    
    Signed-off-by: Jennifer Schmitz <jschm...@nvidia.com>
    
    gcc/
            * config/aarch64/aarch64-sve-builtins-base.cc (svmul_impl::fold):
            Try constant folding.
    
    gcc/testsuite/
            * gcc.target/aarch64/sve/const_fold_mul_1.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64-sve-builtins-base.cc    |  15 +-
 .../gcc.target/aarch64/sve/const_fold_mul_1.c      | 302 +++++++++++++++++++++
 2 files changed, 316 insertions(+), 1 deletion(-)
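For reference, the snippet below sketches the user-visible effect of the fold. It mirrors the new test cases further down (a sketch only: it assumes -O2 with SVE enabled, and the register number in the expected assembly may vary):

/* Sketch based on the s64_x_pg test in const_fold_mul_1.c below:
   both operands are compile-time constant vectors, so the call is
   now folded to a constant at gimple time instead of emitting a
   predicated MUL.  */
#include <arm_sve.h>

svint64_t fold_example (svbool_t pg)
{
  /* Expected to compile to a single constant move, e.g.:
         mov  z0.d, #15
         ret                                            */
  return svmul_x (pg, svdup_s64 (5), svdup_s64 (3));
}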

diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index 6c94d144dc9c..8f781e26cc84 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -2000,6 +2000,19 @@ public:
   }
 };
 
+class svmul_impl : public rtx_code_function
+{
+public:
+  CONSTEXPR svmul_impl ()
+    : rtx_code_function (MULT, MULT, UNSPEC_COND_FMUL) {}
+
+  gimple *
+  fold (gimple_folder &f) const override
+  {
+    return f.fold_const_binary (MULT_EXPR);
+  }
+};
+
 class svnand_impl : public function_base
 {
 public:
@@ -3184,7 +3197,7 @@ FUNCTION (svmls_lane, svmls_lane_impl,)
 FUNCTION (svmmla, svmmla_impl,)
 FUNCTION (svmov, svmov_impl,)
 FUNCTION (svmsb, svmsb_impl,)
-FUNCTION (svmul, rtx_code_function, (MULT, MULT, UNSPEC_COND_FMUL))
+FUNCTION (svmul, svmul_impl,)
 FUNCTION (svmul_lane, CODE_FOR_MODE0 (aarch64_mul_lane),)
 FUNCTION (svmulh, unspec_based_function, (UNSPEC_SMUL_HIGHPART,
                                          UNSPEC_UMUL_HIGHPART, -1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/const_fold_mul_1.c b/gcc/testsuite/gcc.target/aarch64/sve/const_fold_mul_1.c
new file mode 100644
index 000000000000..6d68607b5492
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/const_fold_mul_1.c
@@ -0,0 +1,302 @@
+/* { dg-final { check-function-bodies "**" "" } } */
+/* { dg-options "-O2" } */
+
+#include "arm_sve.h"
+
+/*
+** s64_x_pg:
+**     mov     z[0-9]+\.d, #15
+**     ret
+*/
+svint64_t s64_x_pg (svbool_t pg)
+{
+  return svmul_x (pg, svdup_s64 (5), svdup_s64 (3));
+}
+
+/*
+** s64_x_pg_0:
+**     mov     z[0-9]+\.b, #0
+**     ret
+*/
+svint64_t s64_x_pg_0 (svbool_t pg)
+{
+  return svmul_x (pg, svdup_s64 (0), svdup_s64 (3));
+}
+
+/*
+** s64_z_pg:
+**     mov     z[0-9]+\.d, p[0-7]/z, #15
+**     ret
+*/
+svint64_t s64_z_pg (svbool_t pg)
+{
+  return svmul_z (pg, svdup_s64 (5), svdup_s64 (3));
+}
+
+/*
+** s64_z_pg_0:
+**     mov     z[0-9]+\.d, p[0-7]/z, #0
+**     ret
+*/
+svint64_t s64_z_pg_0 (svbool_t pg)
+{
+  return svmul_z (pg, svdup_s64 (0), svdup_s64 (3));
+}
+
+/*
+** s64_m_pg:
+**     mov     (z[0-9]+\.d), #3
+**     mov     (z[0-9]+\.d), #5
+**     mul     \2, p[0-7]/m, \2, \1
+**     ret
+*/
+svint64_t s64_m_pg (svbool_t pg)
+{
+  return svmul_m (pg, svdup_s64 (5), svdup_s64 (3));
+}
+
+/*
+** s64_x_ptrue:
+**     mov     z[0-9]+\.d, #15
+**     ret
+*/
+svint64_t s64_x_ptrue ()
+{
+  return svmul_x (svptrue_b64 (), svdup_s64 (5), svdup_s64 (3));
+}
+
+/*
+** s64_z_ptrue:
+**     mov     z[0-9]+\.d, #15
+**     ret
+*/
+svint64_t s64_z_ptrue ()
+{
+  return svmul_z (svptrue_b64 (), svdup_s64 (5), svdup_s64 (3));
+}
+
+/*
+** s64_m_ptrue:
+**     mov     z[0-9]+\.d, #15
+**     ret
+*/
+svint64_t s64_m_ptrue ()
+{
+  return svmul_m (svptrue_b64 (), svdup_s64 (5), svdup_s64 (3));
+}
+
+/*
+** s64_x_pg_n:
+**     mov     z[0-9]+\.d, #15
+**     ret
+*/
+svint64_t s64_x_pg_n (svbool_t pg)
+{
+  return svmul_n_s64_x (pg, svdup_s64 (5), 3);
+}
+
+/*
+** s64_x_pg_n_s64_0:
+**     mov     z[0-9]+\.b, #0
+**     ret
+*/
+svint64_t s64_x_pg_n_s64_0 (svbool_t pg)
+{
+  return svmul_n_s64_x (pg, svdup_s64 (5), 0);
+}
+
+/*
+** s64_z_pg_n:
+**     mov     z[0-9]+\.d, p[0-7]/z, #15
+**     ret
+*/
+svint64_t s64_z_pg_n (svbool_t pg)
+{
+  return svmul_n_s64_z (pg, svdup_s64 (5), 3);
+}
+
+/*
+** s64_z_pg_n_s64_0:
+**     mov     z[0-9]+\.d, p[0-7]/z, #0
+**     ret
+*/
+svint64_t s64_z_pg_n_s64_0 (svbool_t pg)
+{
+  return svmul_n_s64_z (pg, svdup_s64 (5), 0);
+}
+
+/*
+** s64_m_pg_n:
+**     mov     (z[0-9]+\.d), #3
+**     mov     (z[0-9]+\.d), #5
+**     mul     \2, p[0-7]/m, \2, \1
+**     ret
+*/
+svint64_t s64_m_pg_n (svbool_t pg)
+{
+  return svmul_n_s64_m (pg, svdup_s64 (5), 3);
+}
+
+/*
+** s64_x_ptrue_n:
+**     mov     z[0-9]+\.d, #15
+**     ret
+*/
+svint64_t s64_x_ptrue_n ()
+{
+  return svmul_n_s64_x (svptrue_b64 (), svdup_s64 (5), 3);
+}
+
+/*
+** s64_z_ptrue_n:
+**     mov     z[0-9]+\.d, #15
+**     ret
+*/
+svint64_t s64_z_ptrue_n ()
+{
+  return svmul_n_s64_z (svptrue_b64 (), svdup_s64 (5), 3);
+}
+
+/*
+** s64_m_ptrue_n:
+**     mov     z[0-9]+\.d, #15
+**     ret
+*/
+svint64_t s64_m_ptrue_n ()
+{
+  return svmul_n_s64_m (svptrue_b64 (), svdup_s64 (5), 3);
+}
+
+/*
+** u64_x_pg:
+**     mov     z[0-9]+\.d, #15
+**     ret
+*/
+svuint64_t u64_x_pg (svbool_t pg)
+{
+  return svmul_x (pg, svdup_u64 (5), svdup_u64 (3));
+}
+
+/*
+** u64_z_pg:
+**     mov     z[0-9]+\.d, p[0-7]/z, #15
+**     ret
+*/
+svuint64_t u64_z_pg (svbool_t pg)
+{
+  return svmul_z (pg, svdup_u64 (5), svdup_u64 (3));
+}
+
+/*
+** u64_m_pg:
+**     mov     (z[0-9]+\.d), #3
+**     mov     (z[0-9]+\.d), #5
+**     mul     \2, p[0-7]/m, \2, \1
+**     ret
+*/
+svuint64_t u64_m_pg (svbool_t pg)
+{
+  return svmul_m (pg, svdup_u64 (5), svdup_u64 (3));
+}
+
+/*
+** u64_x_ptrue:
+**     mov     z[0-9]+\.d, #15
+**     ret
+*/
+svuint64_t u64_x_ptrue ()
+{
+  return svmul_x (svptrue_b64 (), svdup_u64 (5), svdup_u64 (3));
+}
+
+/*
+** u64_z_ptrue:
+**     mov     z[0-9]+\.d, #15
+**     ret
+*/
+svuint64_t u64_z_ptrue ()
+{
+  return svmul_z (svptrue_b64 (), svdup_u64 (5), svdup_u64 (3));
+}
+
+/*
+** u64_m_ptrue:
+**     mov     z[0-9]+\.d, #15
+**     ret
+*/
+svuint64_t u64_m_ptrue ()
+{
+  return svmul_m (svptrue_b64 (), svdup_u64 (5), svdup_u64 (3));
+}
+
+/*
+** u64_x_pg_n:
+**     mov     z[0-9]+\.d, #15
+**     ret
+*/
+svuint64_t u64_x_pg_n (svbool_t pg)
+{
+  return svmul_n_u64_x (pg, svdup_u64 (5), 3);
+}
+
+/*
+** u64_z_pg_n:
+**     mov     z[0-9]+\.d, p[0-7]/z, #15
+**     ret
+*/
+svuint64_t u64_z_pg_n (svbool_t pg)
+{
+  return svmul_n_u64_z (pg, svdup_u64 (5), 3);
+}
+
+/*
+** u64_m_pg_n:
+**     mov     (z[0-9]+\.d), #3
+**     mov     (z[0-9]+\.d), #5
+**     mul     \2, p[0-7]/m, \2, \1
+**     ret
+*/
+svuint64_t u64_m_pg_n (svbool_t pg)
+{
+  return svmul_n_u64_m (pg, svdup_u64 (5), 3);
+}
+
+/*
+** u64_x_ptrue_n:
+**     mov     z[0-9]+\.d, #15
+**     ret
+*/
+svuint64_t u64_x_ptrue_n ()
+{
+  return svmul_n_u64_x (svptrue_b64 (), svdup_u64 (5), 3);
+}
+
+/*
+** u64_z_ptrue_n:
+**     mov     z[0-9]+\.d, #15
+**     ret
+*/
+svuint64_t u64_z_ptrue_n ()
+{
+  return svmul_n_u64_z (svptrue_b64 (), svdup_u64 (5), 3);
+}
+
+/*
+** u64_m_ptrue_n:
+**     mov     z[0-9]+\.d, #15
+**     ret
+*/
+svuint64_t u64_m_ptrue_n ()
+{
+  return svmul_n_u64_m (svptrue_b64 (), svdup_u64 (5), 3);
+}
+
+/*
+** u32_x_pg:
+**     mov     z[0-9]+\.s, #60
+**     ret
+*/
+svuint32_t u32_x_pg (svbool_t pg)
+{
+  return svmul_x (pg, svdupq_u32 (3, 15, 1, 12), svdupq_u32 (20, 4, 60, 5));
+}