https://gcc.gnu.org/g:6b1cf59e90d3d6391d61b2a8f77856b5aa044014
commit r15-3397-g6b1cf59e90d3d6391d61b2a8f77856b5aa044014
Author: Jennifer Schmitz <jschm...@nvidia.com>
Date:   Fri Aug 30 07:16:43 2024 -0700

    SVE intrinsics: Fold constant operands for svmul.
    
    This patch implements constant folding for svmul by calling
    gimple_folder::fold_const_binary with tree_code MULT_EXPR.
    Tests were added to check the produced assembly for different
    predicates, signed and unsigned integers, and the svmul_n_* case.
    
    The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression.
    OK for mainline?
    
    Signed-off-by: Jennifer Schmitz <jschm...@nvidia.com>
    
    gcc/
    	* config/aarch64/aarch64-sve-builtins-base.cc (svmul_impl::fold):
    	Try constant folding.
    
    gcc/testsuite/
    	* gcc.target/aarch64/sve/const_fold_mul_1.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64-sve-builtins-base.cc    |  15 +-
 .../gcc.target/aarch64/sve/const_fold_mul_1.c      | 302 +++++++++++++++++++++
 2 files changed, 316 insertions(+), 1 deletion(-)

diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index 6c94d144dc9c..8f781e26cc84 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -2000,6 +2000,19 @@ public:
   }
 };
 
+class svmul_impl : public rtx_code_function
+{
+public:
+  CONSTEXPR svmul_impl ()
+    : rtx_code_function (MULT, MULT, UNSPEC_COND_FMUL) {}
+
+  gimple *
+  fold (gimple_folder &f) const override
+  {
+    return f.fold_const_binary (MULT_EXPR);
+  }
+};
+
 class svnand_impl : public function_base
 {
 public:
@@ -3184,7 +3197,7 @@ FUNCTION (svmls_lane, svmls_lane_impl,)
 FUNCTION (svmmla, svmmla_impl,)
 FUNCTION (svmov, svmov_impl,)
 FUNCTION (svmsb, svmsb_impl,)
-FUNCTION (svmul, rtx_code_function, (MULT, MULT, UNSPEC_COND_FMUL))
+FUNCTION (svmul, svmul_impl,)
 FUNCTION (svmul_lane, CODE_FOR_MODE0 (aarch64_mul_lane),)
 FUNCTION (svmulh, unspec_based_function, (UNSPEC_SMUL_HIGHPART,
 					  UNSPEC_UMUL_HIGHPART, -1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/const_fold_mul_1.c b/gcc/testsuite/gcc.target/aarch64/sve/const_fold_mul_1.c
new file mode 100644
index 000000000000..6d68607b5492
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/const_fold_mul_1.c
@@ -0,0 +1,302 @@
+/* { dg-final { check-function-bodies "**" "" } } */
+/* { dg-options "-O2" } */
+
+#include "arm_sve.h"
+
+/*
+** s64_x_pg:
+**	mov	z[0-9]+\.d, #15
+**	ret
+*/
+svint64_t s64_x_pg (svbool_t pg)
+{
+  return svmul_x (pg, svdup_s64 (5), svdup_s64 (3));
+}
+
+/*
+** s64_x_pg_0:
+**	mov	z[0-9]+\.b, #0
+**	ret
+*/
+svint64_t s64_x_pg_0 (svbool_t pg)
+{
+  return svmul_x (pg, svdup_s64 (0), svdup_s64 (3));
+}
+
+/*
+** s64_z_pg:
+**	mov	z[0-9]+\.d, p[0-7]/z, #15
+**	ret
+*/
+svint64_t s64_z_pg (svbool_t pg)
+{
+  return svmul_z (pg, svdup_s64 (5), svdup_s64 (3));
+}
+
+/*
+** s64_z_pg_0:
+**	mov	z[0-9]+\.d, p[0-7]/z, #0
+**	ret
+*/
+svint64_t s64_z_pg_0 (svbool_t pg)
+{
+  return svmul_z (pg, svdup_s64 (0), svdup_s64 (3));
+}
+
+/*
+** s64_m_pg:
+**	mov	(z[0-9]+\.d), #3
+**	mov	(z[0-9]+\.d), #5
+**	mul	\2, p[0-7]/m, \2, \1
+**	ret
+*/
+svint64_t s64_m_pg (svbool_t pg)
+{
+  return svmul_m (pg, svdup_s64 (5), svdup_s64 (3));
+}
+
+/*
+** s64_x_ptrue:
+**	mov	z[0-9]+\.d, #15
+**	ret
+*/
+svint64_t s64_x_ptrue ()
+{
+  return svmul_x (svptrue_b64 (), svdup_s64 (5), svdup_s64 (3));
+}
+
+/*
+** s64_z_ptrue:
+**	mov	z[0-9]+\.d, #15
+**	ret
+*/
+svint64_t s64_z_ptrue ()
+{
+  return svmul_z (svptrue_b64 (), svdup_s64 (5), svdup_s64 (3));
+}
+
+/*
+** s64_m_ptrue:
+**	mov	z[0-9]+\.d, #15
+**	ret
+*/
+svint64_t s64_m_ptrue ()
+{
+  return svmul_m (svptrue_b64 (), svdup_s64 (5), svdup_s64 (3));
+}
+
+/*
+** s64_x_pg_n:
+**	mov	z[0-9]+\.d, #15
+**	ret
+*/
+svint64_t s64_x_pg_n (svbool_t pg)
+{
+  return svmul_n_s64_x (pg, svdup_s64 (5), 3);
+}
+
+/*
+** s64_x_pg_n_s64_0:
+**	mov	z[0-9]+\.b, #0
+**	ret
+*/
+svint64_t s64_x_pg_n_s64_0 (svbool_t pg)
+{
+  return svmul_n_s64_x (pg, svdup_s64 (5), 0);
+}
+
+/*
+** s64_z_pg_n:
+**	mov	z[0-9]+\.d, p[0-7]/z, #15
+**	ret
+*/
+svint64_t s64_z_pg_n (svbool_t pg)
+{
+  return svmul_n_s64_z (pg, svdup_s64 (5), 3);
+}
+
+/*
+** s64_z_pg_n_s64_0:
+**	mov	z[0-9]+\.d, p[0-7]/z, #0
+**	ret
+*/
+svint64_t s64_z_pg_n_s64_0 (svbool_t pg)
+{
+  return svmul_n_s64_z (pg, svdup_s64 (5), 0);
+}
+
+/*
+** s64_m_pg_n:
+**	mov	(z[0-9]+\.d), #3
+**	mov	(z[0-9]+\.d), #5
+**	mul	\2, p[0-7]/m, \2, \1
+**	ret
+*/
+svint64_t s64_m_pg_n (svbool_t pg)
+{
+  return svmul_n_s64_m (pg, svdup_s64 (5), 3);
+}
+
+/*
+** s64_x_ptrue_n:
+**	mov	z[0-9]+\.d, #15
+**	ret
+*/
+svint64_t s64_x_ptrue_n ()
+{
+  return svmul_n_s64_x (svptrue_b64 (), svdup_s64 (5), 3);
+}
+
+/*
+** s64_z_ptrue_n:
+**	mov	z[0-9]+\.d, #15
+**	ret
+*/
+svint64_t s64_z_ptrue_n ()
+{
+  return svmul_n_s64_z (svptrue_b64 (), svdup_s64 (5), 3);
+}
+
+/*
+** s64_m_ptrue_n:
+**	mov	z[0-9]+\.d, #15
+**	ret
+*/
+svint64_t s64_m_ptrue_n ()
+{
+  return svmul_n_s64_m (svptrue_b64 (), svdup_s64 (5), 3);
+}
+
+/*
+** u64_x_pg:
+**	mov	z[0-9]+\.d, #15
+**	ret
+*/
+svuint64_t u64_x_pg (svbool_t pg)
+{
+  return svmul_x (pg, svdup_u64 (5), svdup_u64 (3));
+}
+
+/*
+** u64_z_pg:
+**	mov	z[0-9]+\.d, p[0-7]/z, #15
+**	ret
+*/
+svuint64_t u64_z_pg (svbool_t pg)
+{
+  return svmul_z (pg, svdup_u64 (5), svdup_u64 (3));
+}
+
+/*
+** u64_m_pg:
+**	mov	(z[0-9]+\.d), #3
+**	mov	(z[0-9]+\.d), #5
+**	mul	\2, p[0-7]/m, \2, \1
+**	ret
+*/
+svuint64_t u64_m_pg (svbool_t pg)
+{
+  return svmul_m (pg, svdup_u64 (5), svdup_u64 (3));
+}
+
+/*
+** u64_x_ptrue:
+**	mov	z[0-9]+\.d, #15
+**	ret
+*/
+svuint64_t u64_x_ptrue ()
+{
+  return svmul_x (svptrue_b64 (), svdup_u64 (5), svdup_u64 (3));
+}
+
+/*
+** u64_z_ptrue:
+**	mov	z[0-9]+\.d, #15
+**	ret
+*/
+svuint64_t u64_z_ptrue ()
+{
+  return svmul_z (svptrue_b64 (), svdup_u64 (5), svdup_u64 (3));
+}
+
+/*
+** u64_m_ptrue:
+**	mov	z[0-9]+\.d, #15
+**	ret
+*/
+svuint64_t u64_m_ptrue ()
+{
+  return svmul_m (svptrue_b64 (), svdup_u64 (5), svdup_u64 (3));
+}
+
+/*
+** u64_x_pg_n:
+**	mov	z[0-9]+\.d, #15
+**	ret
+*/
+svuint64_t u64_x_pg_n (svbool_t pg)
+{
+  return svmul_n_u64_x (pg, svdup_u64 (5), 3);
+}
+
+/*
+** u64_z_pg_n:
+**	mov	z[0-9]+\.d, p[0-7]/z, #15
+**	ret
+*/
+svuint64_t u64_z_pg_n (svbool_t pg)
+{
+  return svmul_n_u64_z (pg, svdup_u64 (5), 3);
+}
+
+/*
+** u64_m_pg_n:
+**	mov	(z[0-9]+\.d), #3
+**	mov	(z[0-9]+\.d), #5
+**	mul	\2, p[0-7]/m, \2, \1
+**	ret
+*/
+svuint64_t u64_m_pg_n (svbool_t pg)
+{
+  return svmul_n_u64_m (pg, svdup_u64 (5), 3);
+}
+
+/*
+** u64_x_ptrue_n:
+**	mov	z[0-9]+\.d, #15
+**	ret
+*/
+svuint64_t u64_x_ptrue_n ()
+{
+  return svmul_n_u64_x (svptrue_b64 (), svdup_u64 (5), 3);
+}
+
+/*
+** u64_z_ptrue_n:
+**	mov	z[0-9]+\.d, #15
+**	ret
+*/
+svuint64_t u64_z_ptrue_n ()
+{
+  return svmul_n_u64_z (svptrue_b64 (), svdup_u64 (5), 3);
+}
+
+/*
+** u64_m_ptrue_n:
+**	mov	z[0-9]+\.d, #15
+**	ret
+*/
+svuint64_t u64_m_ptrue_n ()
+{
+  return svmul_n_u64_m (svptrue_b64 (), svdup_u64 (5), 3);
+}
+
+/*
+** u32_x_pg:
+**	mov	z[0-9]+\.s, #60
+**	ret
+*/
+svuint32_t u32_x_pg (svbool_t pg)
+{
+  return svmul_x (pg, svdupq_u32 (3, 15, 1, 12), svdupq_u32 (20, 4, 60, 5));
+}
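
For illustration only (not part of the patch; the function name fold_example and the build assumptions are mine), the user-visible effect mirrors the s64_x_pg test above: with this change, a fully constant svmul call is folded at gimple time, so at -O2 on an SVE-enabled aarch64 target the compiler is expected to emit a single constant move rather than a predicated multiply.

#include <arm_sve.h>

/* Hypothetical example: 5 * 3 folds to the constant vector
   { 15, 15, ... }, so the expected assembly is a "mov z0.d, #15"
   followed by "ret", with no mul instruction.  */
svint64_t fold_example (svbool_t pg)
{
  return svmul_x (pg, svdup_s64 (5), svdup_s64 (3));
}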