https://gcc.gnu.org/g:3e7549ece7c6b90b9e961778361ee2b65bf104a9
commit r15-4592-g3e7549ece7c6b90b9e961778361ee2b65bf104a9
Author: Soumya AR <soum...@nvidia.com>
Date:   Thu Oct 17 09:30:35 2024 +0530

    SVE intrinsics: Fold constant operands for svlsl.

    This patch implements constant folding for svlsl. Test cases have been
    added to check for the following cases:

    Zero, merge, and don't care predication.
    Shift by 0.
    Shift by register width.
    Overflow shift on signed and unsigned integers.
    Shift on a negative integer.
    Maximum possible shift, eg. shift by 7 on an 8-bit integer.

    The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression.
    OK for mainline?

    Signed-off-by: Soumya AR <soum...@nvidia.com>

    gcc/ChangeLog:

            * config/aarch64/aarch64-sve-builtins-base.cc (svlsl_impl::fold):
            Try constant folding.
            * config/aarch64/aarch64-sve-builtins.cc (aarch64_const_binop):
            Return 0 if shift is out of range.

    gcc/testsuite/ChangeLog:

            * gcc.target/aarch64/sve/const_fold_lsl_1.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64-sve-builtins-base.cc    |  15 ++-
 gcc/config/aarch64/aarch64-sve-builtins.cc         |   5 +-
 .../gcc.target/aarch64/sve/const_fold_lsl_1.c      | 142 +++++++++++++++++++++
 3 files changed, 160 insertions(+), 2 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index 327688756d1b..fe16d93adcd1 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -1926,6 +1926,19 @@ public:
   }
 };
 
+class svlsl_impl : public rtx_code_function
+{
+public:
+  CONSTEXPR svlsl_impl ()
+    : rtx_code_function (ASHIFT, ASHIFT) {}
+
+  gimple *
+  fold (gimple_folder &f) const override
+  {
+    return f.fold_const_binary (LSHIFT_EXPR);
+  }
+};
+
 class svmad_impl : public function_base
 {
 public:
@@ -3304,7 +3317,7 @@ FUNCTION (svldnf1uh, svldxf1_extend_impl, (TYPE_SUFFIX_u16, UNSPEC_LDNF1))
 FUNCTION (svldnf1uw, svldxf1_extend_impl, (TYPE_SUFFIX_u32, UNSPEC_LDNF1))
 FUNCTION (svldnt1, svldnt1_impl,)
 FUNCTION (svlen, svlen_impl,)
-FUNCTION (svlsl, rtx_code_function, (ASHIFT, ASHIFT))
+FUNCTION (svlsl, svlsl_impl,)
 FUNCTION (svlsl_wide, shift_wide, (ASHIFT, UNSPEC_ASHIFT_WIDE))
 FUNCTION (svlsr, rtx_code_function, (LSHIFTRT, LSHIFTRT))
 FUNCTION (svlsr_wide, shift_wide, (LSHIFTRT, UNSPEC_LSHIFTRT_WIDE))
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index 41673745cfea..af6469fff716 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -1147,7 +1147,10 @@ aarch64_const_binop (enum tree_code code, tree arg1, tree arg2)
       /* Return 0 for division by 0, like SDIV and UDIV do.  */
       if (code == TRUNC_DIV_EXPR && integer_zerop (arg2))
 	return arg2;
-
+      /* Return 0 if shift amount is out of range.  */
+      if (code == LSHIFT_EXPR
+	  && wi::geu_p (wi::to_wide (arg2), TYPE_PRECISION (type)))
+	return build_int_cst (type, 0);
       if (!poly_int_binop (poly_res, code, arg1, arg2, sign, &overflow))
 	return NULL_TREE;
       return force_fit_type (type, poly_res, false,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/const_fold_lsl_1.c b/gcc/testsuite/gcc.target/aarch64/sve/const_fold_lsl_1.c
new file mode 100644
index 000000000000..6109558001a0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/const_fold_lsl_1.c
@@ -0,0 +1,142 @@
+/* { dg-final { check-function-bodies "**" "" } } */
+/* { dg-options "-O2" } */
+
+#include "arm_sve.h"
+
+/*
+** s64_x:
+**	mov	z[0-9]+\.d, #20
+**	ret
+*/
+svint64_t s64_x (svbool_t pg) {
+    return svlsl_n_s64_x (pg, svdup_s64 (5), 2);
+}
+
+/*
+** s64_x_vect:
+**	mov	z[0-9]+\.d, #20
+**	ret
+*/
+svint64_t s64_x_vect (svbool_t pg) {
+    return svlsl_s64_x (pg, svdup_s64 (5), svdup_u64 (2));
+}
+
+/*
+** s64_z:
+**	mov	z[0-9]+\.d, p[0-7]/z, #20
+**	ret
+*/
+svint64_t s64_z (svbool_t pg) {
+    return svlsl_n_s64_z (pg, svdup_s64 (5), 2);
+}
+
+/*
+** s64_z_vect:
+**	mov	z[0-9]+\.d, p[0-7]/z, #20
+**	ret
+*/
+svint64_t s64_z_vect (svbool_t pg) {
+    return svlsl_s64_z (pg, svdup_s64 (5), svdup_u64 (2));
+}
+
+/*
+** s64_m_ptrue:
+**	mov	z[0-9]+\.d, #20
+**	ret
+*/
+svint64_t s64_m_ptrue () {
+    return svlsl_n_s64_m (svptrue_b64 (), svdup_s64 (5), 2);
+}
+
+/*
+** s64_m_ptrue_vect:
+**	mov	z[0-9]+\.d, #20
+**	ret
+*/
+svint64_t s64_m_ptrue_vect () {
+    return svlsl_s64_m (svptrue_b64 (), svdup_s64 (5), svdup_u64 (2));
+}
+
+/*
+** s64_m_pg:
+**	mov	z[0-9]+\.d, #5
+**	lsl	z[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #2
+**	ret
+*/
+svint64_t s64_m_pg (svbool_t pg) {
+    return svlsl_n_s64_m (pg, svdup_s64 (5), 2);
+}
+
+/*
+** s64_m_pg_vect:
+**	mov	z[0-9]+\.d, #5
+**	lsl	z[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #2
+**	ret
+*/
+svint64_t s64_m_pg_vect (svbool_t pg) {
+    return svlsl_s64_m (pg, svdup_s64 (5), svdup_u64 (2));
+}
+
+/*
+** s64_x_0:
+**	mov	z[0-9]+\.d, #5
+**	ret
+*/
+svint64_t s64_x_0 (svbool_t pg) {
+    return svlsl_n_s64_x (pg, svdup_s64 (5), 0);
+}
+
+/*
+** s64_x_bit_width:
+**	movi?	[vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+**	ret
+*/
+svint64_t s64_x_bit_width (svbool_t pg) {
+    return svlsl_n_s64_x (pg, svdup_s64 (5), 64);
+}
+
+/*
+** s64_x_out_of_range:
+**	movi?	[vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+**	ret
+*/
+svint64_t s64_x_out_of_range (svbool_t pg) {
+    return svlsl_n_s64_x (pg, svdup_s64 (5), 68);
+}
+
+/*
+** u8_x_unsigned_overflow:
+**	mov	z[0-9]+\.b, #-2
+**	ret
+*/
+svuint8_t u8_x_unsigned_overflow (svbool_t pg) {
+    return svlsl_n_u8_x (pg, svdup_u8 (255), 1);
+}
+
+/*
+** s8_x_signed_overflow:
+**	mov	z[0-9]+\.b, #-2
+**	ret
+*/
+svint8_t s8_x_signed_overflow (svbool_t pg) {
+    return svlsl_n_s8_x (pg, svdup_s8 (255), 1);
+}
+
+/*
+** s8_x_neg_shift:
+**	mov	z[0-9]+\.b, #-2
+**	ret
+*/
+svint8_t s8_x_neg_shift (svbool_t pg) {
+    return svlsl_n_s8_x (pg, svdup_s8 (-1), 1);
+}
+
+/*
+** s8_x_max_shift:
+**	mov	z[0-9]+\.b, #-128
+**	ret
+*/
+svint8_t s8_x_max_shift (svbool_t pg) {
+    return svlsl_n_s8_x (pg, svdup_s8 (1), 7);
+}
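For readers unfamiliar with the semantics the fold relies on: SVE LSL writes 0 to an element once the shift amount is greater than or equal to the element width, whereas the same shift amount on a plain C integer would be undefined behaviour, so aarch64_const_binop must handle the out-of-range case itself before falling back to the generic folder. A minimal scalar sketch of that rule follows; it is illustrative only, not part of the patch, and the helper name is made up.

#include <stdint.h>

/* Scalar model of the LSHIFT_EXPR rule added to aarch64_const_binop:
   an out-of-range shift amount folds to 0 rather than being left to
   C shift semantics, which would be undefined for amount >= 64.  */
static inline uint64_t
model_lsl_u64 (uint64_t value, uint64_t amount)
{
  return amount >= 64 ? 0 : value << amount;
}

With this rule, a call such as svlsl_n_s64_x (pg, svdup_s64 (5), 68) in the new test folds to a zero vector, matching what the LSL instruction would produce at run time.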