Gentle ping! Please review.
Thanks & Regards Jeevitha On 27/10/25 12:06 pm, jeevitha wrote: > Ping! > > please review. > > Thanks & Regards > Jeevitha > > On 18/09/25 3:25 pm, jeevitha wrote: >> Hi All, >> >> The following patch has been bootstrapped and regtested on powerpc64le-linux. >> >> PowerPC vector shift left instructions (vslb, vslh, vslw, vsld) use modulo >> semantics for the shift amount. Shifts by (element_bit_width - 1) can be >> optimized by replacing the shift amount splat with a vector of 0xFF..FF. On >> Power8, this reduces instruction overhead by using vspltis[wd]. >> >> This patch adds rs6000_optimize_vector_bitwidth_shift to detect splat >> constants >> of (element_bit_width - 1) and replace them with a vector of all -1s, thereby >> avoiding unnecessary memory loads. >> >> 2025-09-18 Jeevitha Palanisamy <[email protected]> >> >> gcc/ >> PR target/119912 >> * config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_builtin): Call >> to new function. >> (rs6000_optimize_vector_bitwidth_shift): New function to optimize >> vector immediate shifts. >> >> gcc/testsuite/ >> PR target/119912 >> * gcc.target/powerpc/pr119912.c: New test. >> >> diff --git a/gcc/config/rs6000/rs6000-builtin.cc >> b/gcc/config/rs6000/rs6000-builtin.cc >> index bc1580f051b..517c99bfcfb 100644 >> --- a/gcc/config/rs6000/rs6000-builtin.cc >> +++ b/gcc/config/rs6000/rs6000-builtin.cc >> @@ -1264,6 +1264,68 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator >> *gsi, >> return true; >> } >> >> +/* Try to optimize shift by splat(element_bit_width - 1). >> + Returns true if handled, false otherwise. 
*/ >> +static bool >> +rs6000_optimize_vector_bitwidth_shift (gimple_stmt_iterator *gsi, >> + tree arg0, tree arg1, >> + tree lhs, location_t loc, enum tree_code >> subcode) >> +{ >> + int element_bit_width = 128 / VECTOR_CST_NELTS (arg1); >> + tree arg1_type = TREE_TYPE (arg1); >> + tree unsigned_arg1_type = unsigned_type_for (TREE_TYPE (arg1)); >> + tree unsigned_element_type = unsigned_type_for (TREE_TYPE (arg1_type)); >> + tree check_arg = arg1; >> + >> + if (TARGET_P9_VECTOR || TYPE_PRECISION (unsigned_element_type) <= 16) >> + return false; >> + >> + while (TREE_CODE (check_arg) == SSA_NAME >> + || TREE_CODE (check_arg) == VIEW_CONVERT_EXPR) >> + { >> + if (TREE_CODE (check_arg) == SSA_NAME) >> + { >> + gimple *def_stmt = SSA_NAME_DEF_STMT (check_arg); >> + if (!def_stmt || !gimple_assign_lhs (def_stmt)) >> + break; >> + check_arg = gimple_assign_rhs1 (def_stmt); >> + } >> + else >> + check_arg = TREE_OPERAND (check_arg, 0); >> + } >> + >> + /* Optimize if splat of (element_bit_width - 1). */ >> + if (TREE_CODE (check_arg) == VECTOR_CST) >> + { >> + tree first_elt = vector_cst_elt (check_arg, 0); >> + bool is_splat = true; >> + >> + if (wi::to_widest (first_elt) != element_bit_width - 1) >> + return false; >> + >> + for (size_t i = 1; i < VECTOR_CST_NELTS (check_arg); i++) >> + if (!operand_equal_p (vector_cst_elt (check_arg, i), first_elt, 0)) >> + { >> + is_splat = false; >> + break; >> + } >> + >> + if (is_splat) >> + { >> + int n_elts = VECTOR_CST_NELTS (arg1); >> + tree_vector_builder elts (unsigned_arg1_type, n_elts, 1); >> + for (int i = 0; i < n_elts; i++) >> + elts.safe_push (build_int_cst (unsigned_element_type, -1)); >> + tree new_arg1 = elts.build (); >> + gimple *g = gimple_build_assign (lhs, subcode, arg0, new_arg1); >> + gimple_set_location (g, loc); >> + gsi_replace (gsi, g, true); >> + return true; >> + } >> + } >> + return false; >> +} >> + >> /* Fold a machine-dependent built-in in GIMPLE. 
(For folding into >> a constant, use rs6000_fold_builtin.) */ >> bool >> @@ -1720,6 +1782,11 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) >> tree unsigned_element_type = unsigned_type_for (TREE_TYPE (arg1_type)); >> loc = gimple_location (stmt); >> lhs = gimple_call_lhs (stmt); >> + >> + if (rs6000_optimize_vector_bitwidth_shift (gsi, arg0, arg1, lhs, loc, >> LSHIFT_EXPR)) >> + { >> + return true; >> + } >> /* Force arg1 into the range valid matching the arg0 type. */ >> /* Build a vector consisting of the max valid bit-size values. */ >> int n_elts = VECTOR_CST_NELTS (arg1); >> diff --git a/gcc/testsuite/gcc.target/powerpc/pr119912.c >> b/gcc/testsuite/gcc.target/powerpc/pr119912.c >> new file mode 100644 >> index 00000000000..d1802bba801 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/powerpc/pr119912.c >> @@ -0,0 +1,18 @@ >> +/* { dg-do compile } */ >> +/* { dg-options "-mdejagnu-cpu=power8 -mvsx -O2" } */ >> + >> +#include <altivec.h> >> + >> +vector unsigned int shlw(vector unsigned int in) >> +{ >> + return vec_sl(in, (vector unsigned int)vec_splats((unsigned char)31)); >> +} >> + >> +vector unsigned long long shld(vector unsigned long long in) >> +{ >> + return vec_sl(in, (vector unsigned long long)vec_splats(63)); >> +} >> + >> +/* { dg-final { scan-assembler-times {\mvspltis[bhwd] [0-9]+,-1\M} 2 } } */ >> +/* { dg-final { scan-assembler-times {\mvsl[bhwd]\M} 2 } } */ >> +/* { dg-final { scan-assembler-times {\mlvx\M} 0 } } */ >> >
