Hi Carl, on 2024/7/24 01:52, Carl Love wrote: > GCC maintainers: > > This patch removes the vsx set built-ins: __builtin_vsx_set_1ti, > __builtin_vsx_set_2df, __builtin_vsx_set_2di. With the removal of these > built-ins, the built-in attribute "set", used in the built-in definition > file, is no longer needed. The "set" and the associated code for the "set" > is removed. > > The assembly code generated by using C code to set an element of a vector > versus using the vsx set built-in to set an element was investigated. With > -O0 optimization the generated assembly code is comparable in terms of the > generated assembly instructions and number of instructions. For the -O3 > optimization level, the 2DI and 2DF cases the built-ins and the C code > generate identical assembly code. The assembly code generated for the 1TI > case for the C code has one less instruction. The built-in generates an > extra load instruction. Hence, the C code is better as it has fewer load > instructions. > > The testcase for the __builtin_vsx_set_2df is removed. The other built-ins > do not have testcases. > > The patch has been tested on a Power 10 LE system with no regressions. > > Please let me know if the patch is acceptable for mainline. Thanks. > > Carl > > ---------------------------------------------------------------------------------------------------------- > rs6000, remove built-ins __builtin_vsx_set_1ti, __builtin_vsx_set_2df, > __builtin_vsx_set_2di > > The built-ins set a value in a vector. The same operation can be done > in C-code. The assembly code generated from the C-code is as good or > better than the code generated by the built-ins. With default > optimization the number of assembly generated for the two methods are > similar. With -O3 optimization, the assembly generated for the two > approaches is identical for the 2DF and 2DI types. The assembly for > the C-code version of the 1Ti requres one less assembly instruction.
Nit: s/requres/requires/ > It also only uses one load versus two loads for the built-in. > > With the removal of the built-ins, there are no other uses of the > set built-in attribute. The code associated with the set built-in > attribute is removed. > > Finally, the testcase for the __builtin_vsx_set_2df is removed. The > other built-ins do not have testcases. > > gcc/ChangeLog: > * config/rs6000/rs6000-builtin.cc (get_element_number, > altivec_expand_vec_set_builtin): Remove functions. > (rs6000_expand_builtin): Remove the if statement to call > altivec_expand_vec_set_builtin. > * config/rs6000/rs6000-builtins.def (__builtin_vsx_set_1ti, > __builtin_vsx_set_2df, __builtin_vsx_set_2di): Remove the > built-in definitions. > * config/rs6000/rs6000-gen-builtins.cc (struct attrinfo): > Remove the isset variable from the structure. > (parse_bif_attrs): Remove the uses of the isset variable. > > gcc/testsuite/ChangeLog: > * gcc.target/powerpc/vsx-builtin-3.c: Remove test cases for the > __builtin_vsx_set_2df built-in. > --- > gcc/config/rs6000/rs6000-builtin.cc | 53 ------------------- > gcc/config/rs6000/rs6000-builtins.def | 10 ---- > gcc/config/rs6000/rs6000-gen-builtins.cc | 29 ++++------ > .../gcc.target/powerpc/vsx-builtin-3.c | 6 --- > 4 files changed, 11 insertions(+), 87 deletions(-) > > diff --git a/gcc/config/rs6000/rs6000-builtin.cc > b/gcc/config/rs6000/rs6000-builtin.cc > index 117cf0125f8..099cbc82245 100644 > --- a/gcc/config/rs6000/rs6000-builtin.cc > +++ b/gcc/config/rs6000/rs6000-builtin.cc > @@ -2313,56 +2313,6 @@ altivec_expand_predicate_builtin (enum insn_code > icode, tree exp, rtx target) > return target; > } > > -/* Return the integer constant in ARG. Constrain it to be in the range > - of the subparts of VEC_TYPE; issue an error if not. 
*/ > - > -static int > -get_element_number (tree vec_type, tree arg) > -{ > - unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1; > - > - if (!tree_fits_uhwi_p (arg) > - || (elt = tree_to_uhwi (arg), elt > max)) > - { > - error ("selector must be an integer constant in the range [0, %wi]", > max); > - return 0; > - } > - > - return elt; > -} > - > -/* Expand vec_set builtin. */ > -static rtx > -altivec_expand_vec_set_builtin (tree exp) > -{ > - machine_mode tmode, mode1; > - tree arg0, arg1, arg2; > - int elt; > - rtx op0, op1; > - > - arg0 = CALL_EXPR_ARG (exp, 0); > - arg1 = CALL_EXPR_ARG (exp, 1); > - arg2 = CALL_EXPR_ARG (exp, 2); > - > - tmode = TYPE_MODE (TREE_TYPE (arg0)); > - mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0))); > - gcc_assert (VECTOR_MODE_P (tmode)); > - > - op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL); > - op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL); > - elt = get_element_number (TREE_TYPE (arg0), arg2); > - > - if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode) > - op1 = convert_modes (mode1, GET_MODE (op1), op1, true); > - > - op0 = force_reg (tmode, op0); > - op1 = force_reg (mode1, op1); > - > - rs6000_expand_vector_set (op0, op1, GEN_INT (elt)); > - > - return op0; > -} > - > /* Expand vec_ext builtin. 
*/ > static rtx > altivec_expand_vec_ext_builtin (tree exp, rtx target) > @@ -3365,9 +3315,6 @@ rs6000_expand_builtin (tree exp, rtx target, rtx /* > subtarget */, > if (bif_is_cpu (*bifaddr)) > return cpu_expand_builtin (fcode, exp, target); > > - if (bif_is_set (*bifaddr)) > - return altivec_expand_vec_set_builtin (exp); > - > if (bif_is_extract (*bifaddr)) > return altivec_expand_vec_ext_builtin (exp, target); > > diff --git a/gcc/config/rs6000/rs6000-builtins.def > b/gcc/config/rs6000/rs6000-builtins.def > index 75c33aa9ffc..646707b3bd9 100644 > --- a/gcc/config/rs6000/rs6000-builtins.def > +++ b/gcc/config/rs6000/rs6000-builtins.def > @@ -115,7 +115,6 @@ > ; > ; Attributes are strings, and the allowed ones are listed below. > ; > -; set Process as a vec_set function > ; extract Process as a vec_extract function > ; nosoft Not valid with -msoft-float > ; ldvec Needs special handling for vec_ld semantics > @@ -1401,15 +1400,6 @@ > const vsll __builtin_vsx_mul_2di (vsll, vsll); > MUL_V2DI vsx_mul_v2di {} > > - const vsq __builtin_vsx_set_1ti (vsq, signed __int128, const int<0,0>); > - SET_1TI vsx_set_v1ti {set} > - > - const vd __builtin_vsx_set_2df (vd, double, const int<0,1>); > - SET_2DF vsx_set_v2df {set} > - > - const vsll __builtin_vsx_set_2di (vsll, signed long long, const int<0,1>); > - SET_2DI vsx_set_v2di {set} > - > const vd __builtin_vsx_splat_2df (double); > SPLAT_2DF vsx_splat_v2df {} > > diff --git a/gcc/config/rs6000/rs6000-gen-builtins.cc > b/gcc/config/rs6000/rs6000-gen-builtins.cc > index 7856d46cd3e..13752d482f9 100644 > --- a/gcc/config/rs6000/rs6000-gen-builtins.cc > +++ b/gcc/config/rs6000/rs6000-gen-builtins.cc > @@ -370,7 +370,6 @@ struct typelist > /* Attributes of a builtin function. 
*/ > struct attrinfo > { > - bool isset; > bool isextract; > bool isnosoft; > bool isldvec; > @@ -1394,9 +1393,7 @@ parse_bif_attrs (attrinfo *attrptr) > attrname = match_identifier (); > if (attrname) > { > - if (!strcmp (attrname, "set")) > - attrptr->isset = 1; > - else if (!strcmp (attrname, "extract")) > + if (!strcmp (attrname, "extract")) > attrptr->isextract = 1; > else if (!strcmp (attrname, "nosoft")) > attrptr->isnosoft = 1; > @@ -1469,14 +1466,14 @@ parse_bif_attrs (attrinfo *attrptr) > > #ifdef DEBUG > diag (0, > - "attribute set: set = %d, extract = %d, nosoft = %d, " > - "ldvec = %d, stvec = %d, reve = %d, pred = %d, htm = %d, " > - "htmspr = %d, htmcr = %d, mma = %d, quad = %d, pair = %d, " > - "mmaint = %d, no32bit = %d, 32bit = %d, cpu = %d, ldstmask = %d, " > - "lxvrse = %d, lxvrze = %d, endian = %d, ibmdld = %d, ibm128 = %d.\n", > - attrptr->isset, attrptr->isextract, attrptr->isnosoft, > - attrptr->isldvec, attrptr->isstvec, attrptr->isreve, attrptr->ispred, > - attrptr->ishtm, attrptr->ishtmspr, attrptr->ishtmcr, attrptr->ismma, > + "extract = %d, nosoft = %d, ldvec = %d, stvec = %d, reve = %d, " > + "pred = %d, htm = %d, htmspr = %d, htmcr = %d, mma = %d, " > + "quad = %d, pair = %d, mmaint = %d, no32bit = %d, 32bit = %d, " > + "cpu = %d, ldstmask = %d, lxvrse = %d, lxvrze = %d, endian = %d, " > + "ibmdld = %d, ibm128 = %d.\n", > + attrptr->isextract, attrptr->isnosoft,attrptr->isldvec, > + attrptr->isstvec, attrptr->isreve, attrptr->ispred, attrptr->ishtm, > + attrptr->ishtmspr, attrptr->ishtmcr, attrptr->ismma, > attrptr->isquad, attrptr->ispair, attrptr->ismmaint, > attrptr->isno32bit, attrptr->is32bit, attrptr->iscpu, > attrptr->isldstmask, attrptr->islxvrse, attrptr->islxvrze, > @@ -2271,8 +2268,7 @@ write_decls (void) > fprintf (header_file, " rs6000_gen_builtins assoc_bif;\n"); > fprintf (header_file, "};\n\n"); > > - /* Bit pattern 0x00000001 is available. 
*/ > - fprintf (header_file, "#define bif_set_bit\t\t(0x00000002)\n"); > + /* Bit patterns 0x00000001 and 0x00000002 are available. */ > fprintf (header_file, "#define bif_extract_bit\t\t(0x00000004)\n"); > fprintf (header_file, "#define bif_nosoft_bit\t\t(0x00000008)\n"); > fprintf (header_file, "#define bif_ldvec_bit\t\t(0x00000010)\n"); > @@ -2296,8 +2292,6 @@ write_decls (void) > fprintf (header_file, "#define bif_ibmld_bit\t\t(0x00400000)\n"); > fprintf (header_file, "#define bif_ibm128_bit\t\t(0x00800000)\n"); > fprintf (header_file, "\n"); > - fprintf (header_file, > - "#define bif_is_set(x)\t\t((x).bifattrs & bif_set_bit)\n"); > fprintf (header_file, > "#define bif_is_extract(x)\t((x).bifattrs & bif_extract_bit)\n"); > fprintf (header_file, > @@ -2497,10 +2491,9 @@ write_bif_static_init (void) > fprintf (init_file, " /* nargs */\t%d,\n", > bifp->proto.nargs); > fprintf (init_file, " /* bifattrs */\t0"); > - if (bifp->attrs.isset) > - fprintf (init_file, " | bif_set_bit"); > if (bifp->attrs.isextract) > fprintf (init_file, " | bif_extract_bit"); > + Nit: unnecessary empty line. OK for trunk with the nits fixed, thanks! BR, Kewen > if (bifp->attrs.isnosoft) > fprintf (init_file, " | bif_nosoft_bit"); > if (bifp->attrs.isldvec) > diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-builtin-3.c > b/gcc/testsuite/gcc.target/powerpc/vsx-builtin-3.c > index d67f97c8011..67c93be1469 100644 > --- a/gcc/testsuite/gcc.target/powerpc/vsx-builtin-3.c > +++ b/gcc/testsuite/gcc.target/powerpc/vsx-builtin-3.c > @@ -118,12 +118,6 @@ void do_concat (void) > d[0][0] = __builtin_vsx_concat_2df (x, y); > } > > -void do_set (void) > -{ > - d[0][0] = __builtin_vsx_set_2df (d[0][1], x, 0); > - d[1][0] = __builtin_vsx_set_2df (d[1][1], y, 1); > -} > - > extern double z[][4]; > > int do_math (void)