Hi Carl,
on 2024/7/24 01:52, Carl Love wrote:
> GCC maintainers:
>
> This patch removes the vsx set built-ins: __builtin_vsx_set_1ti,
> __builtin_vsx_set_2df, __builtin_vsx_set_2di. With the removal of these
> built-ins, the built-in attribute "set", used in the built-in definition
> file, is no longer needed. The "set" and the associated code for the "set"
> is removed.
>
> The assembly code generated by using C code to set an element of a vector
> versus using the vsx set built-in to set an element was investigated. With
> -O0 optimization the generated assembly code is comparable in terms of the
> generated assembly instructions and number of instructions. For the -O3
> optimization level, in the 2DI and 2DF cases the built-ins and the C code
> generate identical assembly code. The assembly code generated for the 1TI
> case for the C code has one less instruction. The built-in generates an
> extra load instruction. Hence, the C code is better as it has fewer load
> instructions.
>
> The testcase for the __builtin_vsx_set_2df is removed. The other built-ins
> do not have testcases.
>
> The patch has been tested on a Power 10 LE system with no regressions.
>
> Please let me know if the patch is acceptable for mainline. Thanks.
>
> Carl
>
> ----------------------------------------------------------------------------------------------------------
> rs6000, remove built-ins __builtin_vsx_set_1ti, __builtin_vsx_set_2df,
> __builtin_vsx_set_2di
>
> The built-ins set a value in a vector. The same operation can be done
> in C-code. The assembly code generated from the C-code is as good or
> better than the code generated by the built-ins. With default
> optimization the number of assembly instructions generated for the two
> methods is similar. With -O3 optimization, the assembly generated for the two
> approaches is identical for the 2DF and 2DI types. The assembly for
> the C-code version of the 1Ti requres one less assembly instruction.
Nit: s/requres/requires/
> It also only uses one load versus two loads for the built-in.
>
> With the removal of the built-ins, there are no other uses of the
> set built-in attribute. The code associated with the set built-in
> attribute is removed.
>
> Finally, the testcase for the __builtin_vsx_set_2df is removed. The
> other built-ins do not have testcases.
>
> gcc/ChangeLog:
> * config/rs6000/rs6000-builtin.cc (get_element_number,
> altivec_expand_vec_set_builtin): Remove functions.
> (rs6000_expand_builtin): Remove the if statement to call
> altivec_expand_vec_set_builtin.
> * config/rs6000/rs6000-builtins.def (__builtin_vsx_set_1ti,
> __builtin_vsx_set_2df, __builtin_vsx_set_2di): Remove the
> built-in definitions.
> * config/rs6000/rs6000-gen-builtins.cc (struct attrinfo):
> Remove the isset variable from the structure.
> (parse_bif_attrs): Remove the uses of the isset variable.
>
> gcc/testsuite/ChangeLog:
> * gcc.target/powerpc/vsx-builtin-3.c: Remove test cases for the
> __builtin_vsx_set_2df built-in.
> ---
> gcc/config/rs6000/rs6000-builtin.cc | 53 -------------------
> gcc/config/rs6000/rs6000-builtins.def | 10 ----
> gcc/config/rs6000/rs6000-gen-builtins.cc | 29 ++++------
> .../gcc.target/powerpc/vsx-builtin-3.c | 6 ---
> 4 files changed, 11 insertions(+), 87 deletions(-)
>
> diff --git a/gcc/config/rs6000/rs6000-builtin.cc
> b/gcc/config/rs6000/rs6000-builtin.cc
> index 117cf0125f8..099cbc82245 100644
> --- a/gcc/config/rs6000/rs6000-builtin.cc
> +++ b/gcc/config/rs6000/rs6000-builtin.cc
> @@ -2313,56 +2313,6 @@ altivec_expand_predicate_builtin (enum insn_code
> icode, tree exp, rtx target)
> return target;
> }
>
> -/* Return the integer constant in ARG. Constrain it to be in the range
> - of the subparts of VEC_TYPE; issue an error if not. */
> -
> -static int
> -get_element_number (tree vec_type, tree arg)
> -{
> - unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
> -
> - if (!tree_fits_uhwi_p (arg)
> - || (elt = tree_to_uhwi (arg), elt > max))
> - {
> - error ("selector must be an integer constant in the range [0, %wi]",
> max);
> - return 0;
> - }
> -
> - return elt;
> -}
> -
> -/* Expand vec_set builtin. */
> -static rtx
> -altivec_expand_vec_set_builtin (tree exp)
> -{
> - machine_mode tmode, mode1;
> - tree arg0, arg1, arg2;
> - int elt;
> - rtx op0, op1;
> -
> - arg0 = CALL_EXPR_ARG (exp, 0);
> - arg1 = CALL_EXPR_ARG (exp, 1);
> - arg2 = CALL_EXPR_ARG (exp, 2);
> -
> - tmode = TYPE_MODE (TREE_TYPE (arg0));
> - mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
> - gcc_assert (VECTOR_MODE_P (tmode));
> -
> - op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
> - op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
> - elt = get_element_number (TREE_TYPE (arg0), arg2);
> -
> - if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
> - op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
> -
> - op0 = force_reg (tmode, op0);
> - op1 = force_reg (mode1, op1);
> -
> - rs6000_expand_vector_set (op0, op1, GEN_INT (elt));
> -
> - return op0;
> -}
> -
> /* Expand vec_ext builtin. */
> static rtx
> altivec_expand_vec_ext_builtin (tree exp, rtx target)
> @@ -3365,9 +3315,6 @@ rs6000_expand_builtin (tree exp, rtx target, rtx /*
> subtarget */,
> if (bif_is_cpu (*bifaddr))
> return cpu_expand_builtin (fcode, exp, target);
>
> - if (bif_is_set (*bifaddr))
> - return altivec_expand_vec_set_builtin (exp);
> -
> if (bif_is_extract (*bifaddr))
> return altivec_expand_vec_ext_builtin (exp, target);
>
> diff --git a/gcc/config/rs6000/rs6000-builtins.def
> b/gcc/config/rs6000/rs6000-builtins.def
> index 75c33aa9ffc..646707b3bd9 100644
> --- a/gcc/config/rs6000/rs6000-builtins.def
> +++ b/gcc/config/rs6000/rs6000-builtins.def
> @@ -115,7 +115,6 @@
> ;
> ; Attributes are strings, and the allowed ones are listed below.
> ;
> -; set Process as a vec_set function
> ; extract Process as a vec_extract function
> ; nosoft Not valid with -msoft-float
> ; ldvec Needs special handling for vec_ld semantics
> @@ -1401,15 +1400,6 @@
> const vsll __builtin_vsx_mul_2di (vsll, vsll);
> MUL_V2DI vsx_mul_v2di {}
>
> - const vsq __builtin_vsx_set_1ti (vsq, signed __int128, const int<0,0>);
> - SET_1TI vsx_set_v1ti {set}
> -
> - const vd __builtin_vsx_set_2df (vd, double, const int<0,1>);
> - SET_2DF vsx_set_v2df {set}
> -
> - const vsll __builtin_vsx_set_2di (vsll, signed long long, const int<0,1>);
> - SET_2DI vsx_set_v2di {set}
> -
> const vd __builtin_vsx_splat_2df (double);
> SPLAT_2DF vsx_splat_v2df {}
>
> diff --git a/gcc/config/rs6000/rs6000-gen-builtins.cc
> b/gcc/config/rs6000/rs6000-gen-builtins.cc
> index 7856d46cd3e..13752d482f9 100644
> --- a/gcc/config/rs6000/rs6000-gen-builtins.cc
> +++ b/gcc/config/rs6000/rs6000-gen-builtins.cc
> @@ -370,7 +370,6 @@ struct typelist
> /* Attributes of a builtin function. */
> struct attrinfo
> {
> - bool isset;
> bool isextract;
> bool isnosoft;
> bool isldvec;
> @@ -1394,9 +1393,7 @@ parse_bif_attrs (attrinfo *attrptr)
> attrname = match_identifier ();
> if (attrname)
> {
> - if (!strcmp (attrname, "set"))
> - attrptr->isset = 1;
> - else if (!strcmp (attrname, "extract"))
> + if (!strcmp (attrname, "extract"))
> attrptr->isextract = 1;
> else if (!strcmp (attrname, "nosoft"))
> attrptr->isnosoft = 1;
> @@ -1469,14 +1466,14 @@ parse_bif_attrs (attrinfo *attrptr)
>
> #ifdef DEBUG
> diag (0,
> - "attribute set: set = %d, extract = %d, nosoft = %d, "
> - "ldvec = %d, stvec = %d, reve = %d, pred = %d, htm = %d, "
> - "htmspr = %d, htmcr = %d, mma = %d, quad = %d, pair = %d, "
> - "mmaint = %d, no32bit = %d, 32bit = %d, cpu = %d, ldstmask = %d, "
> - "lxvrse = %d, lxvrze = %d, endian = %d, ibmdld = %d, ibm128 = %d.\n",
> - attrptr->isset, attrptr->isextract, attrptr->isnosoft,
> - attrptr->isldvec, attrptr->isstvec, attrptr->isreve, attrptr->ispred,
> - attrptr->ishtm, attrptr->ishtmspr, attrptr->ishtmcr, attrptr->ismma,
> + "extract = %d, nosoft = %d, ldvec = %d, stvec = %d, reve = %d, "
> + "pred = %d, htm = %d, htmspr = %d, htmcr = %d, mma = %d, "
> + "quad = %d, pair = %d, mmaint = %d, no32bit = %d, 32bit = %d, "
> + "cpu = %d, ldstmask = %d, lxvrse = %d, lxvrze = %d, endian = %d, "
> + "ibmdld = %d, ibm128 = %d.\n",
> + attrptr->isextract, attrptr->isnosoft,attrptr->isldvec,
> + attrptr->isstvec, attrptr->isreve, attrptr->ispred, attrptr->ishtm,
> + attrptr->ishtmspr, attrptr->ishtmcr, attrptr->ismma,
> attrptr->isquad, attrptr->ispair, attrptr->ismmaint,
> attrptr->isno32bit, attrptr->is32bit, attrptr->iscpu,
> attrptr->isldstmask, attrptr->islxvrse, attrptr->islxvrze,
> @@ -2271,8 +2268,7 @@ write_decls (void)
> fprintf (header_file, " rs6000_gen_builtins assoc_bif;\n");
> fprintf (header_file, "};\n\n");
>
> - /* Bit pattern 0x00000001 is available. */
> - fprintf (header_file, "#define bif_set_bit\t\t(0x00000002)\n");
> + /* Bit patterns 0x00000001 and 0x00000002 are available. */
> fprintf (header_file, "#define bif_extract_bit\t\t(0x00000004)\n");
> fprintf (header_file, "#define bif_nosoft_bit\t\t(0x00000008)\n");
> fprintf (header_file, "#define bif_ldvec_bit\t\t(0x00000010)\n");
> @@ -2296,8 +2292,6 @@ write_decls (void)
> fprintf (header_file, "#define bif_ibmld_bit\t\t(0x00400000)\n");
> fprintf (header_file, "#define bif_ibm128_bit\t\t(0x00800000)\n");
> fprintf (header_file, "\n");
> - fprintf (header_file,
> - "#define bif_is_set(x)\t\t((x).bifattrs & bif_set_bit)\n");
> fprintf (header_file,
> "#define bif_is_extract(x)\t((x).bifattrs & bif_extract_bit)\n");
> fprintf (header_file,
> @@ -2497,10 +2491,9 @@ write_bif_static_init (void)
> fprintf (init_file, " /* nargs */\t%d,\n",
> bifp->proto.nargs);
> fprintf (init_file, " /* bifattrs */\t0");
> - if (bifp->attrs.isset)
> - fprintf (init_file, " | bif_set_bit");
> if (bifp->attrs.isextract)
> fprintf (init_file, " | bif_extract_bit");
> +
Nit: unnecessary empty line.
OK for trunk with the nits fixed, thanks!
BR,
Kewen
> if (bifp->attrs.isnosoft)
> fprintf (init_file, " | bif_nosoft_bit");
> if (bifp->attrs.isldvec)
> diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-builtin-3.c
> b/gcc/testsuite/gcc.target/powerpc/vsx-builtin-3.c
> index d67f97c8011..67c93be1469 100644
> --- a/gcc/testsuite/gcc.target/powerpc/vsx-builtin-3.c
> +++ b/gcc/testsuite/gcc.target/powerpc/vsx-builtin-3.c
> @@ -118,12 +118,6 @@ void do_concat (void)
> d[0][0] = __builtin_vsx_concat_2df (x, y);
> }
>
> -void do_set (void)
> -{
> - d[0][0] = __builtin_vsx_set_2df (d[0][1], x, 0);
> - d[1][0] = __builtin_vsx_set_2df (d[1][1], y, 1);
> -}
> -
> extern double z[][4];
>
> int do_math (void)