Hi Carl,

Some minor comments are inlined on top of Segher's and Peter's comments.

on 2024/7/20 04:04, Carl Love wrote:
> GCC developers:
> 
> The following patch adds the int128 varients to the existing overloaded 
> built-ins vec_sld, vec_sldb, vec_sldw, vec_sll, vec_slo, vec_srdb, vec_srl, 
> vec_sro.  These varients were requested by Steve Munroe.
> 
> The patch has been tested on a Power 10 system with no regressions.
> 
> Please let me know if the patch is acceptable for mainline.
> 
>                                    Carl
> 
> 
> -------------------------------------------------------------------------------------------------------
>  rs6000, Add new overloaded vector shift builtin int128 varients
> 
> Add the signed __int128 and unsigned __int128 argument types for the
> overloaded built-ins vec_sld, vec_sldb, vec_sldw, vec_sll, vec_slo,
> vec_srdb, vec_srl, vec_sro.  For each of the new argument types add a
> testcase and update the documentation for the built-in.
> 
> Add the missing internal names for the float and double types for
> overloaded builtin vec_sld for the float and double types.

This isn't needed, see below explanation.

> 
> gcc/ChangeLog:
>     * config/rs6000/altivec.md (vs<SLDB_lr>db_<mode>): Change
>     define_insn iterator to VEC_IC.
>     * config/rs6000/rs6000-builtins.def (__builtin_altivec_vsldoi_v1ti,
>     __builtin_vsx_xxsldwi_v1ti, __builtin_altivec_vsldb_v1ti,
>     __builtin_altivec_vsrdb_v1ti): New builtin definitions.
>     * config/rs6000/rs6000-overload.def (vec_sld, vec_sldb, vec_sldw,
>     vec_sll, vec_slo, vec_srdb, vec_srl, vec_sro): New overloaded
>     definitions.
>     (vec_sld): Add missing internal names.
>     * doc/extend.texi (vec_sld, vec_sldb, vec_sldw,    vec_sll, vec_slo,
>     vec_srdb, vec_srl, vec_sro): Add documentation for new overloaded
>     built-ins.
> 
> gcc/testsuite/ChangeLog:
>     * gcc.target/powerpc/vec-shift-double-runnable-int128.c: New test
>     file.
> ---
>  gcc/config/rs6000/altivec.md                  |   6 +-
>  gcc/config/rs6000/rs6000-builtins.def         |  12 +
>  gcc/config/rs6000/rs6000-overload.def         |  44 ++-
>  gcc/doc/extend.texi                           |  42 +++
>  .../vec-shift-double-runnable-int128.c        | 349 ++++++++++++++++++
>  5 files changed, 448 insertions(+), 5 deletions(-)
>  create mode 100644 
> gcc/testsuite/gcc.target/powerpc/vec-shift-double-runnable-int128.c
> 
> diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
> index 5af9bf920a2..2a18ee44526 100644
> --- a/gcc/config/rs6000/altivec.md
> +++ b/gcc/config/rs6000/altivec.md
> @@ -878,9 +878,9 @@ (define_int_attr SLDB_lr [(UNSPEC_SLDB "l")
>  (define_int_iterator VSHIFT_DBL_LR [UNSPEC_SLDB UNSPEC_SRDB])
> 
>  (define_insn "vs<SLDB_lr>db_<mode>"
> - [(set (match_operand:VI2 0 "register_operand" "=v")
> -  (unspec:VI2 [(match_operand:VI2 1 "register_operand" "v")
> -           (match_operand:VI2 2 "register_operand" "v")
> + [(set (match_operand:VEC_IC 0 "register_operand" "=v")
> +  (unspec:VEC_IC [(match_operand:VEC_IC 1 "register_operand" "v")
> +           (match_operand:VEC_IC 2 "register_operand" "v")
>             (match_operand:QI 3 "const_0_to_12_operand" "n")]
>            VSHIFT_DBL_LR))]
>    "TARGET_POWER10"
> diff --git a/gcc/config/rs6000/rs6000-builtins.def 
> b/gcc/config/rs6000/rs6000-builtins.def
> index 77eb0f7e406..fbb6e1ddf85 100644
> --- a/gcc/config/rs6000/rs6000-builtins.def
> +++ b/gcc/config/rs6000/rs6000-builtins.def
> @@ -964,6 +964,9 @@
>    const vss __builtin_altivec_vsldoi_8hi (vss, vss, const int<4>);
>      VSLDOI_8HI altivec_vsldoi_v8hi {}
> 
> +  const vsq __builtin_altivec_vsldoi_v1ti (vsq, vsq, const int<4>);
> +    VSLDOI_V1TI altivec_vsldoi_v1ti {}
> +
>    const vss __builtin_altivec_vslh (vss, vus);
>      VSLH vashlv8hi3 {}
> 
> @@ -1831,6 +1834,9 @@
>    const vsll __builtin_vsx_xxsldwi_2di (vsll, vsll, const int<2>);
>      XXSLDWI_2DI vsx_xxsldwi_v2di {}
> 
> +  const vsq __builtin_vsx_xxsldwi_v1ti (vsq, vsq, const int<2>);
> +    XXSLDWI_Q vsx_xxsldwi_v1ti {}
> +
>    const vf __builtin_vsx_xxsldwi_4sf (vf, vf, const int<2>);
>      XXSLDWI_4SF vsx_xxsldwi_v4sf {}
> 
> @@ -3299,6 +3305,9 @@
>    const vss __builtin_altivec_vsldb_v8hi (vss, vss, const int<3>);
>      VSLDB_V8HI vsldb_v8hi {}
> 
> +  const vsq __builtin_altivec_vsldb_v1ti (vsq, vsq, const int<3>);
> +    VSLDB_V1TI vsldb_v1ti {}
> +
>    const vsq __builtin_altivec_vslq (vsq, vuq);
>      VSLQ vashlv1ti3 {}
> 
> @@ -3317,6 +3326,9 @@
>    const vss __builtin_altivec_vsrdb_v8hi (vss, vss, const int<3>);
>      VSRDB_V8HI vsrdb_v8hi {}
> 
> +  const vsq __builtin_altivec_vsrdb_v1ti (vsq, vsq, const int<3>);
> +    VSRDB_V1TI vsrdb_v1ti {}
> +
>    const vsq __builtin_altivec_vsrq (vsq, vuq);
>      VSRQ vlshrv1ti3 {}
> 
> diff --git a/gcc/config/rs6000/rs6000-overload.def 
> b/gcc/config/rs6000/rs6000-overload.def
> index c4ecafc6f7e..302e0232533 100644
> --- a/gcc/config/rs6000/rs6000-overload.def
> +++ b/gcc/config/rs6000/rs6000-overload.def
> @@ -3396,9 +3396,13 @@
>    vull __builtin_vec_sld (vull, vull, const int);
>      VSLDOI_2DI  VSLDOI_VULL
>    vf __builtin_vec_sld (vf, vf, const int);
> -    VSLDOI_4SF
> +    VSLDOI_4SF VSLDOI_VF
>    vd __builtin_vec_sld (vd, vd, const int);
> -    VSLDOI_2DF
> +    VSLDOI_2DF VSLDOI_VD

The other instances for vector integer type have multiple uses of 1st token,
such as:

  vsll __builtin_vec_sld (vsll, vsll, const int);
    VSLDOI_2DI  VSLDOI_VSLL
  vbll __builtin_vec_sld (vbll, vbll, const int);
    VSLDOI_2DI  VSLDOI_VBLL
  vull __builtin_vec_sld (vull, vull, const int);
    VSLDOI_2DI  VSLDOI_VULL

, it's unable to use the 1st token VSLDOI_2DI for the overload id (otherwise
it can be ambiguous), but for vector float/double they don't have multiple
variants, VSLDOI_4SF and VSLDOI_2DF are used once respectively so they are
fine here.  I think the existing code is intentional so let's keep them
unchanged (creating more unnecessary ids is slightly worse than before).

> +  vsq __builtin_vec_sld (vsq, vsq, const int);
> +    VSLDOI_V1TI  VSLDOI_VSQ
> +  vuq __builtin_vec_sld (vuq, vuq, const int);
> +    VSLDOI_V1TI  VSLDOI_VUQ
> 
>  [VEC_SLDB, vec_sldb, __builtin_vec_sldb]
>    vsc __builtin_vec_sldb (vsc, vsc, const int);
> @@ -3417,6 +3421,10 @@
>      VSLDB_V2DI  VSLDB_VSLL
>    vull __builtin_vec_sldb (vull, vull, const int);
>      VSLDB_V2DI  VSLDB_VULL
> +  vsq __builtin_vec_sldb (vsq, vsq, const int);
> +    VSLDB_V1TI  VSLDB_VSQ
> +  vuq __builtin_vec_sldb (vuq, vuq, const int);
> +    VSLDB_V1TI  VSLDB_VUQ
> 
>  [VEC_SLDW, vec_sldw, __builtin_vec_sldw]
>    vsc __builtin_vec_sldw (vsc, vsc, const int);
> @@ -3439,6 +3447,10 @@
>      XXSLDWI_4SF  XXSLDWI_VF
>    vd __builtin_vec_sldw (vd, vd, const int);
>      XXSLDWI_2DF  XXSLDWI_VD
> +  vsq __builtin_vec_sldw (vsq, vsq, const int);
> +    XXSLDWI_Q  XXSLDWI_VSQ
> +  vuq __builtin_vec_sldw (vuq, vuq, const int);
> +    XXSLDWI_Q  XXSLDWI_VUQ

Nit: s/XXSLDWI_Q/XXSLDWI_1TI/ to keep consistent with the
other XXSLDWI_* as 1st token (XXSLDWI_16QI etc. are used
above rather than XXSLDWI_{SC,UC} etc.)

> 
>  [VEC_SLL, vec_sll, __builtin_vec_sll]
>    vsc __builtin_vec_sll (vsc, vuc);
> @@ -3459,6 +3471,10 @@
>      VSL  VSL_VSLL
>    vull __builtin_vec_sll (vull, vuc);
>      VSL  VSL_VULL
> +  vsq __builtin_vec_sll (vsq, vuc);
> +    VSL  VSL_VSQ
> +  vuq __builtin_vec_sll (vuq, vuc);
> +    VSL  VSL_VUQ
>  ; The following variants are deprecated.
>    vsc __builtin_vec_sll (vsc, vus);
>      VSL  VSL_VSC_VUS
> @@ -3554,6 +3570,14 @@
>      VSLO  VSLO_VFS
>    vf __builtin_vec_slo (vf, vuc);
>      VSLO  VSLO_VFU
> +  vsq __builtin_vec_slo (vsq, vsc);
> +    VSLO  VSLDO_VSQS
> +  vsq __builtin_vec_slo (vsq, vuc);
> +    VSLO  VSLDO_VSQU
> +  vuq __builtin_vec_slo (vuq, vsc);
> +    VSLO  VSLDO_VUQS
> +  vuq __builtin_vec_slo (vuq, vuc);
> +    VSLO  VSLDO_VUQU
> 
>  [VEC_SLV, vec_slv, __builtin_vec_vslv]
>    vuc __builtin_vec_vslv (vuc, vuc);
> @@ -3699,6 +3723,10 @@
>      VSRDB_V2DI  VSRDB_VSLL
>    vull __builtin_vec_srdb (vull, vull, const int);
>      VSRDB_V2DI  VSRDB_VULL
> +  vsq __builtin_vec_srdb (vsq, vsq, const int);
> +    VSRDB_V1TI  VSRDB_VSQ
> +  vuq __builtin_vec_srdb (vuq, vuq, const int);
> +    VSRDB_V1TI  VSRDB_VUQ
> 
>  [VEC_SRL, vec_srl, __builtin_vec_srl]
>    vsc __builtin_vec_srl (vsc, vuc);
> @@ -3719,6 +3747,10 @@
>      VSR  VSR_VSLL
>    vull __builtin_vec_srl (vull, vuc);
>      VSR  VSR_VULL
> +  vsq __builtin_vec_srl (vsq, vuc);
> +    VSR  VSR_VSQ
> +  vuq __builtin_vec_srl (vuq, vuc);
> +    VSR  VSR_VUQ
>  ; The following variants are deprecated.
>    vsc __builtin_vec_srl (vsc, vus);
>      VSR  VSR_VSC_VUS
> @@ -3808,6 +3840,14 @@
>      VSRO  VSRO_VFS
>    vf __builtin_vec_sro (vf, vuc);
>      VSRO  VSRO_VFU
> +  vsq __builtin_vec_sro (vsq, vsc);
> +    VSRO  VSRDO_VSQS
> +  vsq __builtin_vec_sro (vsq, vuc);
> +    VSRO  VSRDO_VSQU
> +  vuq __builtin_vec_sro (vuq, vsc);
> +    VSRO  VSRDO_VUQS
> +  vuq __builtin_vec_sro (vuq, vuc);
> +    VSRO  VSRDO_VUQU
> 
>  [VEC_SRV, vec_srv, __builtin_vec_vsrv]
>    vuc __builtin_vec_vsrv (vuc, vuc);
> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> index 0b572afca72..5125a6d9def 100644
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -23504,6 +23504,10 @@ const unsigned int);
>  vector signed long long, const unsigned int);
>  @exdent vector unsigned long long vec_sldb (vector unsigned long long,
>  vector unsigned long long, const unsigned int);
> +@exdent vector signed __int128 vec_sldb (vector signed __int128,
> +vector signed __int128, const unsigned int);
> +@exdent vector unsigned __int128 vec_sldb (vector unsigned __int128,
> +vector unsigned __int128, const unsigned int);
>  @end smallexample
> 
>  Shift the combined input vectors left by the amount specified by the 
> low-order
> @@ -23531,6 +23535,10 @@ const unsigned int);
>  vector signed long long, const unsigned int);
>  @exdent vector unsigned long long vec_srdb (vector unsigned long long,
>  vector unsigned long long, const unsigned int);
> +@exdent vector signed __int128 vec_srdb (vector signed __int128,
> +vector signed __int128, const unsigned int);
> +@exdent vector unsigned __int128 vec_srdb (vector unsigned __int128,
> +vector unsigned __int128, const unsigned int);
>  @end smallexample
> 
>  Shift the combined input vectors right by the amount specified by the 
> low-order
> @@ -24025,6 +24033,40 @@ int vec_any_le (vector signed __int128, vector 
> signed __int128);
>  int vec_any_le (vector unsigned __int128, vector unsigned __int128);
>  @end smallexample
> 

Personally I prefer to move the below paragraph for description here to avoid
that two @smallexample sections are placed together, that is:

"The following instances are extension of the existing overloaded built-ins
@code{vec_sld}, @code{vec_sldw}, @code{vec_slo}, @code{vec_sro}, @code{vec_srl}
that are documented in the PVIPR."

@smallexample
....

BR,
Kewen

Reply via email to