Ping!

Please review.

Thanks & Regards
Jeevitha

On 26/08/25 6:42 pm, jeevitha wrote:
> 
> Hi All,
> 
> The following patch has been bootstrapped and regtested on powerpc64le-linux.
> 
> Previously, vec_slo/vec_sll always defaulted to V4SI, inserting unwanted
> VIEW_CONVERT_EXPR int casts. This caused widening of char/short vectors,
> with constants exceeding the vspltisb/xxspltib range.
> 
> For example:
>     vui8_t vra, tmp;
>     _2 = VIEW_CONVERT_EXPR<__vector signed int>(vra);
>     _3 = VIEW_CONVERT_EXPR<__vector signed int>(tmp);
>     _4 = __builtin_altivec_vslo(_2, _3);
> 
> With this patch, vec_slo/vec_sll now select the correct vector type based on
> their arguments. For example:
> 
>     vui8_t vra, tmp;
>     _2 = VIEW_CONVERT_EXPR<__vector signed char>(vra);
>     _3 = VIEW_CONVERT_EXPR<__vector signed char>(tmp);
>     _4 = __builtin_altivec_vslo_v16qi(_2, _3);
> 
> This ensures proper handling across all supported modes (V16QI, V8HI, V4SI,
> V2DI, V1TI, V4SF). Mode-specific builtins for vsl and vslo were added to avoid
> unnecessary casting.
> 
> 2025-08-26  Jeevitha Palanisamy  <jeevi...@linux.ibm.com>
> 
> gcc/
>       PR target/118480
>       PR target/117818
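>       * config/rs6000/altivec.md (UNSPEC_VSL): New unspec.
>       (altivec_vslo): Rename to...
>       (altivec_vslo_<mode>): ...this.  Extend to all VM modes and take
>       the shift count as V16QI.
>       (altivec_vsl_<mode>): New define_insn.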
>       * config/rs6000/rs6000-builtins.def: Add builtins for vsl/vslo with
>       mode-specific support.
>       * config/rs6000/rs6000-overload.def: Update vec_sll/vec_slo overloads
>       to use new mode-specific variants.
> 
> gcc/testsuite/
>       PR target/118480
>       PR target/117818
>       * gcc.target/powerpc/pr118480-3.c: New test.
>       * gcc.target/powerpc/pr117818-1.c: New test.
> 
> 
> diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
> index 7edc288a656..c11acd30870 100644
> --- a/gcc/config/rs6000/altivec.md
> +++ b/gcc/config/rs6000/altivec.md
> @@ -54,6 +54,7 @@
>     UNSPEC_VPACK_UNS_UNS_MOD
>     UNSPEC_VPACK_UNS_UNS_MOD_DIRECT
>     UNSPEC_VREVEV
> +   UNSPEC_VSL
>     UNSPEC_VSLV4SI
>     UNSPEC_VSLO
>     UNSPEC_VSR
> @@ -2071,6 +2072,15 @@
>    "vrlqnm %0,%1,%2"
>    [(set_attr "type" "veclogical")])
>  
> +(define_insn "altivec_vsl_<mode>"
> +  [(set (match_operand:VSX_MM 0 "register_operand" "=v")
> +        (unspec:VSX_MM [(match_operand:VSX_MM 1 "register_operand" "v")
> +                        (match_operand:V16QI 2 "register_operand" "v")]
> +                       UNSPEC_VSL))]
> +  "TARGET_ALTIVEC"
> +  "vsl %0,%1,%2"
> +  [(set_attr "type" "vecperm")])
> +
>  (define_insn "altivec_vsl"
>    [(set (match_operand:V4SI 0 "register_operand" "=v")
>          (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
> @@ -2080,11 +2090,11 @@
>    "vsl %0,%1,%2"
>    [(set_attr "type" "vecperm")])
>  
> -(define_insn "altivec_vslo"
> -  [(set (match_operand:V4SI 0 "register_operand" "=v")
> -        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
> -                      (match_operand:V4SI 2 "register_operand" "v")]
> -                  UNSPEC_VSLO))]
> +(define_insn "altivec_vslo_<mode>"
> +  [(set (match_operand:VM 0 "register_operand" "=v")
> +        (unspec:VM [(match_operand:VM 1 "register_operand" "v")
> +                    (match_operand:V16QI 2 "register_operand" "v")]
> +                   UNSPEC_VSLO))]
>    "TARGET_ALTIVEC"
>    "vslo %0,%1,%2"
>    [(set_attr "type" "vecperm")])
> diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def
> index 555d7d58950..c7622f11816 100644
> --- a/gcc/config/rs6000/rs6000-builtins.def
> +++ b/gcc/config/rs6000/rs6000-builtins.def
> @@ -948,6 +948,21 @@
>    const vsi __builtin_altivec_vsl (vsi, vsi);
>      VSL altivec_vsl {}
>  
> +  const vsc __builtin_altivec_vsl_v16qi (vsc, vsc);
> +    VSL_16QI altivec_vsl_v16qi {}
> +
> +  const vss __builtin_altivec_vsl_v8hi (vss, vsc);
> +    VSL_8HI altivec_vsl_v8hi {}
> +
> +  const vsi __builtin_altivec_vsl_v4si (vsi, vsc);
> +    VSL_4SI altivec_vsl_v4si {}
> +
> +  const vsll __builtin_altivec_vsl_v2di (vsll, vsc);
> +    VSL_2DI altivec_vsl_v2di {}
> +
> +  const vsq __builtin_altivec_vsl_v1ti (vsq, vsc);
> +    VSL_1TI altivec_vsl_v1ti {}
> +
>    const vsc __builtin_altivec_vslb (vsc, vuc);
>      VSLB vashlv16qi3 {}
>  
> @@ -969,8 +984,23 @@
>    const vss __builtin_altivec_vslh (vss, vus);
>      VSLH vashlv8hi3 {}
>  
> -  const vsi __builtin_altivec_vslo (vsi, vsi);
> -    VSLO altivec_vslo {}
> +  const vsc __builtin_altivec_vslo_v16qi (vsc, vsc);
> +    VSLO_16QI altivec_vslo_v16qi {}
> +
> +  const vss __builtin_altivec_vslo_v8hi (vss, vsc);
> +    VSLO_8HI altivec_vslo_v8hi {}
> +
> +  const vf __builtin_altivec_vslo_v4sf (vf, vsc);
> +    VSLO_4SF altivec_vslo_v4sf {}
> +
> +  const vsi __builtin_altivec_vslo_v4si (vsi, vsc);
> +    VSLO_4SI altivec_vslo_v4si {}
> +
> +  const vsll __builtin_altivec_vslo_v2di (vsll, vsc);
> +    VSLO_2DI altivec_vslo_v2di {}
> +
> +  const vsq __builtin_altivec_vslo_v1ti (vsq, vsc);
> +    VSLO_1TI altivec_vslo_v1ti {}
>  
>    const vsi __builtin_altivec_vslw (vsi, vui);
>      VSLW vashlv4si3 {}
> diff --git a/gcc/config/rs6000/rs6000-overload.def b/gcc/config/rs6000/rs6000-overload.def
> index b4266c54464..62a29b9ce03 100644
> --- a/gcc/config/rs6000/rs6000-overload.def
> +++ b/gcc/config/rs6000/rs6000-overload.def
> @@ -3454,27 +3454,27 @@
>  
>  [VEC_SLL, vec_sll, __builtin_vec_sll]
>    vsc __builtin_vec_sll (vsc, vuc);
> -    VSL  VSL_VSC
> +    VSL_16QI  VSL_VSC
>    vuc __builtin_vec_sll (vuc, vuc);
> -    VSL  VSL_VUC
> +    VSL_16QI  VSL_VUC
>    vss __builtin_vec_sll (vss, vuc);
> -    VSL  VSL_VSS
> +    VSL_8HI  VSL_VSS
>    vus __builtin_vec_sll (vus, vuc);
> -    VSL  VSL_VUS
> +    VSL_8HI  VSL_VUS
>    vp __builtin_vec_sll (vp, vuc);
> -    VSL  VSL_VP
> +    VSL_8HI  VSL_VP
>    vsi __builtin_vec_sll (vsi, vuc);
> -    VSL  VSL_VSI
> +    VSL_4SI  VSL_VSI
>    vui __builtin_vec_sll (vui, vuc);
> -    VSL  VSL_VUI
> +    VSL_4SI  VSL_VUI
>    vsll __builtin_vec_sll (vsll, vuc);
> -    VSL  VSL_VSLL
> +    VSL_2DI VSL_VSLL
>    vull __builtin_vec_sll (vull, vuc);
> -    VSL  VSL_VULL
> +    VSL_2DI  VSL_VULL
>    vsq __builtin_vec_sll (vsq, vuc);
> -    VSL  VSL_VSQ
> +    VSL_1TI  VSL_VSQ
>    vuq __builtin_vec_sll (vuq, vuc);
> -    VSL  VSL_VUQ
> +    VSL_1TI  VSL_VUQ
>  ; The following variants are deprecated.
>    vsc __builtin_vec_sll (vsc, vus);
>      VSL  VSL_VSC_VUS
> @@ -3531,53 +3531,53 @@
>  
>  [VEC_SLO, vec_slo, __builtin_vec_slo]
>    vsc __builtin_vec_slo (vsc, vsc);
> -    VSLO  VSLO_VSCS
> +    VSLO_16QI  VSLO_VSCS
>    vsc __builtin_vec_slo (vsc, vuc);
> -    VSLO  VSLO_VSCU
> +    VSLO_16QI  VSLO_VSCU
>    vuc __builtin_vec_slo (vuc, vsc);
> -    VSLO  VSLO_VUCS
> +    VSLO_16QI  VSLO_VUCS
>    vuc __builtin_vec_slo (vuc, vuc);
> -    VSLO  VSLO_VUCU
> +    VSLO_16QI  VSLO_VUCU
>    vss __builtin_vec_slo (vss, vsc);
> -    VSLO  VSLO_VSSS
> +    VSLO_8HI  VSLO_VSSS
>    vss __builtin_vec_slo (vss, vuc);
> -    VSLO  VSLO_VSSU
> +    VSLO_8HI  VSLO_VSSU
>    vus __builtin_vec_slo (vus, vsc);
> -    VSLO  VSLO_VUSS
> +    VSLO_8HI  VSLO_VUSS
>    vus __builtin_vec_slo (vus, vuc);
> -    VSLO  VSLO_VUSU
> +    VSLO_8HI  VSLO_VUSU
>    vp __builtin_vec_slo (vp, vsc);
> -    VSLO  VSLO_VPS
> +    VSLO_8HI  VSLO_VPS
>    vp __builtin_vec_slo (vp, vuc);
> -    VSLO  VSLO_VPU
> +    VSLO_8HI  VSLO_VPU
>    vsi __builtin_vec_slo (vsi, vsc);
> -    VSLO  VSLO_VSIS
> +    VSLO_4SI  VSLO_VSIS
>    vsi __builtin_vec_slo (vsi, vuc);
> -    VSLO  VSLO_VSIU
> +    VSLO_4SI  VSLO_VSIU
>    vui __builtin_vec_slo (vui, vsc);
> -    VSLO  VSLO_VUIS
> +    VSLO_4SI  VSLO_VUIS
>    vui __builtin_vec_slo (vui, vuc);
> -    VSLO  VSLO_VUIU
> +    VSLO_4SI  VSLO_VUIU
>    vsll __builtin_vec_slo (vsll, vsc);
> -    VSLO  VSLO_VSLLS
> +    VSLO_2DI  VSLO_VSLLS
>    vsll __builtin_vec_slo (vsll, vuc);
> -    VSLO  VSLO_VSLLU
> +    VSLO_2DI  VSLO_VSLLU
>    vull __builtin_vec_slo (vull, vsc);
> -    VSLO  VSLO_VULLS
> +    VSLO_2DI  VSLO_VULLS
>    vull __builtin_vec_slo (vull, vuc);
> -    VSLO  VSLO_VULLU
> +    VSLO_2DI  VSLO_VULLU
>    vf __builtin_vec_slo (vf, vsc);
> -    VSLO  VSLO_VFS
> +    VSLO_4SF  VSLO_VFS
>    vf __builtin_vec_slo (vf, vuc);
> -    VSLO  VSLO_VFU
> +    VSLO_4SF  VSLO_VFU
>    vsq __builtin_vec_slo (vsq, vsc);
> -    VSLO  VSLDO_VSQS
> +    VSLO_1TI  VSLDO_VSQS
>    vsq __builtin_vec_slo (vsq, vuc);
> -    VSLO  VSLDO_VSQU
> +    VSLO_1TI  VSLDO_VSQU
>    vuq __builtin_vec_slo (vuq, vsc);
> -    VSLO  VSLDO_VUQS
> +    VSLO_1TI  VSLDO_VUQS
>    vuq __builtin_vec_slo (vuq, vuc);
> -    VSLO  VSLDO_VUQU
> +    VSLO_1TI  VSLDO_VUQU
>  
>  [VEC_SLV, vec_slv, __builtin_vec_vslv]
>    vuc __builtin_vec_vslv (vuc, vuc);
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr117818-1.c b/gcc/testsuite/gcc.target/powerpc/pr117818-1.c
> new file mode 100644
> index 00000000000..e0e8b6701e4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr117818-1.c
> @@ -0,0 +1,33 @@
> +/* { dg-do compile { target lp64 } } */
> +/* { dg-options "-mdejagnu-cpu=power8 -mvsx -O2" } */
> +
> +#include <altivec.h>
> +
> +typedef vector unsigned char vui8_t;
> +
> +vui8_t
> +test_splat1 (vui8_t vra)
> +{
> +  vui8_t result;
> +  vui8_t tmp = vec_splat_u8(-9);             /*  VSPLTISB  */
> +  tmp = vec_add (tmp, tmp);                  /*  VADDUBM  */
> +  result = vec_slo ((vui8_t) vra, tmp);      /*  VSLO  */
> +  return (vui8_t) vec_sll (result, tmp);     /*  VSL  */
> +}
> +
> +vui8_t
> +test_splat2 (vui8_t vra)
> +{
> +  vui8_t result;
> +  vui8_t tmp = vec_splat_u8(9);              /*  VSPLTISB  */
> +  tmp = vec_add (tmp, tmp);                  /*  VADDUBM  */
> +  result = vec_slo ((vui8_t) vra, tmp);      /*  VSLO  */
> +  return (vui8_t) vec_sll (result, tmp);     /*  VSL  */
> +}
> +
> +/* { dg-final { scan-assembler-times {\mvspltisb\M} 2 } } */
> +/* { dg-final { scan-assembler-times {\mvaddubm\M} 2 } } */
> +/* { dg-final { scan-assembler-times {\mvslo\M} 2 } } */
> +/* { dg-final { scan-assembler-times {\mvsl\M} 2 } } */
> +/* { dg-final { scan-assembler-not {\mlvx?\M} } } */
> +/* { dg-final { scan-assembler-not {\mvadduwm\M} } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr118480-3.c b/gcc/testsuite/gcc.target/powerpc/pr118480-3.c
> new file mode 100644
> index 00000000000..37388cf944a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr118480-3.c
> @@ -0,0 +1,39 @@
> +/* { dg-do compile { target lp64 } } */
> +/* { dg-options "-mdejagnu-cpu=power9 -mvsx -O2" } */
> +
> +#include <altivec.h>
> +
> +typedef vector unsigned char vui8_t;
> +
> +vui8_t
> +test_slqi_char_18_V3 (vui8_t vra)
> +{
> +  vui8_t result;
> +  vui8_t tmp = vec_splats((unsigned char)18);                /*  XXSPLTIB  */
> +  result = vec_vslo ((vui8_t) vra, tmp);             /*  VSLO  */
> +  return vec_vsl (result, tmp);                      /*  VSL  */
> +}
> +
> +vui8_t
> +test_slqi_char_116_V3 (vui8_t vra)
> +{
> +  vui8_t result;
> +  vui8_t tmp = vec_splats((unsigned char)116);        /*  XXSPLTIB  */
> +  result = vec_slo (vra, tmp);                        /*  VSLO  */
> +  return vec_sll (result, tmp);                       /*  VSL  */
> +}
> +
> +vui8_t
> +test_slqi_char_116_V0 (vui8_t vra)
> +{
> +  vui8_t result;
> +  vui8_t tmp = vec_splat_u8(-12);                    /*  XXSPLTIB  */
> +  result = vec_slo (vra, tmp);                               /*  VSLO  */
> +  return vec_sll (result, tmp);                      /*  VSL  */
> +}
> +
> +/* { dg-final { scan-assembler-times {\mxxspltib\M} 3 } } */
> +/* { dg-final { scan-assembler-times {\mvslo\M} 3 } } */
> +/* { dg-final { scan-assembler-times {\mvsl\M} 3 } } */
> +/* { dg-final { scan-assembler-not {\mlxv?\M} } } */
> +/* { dg-final { scan-assembler-not {\mvspltisb\M} } } */

Reply via email to