Ping! Please review.
Thanks & Regards, Jeevitha On 26/08/25 6:42 pm, jeevitha wrote: > > Hi All, > > The following patch has been bootstrapped and regtested on powerpc64le-linux. > > Previously, vec_slo/vec_sll always defaulted to V4SI, inserting unwanted > VIEW_CONVERT_EXPR int casts. This caused char/short vectors to be widened, > producing constants that exceed the vspltisb/xxspltib range. > > For example: > vui8_t vra, tmp; > _2 = VIEW_CONVERT_EXPR<__vector signed int>(vra); > _3 = VIEW_CONVERT_EXPR<__vector signed int>(tmp); > _4 = __builtin_altivec_vslo(_2, _3); > > With this patch, vec_slo/vec_sll now select the correct vector type based on > their arguments. For example: > > vui8_t vra, tmp; > _2 = VIEW_CONVERT_EXPR<__vector signed char>(vra); > _3 = VIEW_CONVERT_EXPR<__vector signed char>(tmp); > _4 = __builtin_altivec_vslo_v16qi(_2, _3); > > This ensures proper handling across all supported modes (V16QI, V8HI, V4SI, > V2DI, V1TI, V4SF). Mode-specific builtins for vsl and vslo were added to avoid > unnecessary casting. > > 2025-08-26 Jeevitha Palanisamy <jeevi...@linux.ibm.com> > > gcc/ > PR target/118480 > PR target/117818 > * config/rs6000/altivec.md (altivec_vslo_<mode>): New define_insn. > (altivec_vsl_<mode>): New define_insn. > * config/rs6000/rs6000-builtins.def: Add builtins for vsl/vslo with > mode-specific support. > * config/rs6000/rs6000-overload.def: Update vec_sll/vec_slo overloads > to use new mode-specific variants. > > gcc/testsuite/ > PR target/118480 > PR target/117818 > * gcc.target/powerpc/pr118480-3.c: New test. > * gcc.target/powerpc/pr117818-1.c: New test. 
> > > diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md > index 7edc288a656..c11acd30870 100644 > --- a/gcc/config/rs6000/altivec.md > +++ b/gcc/config/rs6000/altivec.md > @@ -54,6 +54,7 @@ > UNSPEC_VPACK_UNS_UNS_MOD > UNSPEC_VPACK_UNS_UNS_MOD_DIRECT > UNSPEC_VREVEV > + UNSPEC_VSL > UNSPEC_VSLV4SI > UNSPEC_VSLO > UNSPEC_VSR > @@ -2071,6 +2072,15 @@ > "vrlqnm %0,%1,%2" > [(set_attr "type" "veclogical")]) > > +(define_insn "altivec_vsl_<mode>" > + [(set (match_operand:VSX_MM 0 "register_operand" "=v") > + (unspec:VSX_MM [(match_operand:VSX_MM 1 "register_operand" "v") > + (match_operand:V16QI 2 "register_operand" "v")] > + UNSPEC_VSL))] > + "TARGET_ALTIVEC" > + "vsl %0,%1,%2" > + [(set_attr "type" "vecperm")]) > + > (define_insn "altivec_vsl" > [(set (match_operand:V4SI 0 "register_operand" "=v") > (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") > @@ -2080,11 +2090,11 @@ > "vsl %0,%1,%2" > [(set_attr "type" "vecperm")]) > > -(define_insn "altivec_vslo" > - [(set (match_operand:V4SI 0 "register_operand" "=v") > - (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") > - (match_operand:V4SI 2 "register_operand" "v")] > - UNSPEC_VSLO))] > +(define_insn "altivec_vslo_<mode>" > + [(set (match_operand:VM 0 "register_operand" "=v") > + (unspec:VM [(match_operand:VM 1 "register_operand" "v") > + (match_operand:V16QI 2 "register_operand" "v")] > + UNSPEC_VSLO))] > "TARGET_ALTIVEC" > "vslo %0,%1,%2" > [(set_attr "type" "vecperm")]) > diff --git a/gcc/config/rs6000/rs6000-builtins.def > b/gcc/config/rs6000/rs6000-builtins.def > index 555d7d58950..c7622f11816 100644 > --- a/gcc/config/rs6000/rs6000-builtins.def > +++ b/gcc/config/rs6000/rs6000-builtins.def > @@ -948,6 +948,21 @@ > const vsi __builtin_altivec_vsl (vsi, vsi); > VSL altivec_vsl {} > > + const vsc __builtin_altivec_vsl_v16qi (vsc, vsc); > + VSL_16QI altivec_vsl_v16qi {} > + > + const vss __builtin_altivec_vsl_v8hi (vss, vsc); > + VSL_8HI altivec_vsl_v8hi {} > + > + const vsi 
__builtin_altivec_vsl_v4si (vsi, vsc); > + VSL_4SI altivec_vsl_v4si {} > + > + const vsll __builtin_altivec_vsl_v2di (vsll, vsc); > + VSL_2DI altivec_vsl_v2di {} > + > + const vsq __builtin_altivec_vsl_v1ti (vsq, vsc); > + VSL_1TI altivec_vsl_v1ti {} > + > const vsc __builtin_altivec_vslb (vsc, vuc); > VSLB vashlv16qi3 {} > > @@ -969,8 +984,23 @@ > const vss __builtin_altivec_vslh (vss, vus); > VSLH vashlv8hi3 {} > > - const vsi __builtin_altivec_vslo (vsi, vsi); > - VSLO altivec_vslo {} > + const vsc __builtin_altivec_vslo_v16qi (vsc, vsc); > + VSLO_16QI altivec_vslo_v16qi {} > + > + const vss __builtin_altivec_vslo_v8hi (vss, vsc); > + VSLO_8HI altivec_vslo_v8hi {} > + > + const vf __builtin_altivec_vslo_v4sf (vf, vsc); > + VSLO_4SF altivec_vslo_v4sf {} > + > + const vsi __builtin_altivec_vslo_v4si (vsi, vsc); > + VSLO_4SI altivec_vslo_v4si {} > + > + const vsll __builtin_altivec_vslo_v2di (vsll, vsc); > + VSLO_2DI altivec_vslo_v2di {} > + > + const vsq __builtin_altivec_vslo_v1ti (vsq, vsc); > + VSLO_1TI altivec_vslo_v1ti {} > > const vsi __builtin_altivec_vslw (vsi, vui); > VSLW vashlv4si3 {} > diff --git a/gcc/config/rs6000/rs6000-overload.def > b/gcc/config/rs6000/rs6000-overload.def > index b4266c54464..62a29b9ce03 100644 > --- a/gcc/config/rs6000/rs6000-overload.def > +++ b/gcc/config/rs6000/rs6000-overload.def > @@ -3454,27 +3454,27 @@ > > [VEC_SLL, vec_sll, __builtin_vec_sll] > vsc __builtin_vec_sll (vsc, vuc); > - VSL VSL_VSC > + VSL_16QI VSL_VSC > vuc __builtin_vec_sll (vuc, vuc); > - VSL VSL_VUC > + VSL_16QI VSL_VUC > vss __builtin_vec_sll (vss, vuc); > - VSL VSL_VSS > + VSL_8HI VSL_VSS > vus __builtin_vec_sll (vus, vuc); > - VSL VSL_VUS > + VSL_8HI VSL_VUS > vp __builtin_vec_sll (vp, vuc); > - VSL VSL_VP > + VSL_8HI VSL_VP > vsi __builtin_vec_sll (vsi, vuc); > - VSL VSL_VSI > + VSL_4SI VSL_VSI > vui __builtin_vec_sll (vui, vuc); > - VSL VSL_VUI > + VSL_4SI VSL_VUI > vsll __builtin_vec_sll (vsll, vuc); > - VSL VSL_VSLL > + VSL_2DI VSL_VSLL > vull 
__builtin_vec_sll (vull, vuc); > - VSL VSL_VULL > + VSL_2DI VSL_VULL > vsq __builtin_vec_sll (vsq, vuc); > - VSL VSL_VSQ > + VSL_1TI VSL_VSQ > vuq __builtin_vec_sll (vuq, vuc); > - VSL VSL_VUQ > + VSL_1TI VSL_VUQ > ; The following variants are deprecated. > vsc __builtin_vec_sll (vsc, vus); > VSL VSL_VSC_VUS > @@ -3531,53 +3531,53 @@ > > [VEC_SLO, vec_slo, __builtin_vec_slo] > vsc __builtin_vec_slo (vsc, vsc); > - VSLO VSLO_VSCS > + VSLO_16QI VSLO_VSCS > vsc __builtin_vec_slo (vsc, vuc); > - VSLO VSLO_VSCU > + VSLO_16QI VSLO_VSCU > vuc __builtin_vec_slo (vuc, vsc); > - VSLO VSLO_VUCS > + VSLO_16QI VSLO_VUCS > vuc __builtin_vec_slo (vuc, vuc); > - VSLO VSLO_VUCU > + VSLO_16QI VSLO_VUCU > vss __builtin_vec_slo (vss, vsc); > - VSLO VSLO_VSSS > + VSLO_8HI VSLO_VSSS > vss __builtin_vec_slo (vss, vuc); > - VSLO VSLO_VSSU > + VSLO_8HI VSLO_VSSU > vus __builtin_vec_slo (vus, vsc); > - VSLO VSLO_VUSS > + VSLO_8HI VSLO_VUSS > vus __builtin_vec_slo (vus, vuc); > - VSLO VSLO_VUSU > + VSLO_8HI VSLO_VUSU > vp __builtin_vec_slo (vp, vsc); > - VSLO VSLO_VPS > + VSLO_8HI VSLO_VPS > vp __builtin_vec_slo (vp, vuc); > - VSLO VSLO_VPU > + VSLO_8HI VSLO_VPU > vsi __builtin_vec_slo (vsi, vsc); > - VSLO VSLO_VSIS > + VSLO_4SI VSLO_VSIS > vsi __builtin_vec_slo (vsi, vuc); > - VSLO VSLO_VSIU > + VSLO_4SI VSLO_VSIU > vui __builtin_vec_slo (vui, vsc); > - VSLO VSLO_VUIS > + VSLO_4SI VSLO_VUIS > vui __builtin_vec_slo (vui, vuc); > - VSLO VSLO_VUIU > + VSLO_4SI VSLO_VUIU > vsll __builtin_vec_slo (vsll, vsc); > - VSLO VSLO_VSLLS > + VSLO_2DI VSLO_VSLLS > vsll __builtin_vec_slo (vsll, vuc); > - VSLO VSLO_VSLLU > + VSLO_2DI VSLO_VSLLU > vull __builtin_vec_slo (vull, vsc); > - VSLO VSLO_VULLS > + VSLO_2DI VSLO_VULLS > vull __builtin_vec_slo (vull, vuc); > - VSLO VSLO_VULLU > + VSLO_2DI VSLO_VULLU > vf __builtin_vec_slo (vf, vsc); > - VSLO VSLO_VFS > + VSLO_4SF VSLO_VFS > vf __builtin_vec_slo (vf, vuc); > - VSLO VSLO_VFU > + VSLO_4SF VSLO_VFU > vsq __builtin_vec_slo (vsq, vsc); > - VSLO VSLDO_VSQS > 
+ VSLO_1TI VSLDO_VSQS > vsq __builtin_vec_slo (vsq, vuc); > - VSLO VSLDO_VSQU > + VSLO_1TI VSLDO_VSQU > vuq __builtin_vec_slo (vuq, vsc); > - VSLO VSLDO_VUQS > + VSLO_1TI VSLDO_VUQS > vuq __builtin_vec_slo (vuq, vuc); > - VSLO VSLDO_VUQU > + VSLO_1TI VSLDO_VUQU > > [VEC_SLV, vec_slv, __builtin_vec_vslv] > vuc __builtin_vec_vslv (vuc, vuc); > diff --git a/gcc/testsuite/gcc.target/powerpc/pr117818-1.c > b/gcc/testsuite/gcc.target/powerpc/pr117818-1.c > new file mode 100644 > index 00000000000..e0e8b6701e4 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pr117818-1.c > @@ -0,0 +1,33 @@ > +/* { dg-do compile { target lp64 } } */ > +/* { dg-options "-mdejagnu-cpu=power8 -mvsx -O2" } */ > + > +#include <altivec.h> > + > +typedef vector unsigned char vui8_t; > + > +vui8_t > +test_splat1 (vui8_t vra) > +{ > + vui8_t result; > + vui8_t tmp = vec_splat_u8(-9); /* VSPLTISB */ > + tmp = vec_add (tmp, tmp); /* VADDUBM */ > + result = vec_slo ((vui8_t) vra, tmp); /* VSLO */ > + return (vui8_t) vec_sll (result, tmp); /* VSL */ > +} > + > +vui8_t > +test_splat2 (vui8_t vra) > +{ > + vui8_t result; > + vui8_t tmp = vec_splat_u8(9); /* VSPLTISB */ > + tmp = vec_add (tmp, tmp); /* VADDUBM */ > + result = vec_slo ((vui8_t) vra, tmp); /* VSLO */ > + return (vui8_t) vec_sll (result, tmp); /* VSL */ > +} > + > +/* { dg-final { scan-assembler-times {\mvspltisb\M} 2 } } */ > +/* { dg-final { scan-assembler-times {\mvaddubm\M} 2 } } */ > +/* { dg-final { scan-assembler-times {\mvslo\M} 2 } } */ > +/* { dg-final { scan-assembler-times {\mvsl\M} 2 } } */ > +/* { dg-final { scan-assembler-not {\mlvx?\M} } } */ > +/* { dg-final { scan-assembler-not {\mvadduwm\M} } } */ > diff --git a/gcc/testsuite/gcc.target/powerpc/pr118480-3.c > b/gcc/testsuite/gcc.target/powerpc/pr118480-3.c > new file mode 100644 > index 00000000000..37388cf944a > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pr118480-3.c > @@ -0,0 +1,39 @@ > +/* { dg-do compile { target lp64 } } */ > +/* { dg-options 
"-mdejagnu-cpu=power9 -mvsx -O2" } */ > + > +#include <altivec.h> > + > +typedef vector unsigned char vui8_t; > + > +vui8_t > +test_slqi_char_18_V3 (vui8_t vra) > +{ > + vui8_t result; > + vui8_t tmp = vec_splats((unsigned char)18); /* XXSPLTIB */ > + result = vec_vslo ((vui8_t) vra, tmp); /* VSLO */ > + return vec_vsl (result, tmp); /* VSL */ > +} > + > +vui8_t > +test_slqi_char_116_V3 (vui8_t vra) > +{ > + vui8_t result; > + vui8_t tmp = vec_splats((unsigned char)116); /* XXSPLTIB */ > + result = vec_slo (vra, tmp); /* VSLO */ > + return vec_sll (result, tmp); /* VSL */ > +} > + > +vui8_t > +test_slqi_char_116_V0 (vui8_t vra) > +{ > + vui8_t result; > + vui8_t tmp = vec_splat_u8(-12); /* XXSPLTIB */ > + result = vec_slo (vra, tmp); /* VSLO */ > + return vec_sll (result, tmp); /* VSL */ > +} > + > +/* { dg-final { scan-assembler-times {\mxxspltib\M} 3 } } */ > +/* { dg-final { scan-assembler-times {\mvslo\M} 3 } } */ > +/* { dg-final { scan-assembler-times {\mvsl\M} 3 } } */ > +/* { dg-final { scan-assembler-not {\mlxv?\M} } } */ > +/* { dg-final { scan-assembler-not {\mvspltisb\M} } } */