Hi All,
The following patch has been bootstrapped and regtested on powerpc64le-linux. Previously, vec_slo/vec_sll always defaulted to V4SI, inserting unwanted VIEW_CONVERT_EXPR casts to vector int. This caused widening of char/short vectors and constants exceeding the vspltisb/xxspltib range. For example: vui8_t vra, tmp; _2 = VIEW_CONVERT_EXPR<__vector signed int>(vra); _3 = VIEW_CONVERT_EXPR<__vector signed int>(tmp); _4 = __builtin_altivec_vslo(_2, _3); With this patch, vec_slo/vec_sll now select the correct vector type based on their arguments. For example: vui8_t vra, tmp; _2 = VIEW_CONVERT_EXPR<__vector signed char>(vra); _3 = VIEW_CONVERT_EXPR<__vector signed char>(tmp); _4 = __builtin_altivec_vslo_v16qi(_2, _3); This ensures proper handling across all supported modes (V16QI, V8HI, V4SI, V2DI, V1TI, V4SF). Mode-specific builtins for vsl and vslo were added to avoid unnecessary casting. 2025-08-26 Jeevitha Palanisamy <jeevi...@linux.ibm.com> gcc/ PR target/118480 PR target/117818 * config/rs6000/altivec.md (altivec_vslo_<mode>): New define_insn. (altivec_vsl_<mode>): New define_insn. * config/rs6000/rs6000-builtins.def: Add builtins for vsl/vslo with mode-specific support. * config/rs6000/rs6000-overload.def: Update vec_sll/vec_slo overloads to use new mode-specific variants. gcc/testsuite/ PR target/118480 PR target/117818 * gcc.target/powerpc/pr118480-3.c: New test. * gcc.target/powerpc/pr117818-1.c: New test. 
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 7edc288a656..c11acd30870 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -54,6 +54,7 @@ UNSPEC_VPACK_UNS_UNS_MOD UNSPEC_VPACK_UNS_UNS_MOD_DIRECT UNSPEC_VREVEV + UNSPEC_VSL UNSPEC_VSLV4SI UNSPEC_VSLO UNSPEC_VSR @@ -2071,6 +2072,15 @@ "vrlqnm %0,%1,%2" [(set_attr "type" "veclogical")]) +(define_insn "altivec_vsl_<mode>" + [(set (match_operand:VSX_MM 0 "register_operand" "=v") + (unspec:VSX_MM [(match_operand:VSX_MM 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v")] + UNSPEC_VSL))] + "TARGET_ALTIVEC" + "vsl %0,%1,%2" + [(set_attr "type" "vecperm")]) + (define_insn "altivec_vsl" [(set (match_operand:V4SI 0 "register_operand" "=v") (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") @@ -2080,11 +2090,11 @@ "vsl %0,%1,%2" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vslo" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") - (match_operand:V4SI 2 "register_operand" "v")] - UNSPEC_VSLO))] +(define_insn "altivec_vslo_<mode>" + [(set (match_operand:VM 0 "register_operand" "=v") + (unspec:VM [(match_operand:VM 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v")] + UNSPEC_VSLO))] "TARGET_ALTIVEC" "vslo %0,%1,%2" [(set_attr "type" "vecperm")]) diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def index 555d7d58950..c7622f11816 100644 --- a/gcc/config/rs6000/rs6000-builtins.def +++ b/gcc/config/rs6000/rs6000-builtins.def @@ -948,6 +948,21 @@ const vsi __builtin_altivec_vsl (vsi, vsi); VSL altivec_vsl {} + const vsc __builtin_altivec_vsl_v16qi (vsc, vsc); + VSL_16QI altivec_vsl_v16qi {} + + const vss __builtin_altivec_vsl_v8hi (vss, vsc); + VSL_8HI altivec_vsl_v8hi {} + + const vsi __builtin_altivec_vsl_v4si (vsi, vsc); + VSL_4SI altivec_vsl_v4si {} + + const vsll __builtin_altivec_vsl_v2di (vsll, vsc); + 
VSL_2DI altivec_vsl_v2di {} + + const vsq __builtin_altivec_vsl_v1ti (vsq, vsc); + VSL_1TI altivec_vsl_v1ti {} + const vsc __builtin_altivec_vslb (vsc, vuc); VSLB vashlv16qi3 {} @@ -969,8 +984,23 @@ const vss __builtin_altivec_vslh (vss, vus); VSLH vashlv8hi3 {} - const vsi __builtin_altivec_vslo (vsi, vsi); - VSLO altivec_vslo {} + const vsc __builtin_altivec_vslo_v16qi (vsc, vsc); + VSLO_16QI altivec_vslo_v16qi {} + + const vss __builtin_altivec_vslo_v8hi (vss, vsc); + VSLO_8HI altivec_vslo_v8hi {} + + const vf __builtin_altivec_vslo_v4sf (vf, vsc); + VSLO_4SF altivec_vslo_v4sf {} + + const vsi __builtin_altivec_vslo_v4si (vsi, vsc); + VSLO_4SI altivec_vslo_v4si {} + + const vsll __builtin_altivec_vslo_v2di (vsll, vsc); + VSLO_2DI altivec_vslo_v2di {} + + const vsq __builtin_altivec_vslo_v1ti (vsq, vsc); + VSLO_1TI altivec_vslo_v1ti {} const vsi __builtin_altivec_vslw (vsi, vui); VSLW vashlv4si3 {} diff --git a/gcc/config/rs6000/rs6000-overload.def b/gcc/config/rs6000/rs6000-overload.def index b4266c54464..62a29b9ce03 100644 --- a/gcc/config/rs6000/rs6000-overload.def +++ b/gcc/config/rs6000/rs6000-overload.def @@ -3454,27 +3454,27 @@ [VEC_SLL, vec_sll, __builtin_vec_sll] vsc __builtin_vec_sll (vsc, vuc); - VSL VSL_VSC + VSL_16QI VSL_VSC vuc __builtin_vec_sll (vuc, vuc); - VSL VSL_VUC + VSL_16QI VSL_VUC vss __builtin_vec_sll (vss, vuc); - VSL VSL_VSS + VSL_8HI VSL_VSS vus __builtin_vec_sll (vus, vuc); - VSL VSL_VUS + VSL_8HI VSL_VUS vp __builtin_vec_sll (vp, vuc); - VSL VSL_VP + VSL_8HI VSL_VP vsi __builtin_vec_sll (vsi, vuc); - VSL VSL_VSI + VSL_4SI VSL_VSI vui __builtin_vec_sll (vui, vuc); - VSL VSL_VUI + VSL_4SI VSL_VUI vsll __builtin_vec_sll (vsll, vuc); - VSL VSL_VSLL + VSL_2DI VSL_VSLL vull __builtin_vec_sll (vull, vuc); - VSL VSL_VULL + VSL_2DI VSL_VULL vsq __builtin_vec_sll (vsq, vuc); - VSL VSL_VSQ + VSL_1TI VSL_VSQ vuq __builtin_vec_sll (vuq, vuc); - VSL VSL_VUQ + VSL_1TI VSL_VUQ ; The following variants are deprecated. 
vsc __builtin_vec_sll (vsc, vus); VSL VSL_VSC_VUS @@ -3531,53 +3531,53 @@ [VEC_SLO, vec_slo, __builtin_vec_slo] vsc __builtin_vec_slo (vsc, vsc); - VSLO VSLO_VSCS + VSLO_16QI VSLO_VSCS vsc __builtin_vec_slo (vsc, vuc); - VSLO VSLO_VSCU + VSLO_16QI VSLO_VSCU vuc __builtin_vec_slo (vuc, vsc); - VSLO VSLO_VUCS + VSLO_16QI VSLO_VUCS vuc __builtin_vec_slo (vuc, vuc); - VSLO VSLO_VUCU + VSLO_16QI VSLO_VUCU vss __builtin_vec_slo (vss, vsc); - VSLO VSLO_VSSS + VSLO_8HI VSLO_VSSS vss __builtin_vec_slo (vss, vuc); - VSLO VSLO_VSSU + VSLO_8HI VSLO_VSSU vus __builtin_vec_slo (vus, vsc); - VSLO VSLO_VUSS + VSLO_8HI VSLO_VUSS vus __builtin_vec_slo (vus, vuc); - VSLO VSLO_VUSU + VSLO_8HI VSLO_VUSU vp __builtin_vec_slo (vp, vsc); - VSLO VSLO_VPS + VSLO_8HI VSLO_VPS vp __builtin_vec_slo (vp, vuc); - VSLO VSLO_VPU + VSLO_8HI VSLO_VPU vsi __builtin_vec_slo (vsi, vsc); - VSLO VSLO_VSIS + VSLO_4SI VSLO_VSIS vsi __builtin_vec_slo (vsi, vuc); - VSLO VSLO_VSIU + VSLO_4SI VSLO_VSIU vui __builtin_vec_slo (vui, vsc); - VSLO VSLO_VUIS + VSLO_4SI VSLO_VUIS vui __builtin_vec_slo (vui, vuc); - VSLO VSLO_VUIU + VSLO_4SI VSLO_VUIU vsll __builtin_vec_slo (vsll, vsc); - VSLO VSLO_VSLLS + VSLO_2DI VSLO_VSLLS vsll __builtin_vec_slo (vsll, vuc); - VSLO VSLO_VSLLU + VSLO_2DI VSLO_VSLLU vull __builtin_vec_slo (vull, vsc); - VSLO VSLO_VULLS + VSLO_2DI VSLO_VULLS vull __builtin_vec_slo (vull, vuc); - VSLO VSLO_VULLU + VSLO_2DI VSLO_VULLU vf __builtin_vec_slo (vf, vsc); - VSLO VSLO_VFS + VSLO_4SF VSLO_VFS vf __builtin_vec_slo (vf, vuc); - VSLO VSLO_VFU + VSLO_4SF VSLO_VFU vsq __builtin_vec_slo (vsq, vsc); - VSLO VSLDO_VSQS + VSLO_1TI VSLDO_VSQS vsq __builtin_vec_slo (vsq, vuc); - VSLO VSLDO_VSQU + VSLO_1TI VSLDO_VSQU vuq __builtin_vec_slo (vuq, vsc); - VSLO VSLDO_VUQS + VSLO_1TI VSLDO_VUQS vuq __builtin_vec_slo (vuq, vuc); - VSLO VSLDO_VUQU + VSLO_1TI VSLDO_VUQU [VEC_SLV, vec_slv, __builtin_vec_vslv] vuc __builtin_vec_vslv (vuc, vuc); diff --git a/gcc/testsuite/gcc.target/powerpc/pr117818-1.c 
b/gcc/testsuite/gcc.target/powerpc/pr117818-1.c new file mode 100644 index 00000000000..e0e8b6701e4 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr117818-1.c @@ -0,0 +1,33 @@ +/* { dg-do compile { target lp64 } } */ +/* { dg-options "-mdejagnu-cpu=power8 -mvsx -O2" } */ + +#include <altivec.h> + +typedef vector unsigned char vui8_t; + +vui8_t +test_splat1 (vui8_t vra) +{ + vui8_t result; + vui8_t tmp = vec_splat_u8(-9); /* VSPLTISB */ + tmp = vec_add (tmp, tmp); /* VADDUBM */ + result = vec_slo ((vui8_t) vra, tmp); /* VSLO */ + return (vui8_t) vec_sll (result, tmp); /* VSL */ +} + +vui8_t +test_splat2 (vui8_t vra) +{ + vui8_t result; + vui8_t tmp = vec_splat_u8(9); /* VSPLTISB */ + tmp = vec_add (tmp, tmp); /* VADDUBM */ + result = vec_slo ((vui8_t) vra, tmp); /* VSLO */ + return (vui8_t) vec_sll (result, tmp); /* VSL */ +} + +/* { dg-final { scan-assembler-times {\mvspltisb\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mvaddubm\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mvslo\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mvsl\M} 2 } } */ +/* { dg-final { scan-assembler-not {\mlvx?\M} } } */ +/* { dg-final { scan-assembler-not {\mvadduwm\M} } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr118480-3.c b/gcc/testsuite/gcc.target/powerpc/pr118480-3.c new file mode 100644 index 00000000000..37388cf944a --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr118480-3.c @@ -0,0 +1,39 @@ +/* { dg-do compile { target lp64 } } */ +/* { dg-options "-mdejagnu-cpu=power9 -mvsx -O2" } */ + +#include <altivec.h> + +typedef vector unsigned char vui8_t; + +vui8_t +test_slqi_char_18_V3 (vui8_t vra) +{ + vui8_t result; + vui8_t tmp = vec_splats((unsigned char)18); /* XXSPLTIB */ + result = vec_vslo ((vui8_t) vra, tmp); /* VSLO */ + return vec_vsl (result, tmp); /* VSL */ +} + +vui8_t +test_slqi_char_116_V3 (vui8_t vra) +{ + vui8_t result; + vui8_t tmp = vec_splats((unsigned char)116); /* XXSPLTIB */ + result = vec_slo (vra, tmp); /* VSLO */ + return 
vec_sll (result, tmp); /* VSL */ +} + +vui8_t +test_slqi_char_116_V0 (vui8_t vra) +{ + vui8_t result; + vui8_t tmp = vec_splat_u8(-12); /* XXSPLTIB */ + result = vec_slo (vra, tmp); /* VSLO */ + return vec_sll (result, tmp); /* VSL */ +} + +/* { dg-final { scan-assembler-times {\mxxspltib\M} 3 } } */ +/* { dg-final { scan-assembler-times {\mvslo\M} 3 } } */ +/* { dg-final { scan-assembler-times {\mvsl\M} 3 } } */ +/* { dg-final { scan-assembler-not {\mlxv?\M} } } */ +/* { dg-final { scan-assembler-not {\mvspltisb\M} } } */