Hi Carl, on 2024/8/2 03:35, Carl Love wrote: > GCC developers: > > Version 3, updated the testcase dg-do link to dg-do compile. Moved the new > documentation again. Retested on Power 10 LE and BE to verify the dg > arguments disable the test on Power10BE but enable the test for Power10LE. > Reran the full regression testsuite. There were no new regressions for the > testsuite. > > Version 2, updated rs6000-overload.def to remove adding additonal internal > names and to change XXSLDWI_Q to XXSLDWI_1TI per comments from Kewen. Move > new documentation statement for the PIVPR built-ins per comments from Kewen. > Updated dg-do-run directive and added comment about the save-temps in > testcase per feedback from Segher. Retested the patch on Power 10 with no > regressions. > > The following patch adds the int128 varients to the existing overloaded > built-ins vec_sld, vec_sldb, vec_sldw, vec_sll, vec_slo, vec_srdb, vec_srl, > vec_sro. These varients were requested by Steve Munroe. > > The patch has been tested on a Power 10 system with no regressions.
OK with the below nits tweaked and tested well on both BE and LE, thanks! > > Please let me know if the patch is acceptable for mainline. > > Carl > > ------------------------------------------------------------------------------ > rs6000, Add new overloaded vector shift builtin int128 variants > > Add the signed __int128 and unsigned __int128 argument types for the > overloaded built-ins vec_sld, vec_sldb, vec_sldw, vec_sll, vec_slo, > vec_srdb, vec_srl, vec_sro. For each of the new argument types add a > testcase and update the documentation for the built-in. > > gcc/ChangeLog: > * config/rs6000/altivec.md (vs<SLDB_lr>db_<mode>): Change > define_insn iterator to VEC_IC. > * config/rs6000/rs6000-builtins.def (__builtin_altivec_vsldoi_v1ti, > __builtin_vsx_xxsldwi_v1ti, __builtin_altivec_vsldb_v1ti, > __builtin_altivec_vsrdb_v1ti): New builtin definitions. > * config/rs6000/rs6000-overload.def (vec_sld, vec_sldb, vec_sldw, > vec_sll, vec_slo, vec_srdb, vec_srl, vec_sro): New overloaded > definitions. > * doc/extend.texi (vec_sld, vec_sldb, vec_sldw, vec_sll, vec_slo, > vec_srdb, vec_srl, vec_sro): Add documentation for new overloaded > built-ins. > > gcc/testsuite/ChangeLog: > * gcc.target/powerpc/vec-shift-double-runnable-int128.c: New test file. 
> --- > gcc/config/rs6000/altivec.md | 6 +- > gcc/config/rs6000/rs6000-builtins.def | 12 + > gcc/config/rs6000/rs6000-overload.def | 40 ++ > gcc/doc/extend.texi | 43 ++ > .../vec-shift-double-runnable-int128.c | 419 ++++++++++++++++++ > 5 files changed, 517 insertions(+), 3 deletions(-) > create mode 100644 > gcc/testsuite/gcc.target/powerpc/vec-shift-double-runnable-int128.c > > diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md > index 5af9bf920a2..2a18ee44526 100644 > --- a/gcc/config/rs6000/altivec.md > +++ b/gcc/config/rs6000/altivec.md > @@ -878,9 +878,9 @@ (define_int_attr SLDB_lr [(UNSPEC_SLDB "l") > (define_int_iterator VSHIFT_DBL_LR [UNSPEC_SLDB UNSPEC_SRDB]) > > (define_insn "vs<SLDB_lr>db_<mode>" > - [(set (match_operand:VI2 0 "register_operand" "=v") > - (unspec:VI2 [(match_operand:VI2 1 "register_operand" "v") > - (match_operand:VI2 2 "register_operand" "v") > + [(set (match_operand:VEC_IC 0 "register_operand" "=v") > + (unspec:VEC_IC [(match_operand:VEC_IC 1 "register_operand" "v") > + (match_operand:VEC_IC 2 "register_operand" "v") > (match_operand:QI 3 "const_0_to_12_operand" "n")] > VSHIFT_DBL_LR))] > "TARGET_POWER10" > diff --git a/gcc/config/rs6000/rs6000-builtins.def > b/gcc/config/rs6000/rs6000-builtins.def > index 77eb0f7e406..a2b2b729270 100644 > --- a/gcc/config/rs6000/rs6000-builtins.def > +++ b/gcc/config/rs6000/rs6000-builtins.def > @@ -964,6 +964,9 @@ > const vss __builtin_altivec_vsldoi_8hi (vss, vss, const int<4>); > VSLDOI_8HI altivec_vsldoi_v8hi {} > > + const vsq __builtin_altivec_vsldoi_v1ti (vsq, vsq, const int<4>); > + VSLDOI_V1TI altivec_vsldoi_v1ti {} Nit: s/VSLDOI_V1TI/VSLDOI_1TI/ to align with the other VSLDOI_* (no 'V' in *). 
> + > const vss __builtin_altivec_vslh (vss, vus); > VSLH vashlv8hi3 {} > > @@ -1831,6 +1834,9 @@ > const vsll __builtin_vsx_xxsldwi_2di (vsll, vsll, const int<2>); > XXSLDWI_2DI vsx_xxsldwi_v2di {} > > + const vsq __builtin_vsx_xxsldwi_v1ti (vsq, vsq, const int<2>); > + XXSLDWI_1TI vsx_xxsldwi_v1ti {} > + > const vf __builtin_vsx_xxsldwi_4sf (vf, vf, const int<2>); > XXSLDWI_4SF vsx_xxsldwi_v4sf {} > > @@ -3299,6 +3305,9 @@ > const vss __builtin_altivec_vsldb_v8hi (vss, vss, const int<3>); > VSLDB_V8HI vsldb_v8hi {} > > + const vsq __builtin_altivec_vsldb_v1ti (vsq, vsq, const int<3>); > + VSLDB_V1TI vsldb_v1ti {} > + > const vsq __builtin_altivec_vslq (vsq, vuq); > VSLQ vashlv1ti3 {} > > @@ -3317,6 +3326,9 @@ > const vss __builtin_altivec_vsrdb_v8hi (vss, vss, const int<3>); > VSRDB_V8HI vsrdb_v8hi {} > > + const vsq __builtin_altivec_vsrdb_v1ti (vsq, vsq, const int<3>); > + VSRDB_V1TI vsrdb_v1ti {} > + > const vsq __builtin_altivec_vsrq (vsq, vuq); > VSRQ vlshrv1ti3 {} > > diff --git a/gcc/config/rs6000/rs6000-overload.def > b/gcc/config/rs6000/rs6000-overload.def > index b5fd9d0e38f..b1e038488e2 100644 > --- a/gcc/config/rs6000/rs6000-overload.def > +++ b/gcc/config/rs6000/rs6000-overload.def > @@ -3399,6 +3399,10 @@ > VSLDOI_4SF > vd __builtin_vec_sld (vd, vd, const int); > VSLDOI_2DF > + vsq __builtin_vec_sld (vsq, vsq, const int); > + VSLDOI_V1TI VSLDOI_VSQ > + vuq __builtin_vec_sld (vuq, vuq, const int); > + VSLDOI_V1TI VSLDOI_VUQ Nit: s/VSLDOI_V1TI/VSLDOI_1TI/ as the above change. 
> > [VEC_SLDB, vec_sldb, __builtin_vec_sldb] > vsc __builtin_vec_sldb (vsc, vsc, const int); > @@ -3417,6 +3421,10 @@ > VSLDB_V2DI VSLDB_VSLL > vull __builtin_vec_sldb (vull, vull, const int); > VSLDB_V2DI VSLDB_VULL > + vsq __builtin_vec_sldb (vsq, vsq, const int); > + VSLDB_V1TI VSLDB_VSQ > + vuq __builtin_vec_sldb (vuq, vuq, const int); > + VSLDB_V1TI VSLDB_VUQ > > [VEC_SLDW, vec_sldw, __builtin_vec_sldw] > vsc __builtin_vec_sldw (vsc, vsc, const int); > @@ -3439,6 +3447,10 @@ > XXSLDWI_4SF XXSLDWI_VF > vd __builtin_vec_sldw (vd, vd, const int); > XXSLDWI_2DF XXSLDWI_VD > + vsq __builtin_vec_sldw (vsq, vsq, const int); > + XXSLDWI_1TI XXSLDWI_VSQ > + vuq __builtin_vec_sldw (vuq, vuq, const int); > + XXSLDWI_1TI XXSLDWI_VUQ > > [VEC_SLL, vec_sll, __builtin_vec_sll] > vsc __builtin_vec_sll (vsc, vuc); > @@ -3459,6 +3471,10 @@ > VSL VSL_VSLL > vull __builtin_vec_sll (vull, vuc); > VSL VSL_VULL > + vsq __builtin_vec_sll (vsq, vuc); > + VSL VSL_VSQ > + vuq __builtin_vec_sll (vuq, vuc); > + VSL VSL_VUQ > ; The following variants are deprecated. 
> vsc __builtin_vec_sll (vsc, vus); > VSL VSL_VSC_VUS > @@ -3554,6 +3570,14 @@ > VSLO VSLO_VFS > vf __builtin_vec_slo (vf, vuc); > VSLO VSLO_VFU > + vsq __builtin_vec_slo (vsq, vsc); > + VSLO VSLDO_VSQS > + vsq __builtin_vec_slo (vsq, vuc); > + VSLO VSLDO_VSQU > + vuq __builtin_vec_slo (vuq, vsc); > + VSLO VSLDO_VUQS > + vuq __builtin_vec_slo (vuq, vuc); > + VSLO VSLDO_VUQU > > [VEC_SLV, vec_slv, __builtin_vec_vslv] > vuc __builtin_vec_vslv (vuc, vuc); > @@ -3699,6 +3723,10 @@ > VSRDB_V2DI VSRDB_VSLL > vull __builtin_vec_srdb (vull, vull, const int); > VSRDB_V2DI VSRDB_VULL > + vsq __builtin_vec_srdb (vsq, vsq, const int); > + VSRDB_V1TI VSRDB_VSQ > + vuq __builtin_vec_srdb (vuq, vuq, const int); > + VSRDB_V1TI VSRDB_VUQ > > [VEC_SRL, vec_srl, __builtin_vec_srl] > vsc __builtin_vec_srl (vsc, vuc); > @@ -3719,6 +3747,10 @@ > VSR VSR_VSLL > vull __builtin_vec_srl (vull, vuc); > VSR VSR_VULL > + vsq __builtin_vec_srl (vsq, vuc); > + VSR VSR_VSQ > + vuq __builtin_vec_srl (vuq, vuc); > + VSR VSR_VUQ > ; The following variants are deprecated. 
> vsc __builtin_vec_srl (vsc, vus); > VSR VSR_VSC_VUS > @@ -3808,6 +3840,14 @@ > VSRO VSRO_VFS > vf __builtin_vec_sro (vf, vuc); > VSRO VSRO_VFU > + vsq __builtin_vec_sro (vsq, vsc); > + VSRO VSRDO_VSQS > + vsq __builtin_vec_sro (vsq, vuc); > + VSRO VSRDO_VSQU > + vuq __builtin_vec_sro (vuq, vsc); > + VSRO VSRDO_VUQS > + vuq __builtin_vec_sro (vuq, vuc); > + VSRO VSRDO_VUQU > > [VEC_SRV, vec_srv, __builtin_vec_vsrv] > vuc __builtin_vec_vsrv (vuc, vuc); > diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi > index 00b93f954e3..4a9cf366f56 100644 > --- a/gcc/doc/extend.texi > +++ b/gcc/doc/extend.texi > @@ -23519,6 +23519,10 @@ const unsigned int); > vector signed long long, const unsigned int); > @exdent vector unsigned long long vec_sldb (vector unsigned long long, > vector unsigned long long, const unsigned int); > +@exdent vector signed __int128 vec_sldb (vector signed __int128, > +vector signed __int128, const unsigned int); > +@exdent vector unsigned __int128 vec_sldb (vector unsigned __int128, > +vector unsigned __int128, const unsigned int); > @end smallexample > > Shift the combined input vectors left by the amount specified by the > low-order > @@ -23546,6 +23550,10 @@ const unsigned int); > vector signed long long, const unsigned int); > @exdent vector unsigned long long vec_srdb (vector unsigned long long, > vector unsigned long long, const unsigned int); > +@exdent vector signed __int128 vec_srdb (vector signed __int128, > +vector signed __int128, const unsigned int); > +@exdent vector unsigned __int128 vec_srdb (vector unsigned __int128, > +vector unsigned __int128, const unsigned int); > @end smallexample > > Shift the combined input vectors right by the amount specified by the > low-order > @@ -24041,6 +24049,41 @@ int vec_any_le (vector unsigned __int128, vector > unsigned __int128); > @end smallexample > Nit: The following instances are extension of the existing overloaded built-ins @code{vec_sld}, @code{vec_sldw}, @code{vec_slo}, @code{vec_sro}, 
@code{vec_srl} that are documented in the PVIPR. > > +@smallexample > +@exdent vector signed __int128 vec_sld (vector signed __int128, > +vector signed __int128, const unsigned int); > +@exdent vector unsigned __int128 vec_sld (vector unsigned __int128, > +vector unsigned __int128, const unsigned int); > +@exdent vector signed __int128 vec_sldw (vector signed __int128, > +vector signed __int128, const unsigned int); > +@exdent vector unsigned __int128 vec_sldw (vector unsigned __int, > +vector unsigned __int128, const unsigned int); > +@exdent vector signed __int128 vec_slo (vector signed __int128, > +vector signed char); > +@exdent vector signed __int128 vec_slo (vector signed __int128, > +vector unsigned char); > +@exdent vector unsigned __int128 vec_slo (vector unsigned __int128, > +vector signed char); > +@exdent vector unsigned __int128 vec_slo (vector unsigned __int128, > +vector unsigned char); > +@exdent vector signed __int128 vec_sro (vector signed __int128, > +vector signed char); > +@exdent vector signed __int128 vec_sro (vector signed __int128, > +vector unsigned char); > +@exdent vector unsigned __int128 vec_sro (vector unsigned __int128, > +vector signed char); > +@exdent vector unsigned __int128 vec_sro (vector unsigned __int128, > +vector unsigned char); > +@exdent vector signed __int128 vec_srl (vector signed __int128, > +vector unsigned char); > +@exdent vector unsigned __int128 vec_srl (vector unsigned __int128, > +vector unsigned char); > +@end smallexample > + > +The above instances are extension of the existing overloaded built-ins > +@code{vec_sld}, @code{vec_sldw}, @code{vec_slo}, @code{vec_sro}, > @code{vec_srl} > +that are documented in the PVIPR. ... this paragraph is moved above to break two "smallexample"s. 
BR, Kewen > + > @node PowerPC Hardware Transactional Memory Built-in Functions > @subsection PowerPC Hardware Transactional Memory Built-in Functions > GCC provides two interfaces for accessing the Hardware Transactional > diff --git > a/gcc/testsuite/gcc.target/powerpc/vec-shift-double-runnable-int128.c > b/gcc/testsuite/gcc.target/powerpc/vec-shift-double-runnable-int128.c > new file mode 100644 > index 00000000000..0a329fa0e9c > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/vec-shift-double-runnable-int128.c > @@ -0,0 +1,419 @@ > +/* { dg-do run { target power10_hw } } */ > +/* { dg-do compile { target { ! power10_hw } } } */ > +/* { dg-require-effective-target int128 } */ > + > +/* Need -save-temps for dg-final scan-assembler-times at end of test. */ > +/* { dg-options "-mdejagnu-cpu=power10 -save-temps" } */ > + > +#include <altivec.h> > + > +#define DEBUG 0 > + > +#if DEBUG > +#include <stdio.h> > + > +void print_i128 (unsigned __int128 val) > +{ > + printf(" 0x%016llx%016llx", > + (unsigned long long)(val >> 64), > + (unsigned long long)(val & 0xFFFFFFFFFFFFFFFF)); > +} > +#endif > + > +extern void abort (void); > + > +#if DEBUG > +#define ACTION_2ARG_UNSIGNED(NAME, TYPE_NAME) \ > + printf ("vec_%s (vector TYPE __int128, vector TYPE __int128) \n", #NAME); \ > + printf(" src_va_s128[0] = "); \ > + print_i128 ((unsigned __int128) src_va_##TYPE_NAME[0]); \ > + printf("\n"); \ > + printf(" src_vb_uc = 0x"); \ > + for (i = 0; i < 16; i++) \ > + printf("%02x", src_vb_uc[i]); \ > + printf("\n"); \ > + printf(" vresult[0] = "); \ > + print_i128 ((unsigned __int128) vresult[0]); \ > + printf("\n"); \ > + printf(" expected_vresult[0] = "); \ > + print_i128 ((unsigned __int128) expected_vresult[0]); \ > + printf("\n"); > + > +#define ACTION_2ARG_SIGNED(NAME, TYPE_NAME) \ > + printf ("vec_%s (vector TYPE __int128, vector TYPE __int128) \n", #NAME); \ > + printf(" src_va_s128[0] = "); \ > + print_i128 ((unsigned __int128) src_va_##TYPE_NAME[0]); \ > + 
printf("\n"); \ > + printf(" src_vb_sc = 0x"); \ > + for (i = 0; i < 16; i++) \ > + printf("%02x", src_vb_sc[i]); \ > + printf("\n"); \ > + printf(" vresult[0] = "); \ > + print_i128 ((unsigned __int128) vresult[0]); \ > + printf("\n"); \ > + printf(" expected_vresult[0] = "); \ > + print_i128 ((unsigned __int128) expected_vresult[0]); \ > + printf("\n"); > + > +#define ACTION_3ARG(NAME, TYPE_NAME, CONST) \ > + printf ("vec_%s (vector TYPE __int128, vector TYPE __int128, %s) \n", \ > + #NAME, #CONST); \ > + printf(" src_va_s128[0] = "); \ > + print_i128 ((unsigned __int128) src_va_##TYPE_NAME[0]); \ > + printf("\n"); \ > + printf(" src_vb_s128[0] = "); \ > + print_i128 ((unsigned __int128) src_vb_##TYPE_NAME[0]); \ > + printf("\n"); \ > + printf(" vresult[0] = "); \ > + print_i128 ((unsigned __int128) vresult[0]); \ > + printf("\n"); \ > + printf(" expected_vresult[0] = "); \ > + print_i128 ((unsigned __int128) expected_vresult[0]); \ > + printf("\n"); > + > +#else > +#define ACTION_2ARG_UNSIGNED(NAME, TYPE_NAME) \ > + abort(); > + > +#define ACTION_2ARG_SIGNED(NAME, TYPE_NAME) \ > + abort(); > + > +#define ACTION_2ARG(NAME, TYPE_NAME) \ > + abort(); > + > +#define ACTION_3ARG(NAME, TYPE_NAME, CONST) \ > + abort(); > +#endif > + > +/* Second argument of the builtin is vector unsigned char. */ > +#define TEST_2ARG_UNSIGNED(NAME, TYPE, TYPE_NAME, EXP_RESULT_HI, > EXP_RESULT_LO) \ > + { \ > + vector TYPE __int128 vresult; \ > + vector TYPE __int128 expected_vresult; \ > + int i; \ > + \ > + expected_vresult = (vector TYPE __int128) { EXP_RESULT_HI }; \ > + expected_vresult = (expected_vresult << 64) | \ > + (vector TYPE __int128) { EXP_RESULT_LO }; \ > + vresult = vec_##NAME (src_va_##TYPE_NAME, src_vb_uc); \ > + \ > + if (!vec_all_eq (vresult, expected_vresult)) { \ > + ACTION_2ARG_UNSIGNED(NAME, TYPE_NAME) \ > + } \ > + } > + > +/* Second argument of the builtin is vector signed char. 
*/ > +#define TEST_2ARG_SIGNED(NAME, TYPE, TYPE_NAME, EXP_RESULT_HI, > EXP_RESULT_LO) \ > + { \ > + vector TYPE __int128 vresult; \ > + vector TYPE __int128 expected_vresult; \ > + int i; \ > + \ > + expected_vresult = (vector TYPE __int128) { EXP_RESULT_HI }; \ > + expected_vresult = (expected_vresult << 64) | \ > + (vector TYPE __int128) { EXP_RESULT_LO }; \ > + vresult = vec_##NAME (src_va_##TYPE_NAME, src_vb_sc); \ > + \ > + if (!vec_all_eq (vresult, expected_vresult)) { \ > + ACTION_2ARG_SIGNED(NAME, TYPE_NAME) \ > + } \ > + } > + > +#define TEST_3ARG(NAME, TYPE, TYPE_NAME, CONST, EXP_RESULT_HI, > EXP_RESULT_LO) \ > + { \ > + vector TYPE __int128 vresult; \ > + vector TYPE __int128 expected_vresult; \ > + \ > + expected_vresult = (vector TYPE __int128) { EXP_RESULT_HI }; \ > + expected_vresult = (expected_vresult << 64) | \ > + (vector TYPE __int128) { EXP_RESULT_LO }; \ > + vresult = vec_##NAME (src_va_##TYPE_NAME, src_vb_##TYPE_NAME, CONST); > \ > + \ > + if (!vec_all_eq (vresult, expected_vresult)) { \ > + ACTION_3ARG(NAME, TYPE_NAME, CONST) \ > + } \ > + } > + > +int > +main (int argc, char *argv []) > +{ > + vector signed __int128 vresult_s128; > + vector signed __int128 expected_vresult_s128; > + vector signed __int128 src_va_s128; > + vector signed __int128 src_vb_s128; > + vector unsigned __int128 vresult_u128; > + vector unsigned __int128 expected_vresult_u128; > + vector unsigned __int128 src_va_u128; > + vector unsigned __int128 src_vb_u128; > + vector signed char src_vb_sc; > + vector unsigned char src_vb_uc; > + > + /* 128-bit vector shift right tests, vec_srdb. 
*/ > + src_va_s128 = (vector signed __int128) {0x12345678}; > + src_vb_s128 = (vector signed __int128) {0xFEDCBA90}; > + TEST_3ARG(srdb, signed, s128, 4, 0x8000000000000000, 0xFEDCBA9) > + > + src_va_u128 = (vector unsigned __int128) { 0xFEDCBA98 }; > + src_vb_u128 = (vector unsigned __int128) { 0x76543210}; > + TEST_3ARG(srdb, unsigned, u128, 4, 0x8000000000000000, 0x07654321) > + > + /* 128-bit vector shift left tests, vec_sldb. */ > + src_va_s128 = (vector signed __int128) {0x123456789ABCDEF0}; > + src_va_s128 = (src_va_s128 << 64) > + | (vector signed __int128) {0x123456789ABCDEF0}; > + src_vb_s128 = (vector signed __int128) {0xFEDCBA9876543210}; > + src_vb_s128 = (src_vb_s128 << 64) > + | (vector signed __int128) {0xFEDCBA9876543210}; > + TEST_3ARG(sldb, signed, s128, 4, 0x23456789ABCDEF01, 0x23456789ABCDEF0F) > + > + src_va_u128 = (vector unsigned __int128) {0xFEDCBA9876543210}; > + src_va_u128 = src_va_u128 << 64 > + | (vector unsigned __int128) {0xFEDCBA9876543210}; > + src_vb_u128 = (vector unsigned __int128) {0x123456789ABCDEF0}; > + src_vb_u128 = src_vb_u128 << 64 > + | (vector unsigned __int128) {0x123456789ABCDEF0}; > + TEST_3ARG(sldb, unsigned, u128, 4, 0xEDCBA9876543210F, 0xEDCBA98765432101) > + > + /* Shift left by octect tests, vec_sld. Shift is by immediate value > + times 8. 
*/ > + src_va_s128 = (vector signed __int128) {0x123456789ABCDEF0}; > + src_va_s128 = (src_va_s128 << 64) > + | (vector signed __int128) {0x123456789ABCDEF0}; > + src_vb_s128 = (vector signed __int128) {0xFEDCBA9876543210}; > + src_vb_s128 = (src_vb_s128 << 64) > + | (vector signed __int128) {0xFEDCBA9876543210}; > + TEST_3ARG(sld, signed, s128, 4, 0x9abcdef012345678, 0x9abcdef0fedcba98) > + > + src_va_u128 = (vector unsigned __int128) {0xFEDCBA9876543210}; > + src_va_u128 = src_va_u128 << 64 > + | (vector unsigned __int128) {0xFEDCBA9876543210}; > + src_vb_u128 = (vector unsigned __int128) {0x123456789ABCDEF0}; > + src_vb_u128 = src_vb_u128 << 64 > + | (vector unsigned __int128) {0x123456789ABCDEF0}; > + TEST_3ARG(sld, unsigned, u128, 4, 0x76543210fedcba98, 0x7654321012345678) > + > + /* Vector left shift bytes within the vector, vec_sll. */ > + src_va_s128 = (vector signed __int128) {0x123456789ABCDEF0}; > + src_va_s128 = (src_va_s128 << 64) > + | (vector signed __int128) {0x123456789ABCDEF0}; > + src_vb_uc = (vector unsigned char) {0x01, 0x01, 0x01, 0x01, > + 0x01, 0x01, 0x01, 0x01, > + 0x01, 0x01, 0x01, 0x01, > + 0x01, 0x01, 0x01, 0x01}; > + TEST_2ARG_UNSIGNED(sll, signed, s128, 0x2468acf13579bde0, > + 0x2468acf13579bde0) > + > + src_va_u128 = (vector unsigned __int128) {0x123456789ABCDEF0}; > + src_va_u128 = src_va_u128 << 64 > + | (vector unsigned __int128) {0x123456789ABCDEF0}; > + src_vb_uc = (vector unsigned char) {0x02, 0x02, 0x02, 0x02, > + 0x02, 0x02, 0x02, 0x02, > + 0x02, 0x02, 0x02, 0x02, > + 0x02, 0x02, 0x02, 0x02}; > + TEST_2ARG_UNSIGNED(sll, unsigned, u128, 0x48d159e26af37bc0, > + 0x48d159e26af37bc0) > + > + /* Vector right shift bytes within the vector, vec_srl. 
*/ > + src_va_s128 = (vector signed __int128) {0x123456789ABCDEF0}; > + src_va_s128 = (src_va_s128 << 64) > + | (vector signed __int128) {0x123456789ABCDEF0}; > + src_vb_uc = (vector unsigned char) {0x01, 0x01, 0x01, 0x01, > + 0x01, 0x01, 0x01, 0x01, > + 0x01, 0x01, 0x01, 0x01, > + 0x01, 0x01, 0x01, 0x01}; > + TEST_2ARG_UNSIGNED(srl, signed, s128, 0x091a2b3c4d5e6f78, > + 0x091a2b3c4d5e6f78) > + > + src_va_u128 = (vector unsigned __int128) {0x123456789ABCDEF0}; > + src_va_u128 = src_va_u128 << 64 > + | (vector unsigned __int128) {0x123456789ABCDEF0}; > + src_vb_uc = (vector unsigned char) {0x02, 0x02, 0x02, 0x02, > + 0x02, 0x02, 0x02, 0x02, > + 0x02, 0x02, 0x02, 0x02, > + 0x02, 0x02, 0x02, 0x02}; > + TEST_2ARG_UNSIGNED(srl, unsigned, u128, 0x48d159e26af37bc, > + 0x48d159e26af37bc) > + > + /* Shift left by octect tests, vec_slo. Shift is by immediate value > + bytes. Shift amount in bits 121:124. */ > + src_va_s128 = (vector signed __int128) {0x123456789ABCDEF0}; > + src_va_s128 = (src_va_s128 << 64) > + | (vector signed __int128) {0x123456789ABCDEF0}; > + /* Note vb_sc is Endian specific. */ > + /* The left shift amount is 1 byte, i.e. 1 * 8 bits. */ > +#if __LITTLE_ENDIAN__ > + src_vb_sc = (vector signed char) {0x1 << 3, 0x0, 0x0, 0x0, > + 0x0, 0x0, 0x0, 0x0, > + 0x0, 0x0, 0x0, 0x0, > + 0x0, 0x0, 0x0, 0x0}; > +#else > + src_vb_sc = (vector signed char) {0x0, 0x0, 0x0, 0x0, > + 0x0, 0x0, 0x0, 0x0, > + 0x0, 0x0, 0x0, 0x0, > + 0x0, 0x0, 0x0, 0x1 << 3}; > +#endif > + > + TEST_2ARG_SIGNED(slo, signed, s128, 0x3456789ABCDEF012, > + 0x3456789ABCDEF000) > + src_va_s128 = (vector signed __int128) {0x123456789ABCDEF0}; > + src_va_s128 = (src_va_s128 << 64) > + | (vector signed __int128) {0x123456789ABCDEF0}; > + > + /* Note vb_sc is Endian specific. */ > + /* The left shift amount is 2 bytes, i.e. 2 * 8 bits. 
*/ > +#if __LITTLE_ENDIAN__ > + src_vb_uc = (vector unsigned char) {0x2 << 3, 0x0, 0x0, 0x0, > + 0x0, 0x0, 0x0, 0x0, > + 0x0, 0x0, 0x0, 0x0, > + 0x0, 0x0, 0x0, 0x0}; > +#else > + src_vb_uc = (vector unsigned char) {0x0, 0x0, 0x0, 0x0, > + 0x0, 0x0, 0x0, 0x0, > + 0x0, 0x0, 0x0, 0x0, > + 0x0, 0x0, 0x0, 0x2 << 3}; > +#endif > + TEST_2ARG_UNSIGNED(slo, signed, s128, 0x56789ABCDEF01234, > + 0x56789ABCDEF00000) > + > + src_va_u128 = (vector unsigned __int128) {0xFEDCBA9876543210}; > + src_va_u128 = src_va_u128 << 64 > + | (vector unsigned __int128) {0xFEDCBA9876543210}; > + /* Note vb_sc is Endian specific. */ > + /* The left shift amount is 3 bytes, i.e. 3 * 8 bits. */ > +#if __LITTLE_ENDIAN__ > + src_vb_sc = (vector signed char) {0x03<<3, 0x0, 0x0, 0x0, > + 0x0, 0x0, 0x0, 0x0, > + 0x0, 0x0, 0x0, 0x0, > + 0x00, 0x00, 0x00, 0x0}; > +#else > + src_vb_sc = (vector signed char) {0x0, 0x0, 0x0, 0x0, > + 0x0, 0x0, 0x0, 0x0, > + 0x0, 0x0, 0x0, 0x0, > + 0x00, 0x00, 0x00, 0x03<<3}; > +#endif > + TEST_2ARG_SIGNED(slo, unsigned, u128, 0x9876543210FEDCBA, > + 0x9876543210000000) > + > + src_va_u128 = (vector unsigned __int128) {0xFEDCBA9876543210}; > + src_va_u128 = src_va_u128 << 64 > + | (vector unsigned __int128) {0xFEDCBA9876543210}; > + /* Note vb_sc is Endian specific. */ > + /* The left shift amount is 4 bytes, i.e. 4 * 8 bits. */ > +#if __LITTLE_ENDIAN__ > + src_vb_uc = (vector unsigned char) {0x04<<3, 0x0, 0x0, 0x0, > + 0x0, 0x0, 0x0, 0x0, > + 0x0, 0x0, 0x0, 0x0, > + 0x0, 0x0, 0x0, 0x0}; > +#else > + src_vb_uc = (vector unsigned char) {0x0, 0x0, 0x0, 0x0, > + 0x0, 0x0, 0x0, 0x0, > + 0x0, 0x0, 0x0, 0x0, > + 0x00, 0x00, 0x00, 0x04<<3}; > +#endif > + TEST_2ARG_UNSIGNED(slo, unsigned, u128, 0x76543210FEDCBA98, > + 0x7654321000000000) > + > + /* Shift right by octect tests, vec_sro. Shift is by immediate value > + times 8. Shift amount in bits 121:124. 
*/ > + src_va_s128 = (vector signed __int128) {0x123456789ABCDEF0}; > + src_va_s128 = (src_va_s128 << 64) > + | (vector signed __int128) {0x123456789ABCDEF0}; > + /* Note vb_sc is Endian specific. */ > + /* The left shift amount is 1 byte, i.e. 1 * 8 bits. */ > +#if __LITTLE_ENDIAN__ > + src_vb_sc = (vector signed char) {0x1 << 3, 0x0, 0x0, 0x0, > + 0x0, 0x0, 0x0, 0x0, > + 0x0, 0x0, 0x0, 0x0, > + 0x0, 0x0, 0x0, 0x0}; > +#else > + src_vb_sc = (vector signed char) {0x0, 0x0, 0x0, 0x0, > + 0x0, 0x0, 0x0, 0x0, > + 0x0, 0x0, 0x0, 0x0, > + 0x0, 0x0, 0x0, 0x1 << 3}; > +#endif > + TEST_2ARG_SIGNED(sro, signed, s128, 0x00123456789ABCDE, 0xF0123456789ABCDE) > + > + src_va_s128 = (vector signed __int128) {0x123456789ABCDEF0}; > + src_va_s128 = (src_va_s128 << 64) > + | (vector signed __int128) {0x123456789ABCDEF0}; > + /* Note vb_sc is Endian specific. */ > + /* The left shift amount is 1 byte, i.e. 1 * 8 bits. */ > +#if __LITTLE_ENDIAN__ > + src_vb_uc = (vector unsigned char) {0x2 << 3, 0x0, 0x0, 0x0, > + 0x0, 0x0, 0x0, 0x0, > + 0x0, 0x0, 0x0, 0x0, > + 0x0, 0x0, 0x0, 0x0}; > +#else > + src_vb_uc = (vector unsigned char) {0x0, 0x0, 0x0, 0x0, > + 0x0, 0x0, 0x0, 0x0, > + 0x0, 0x0, 0x0, 0x0, > + 0x0, 0x0, 0x0, 0x2 << 3}; > +#endif > + TEST_2ARG_UNSIGNED(sro, signed, s128, 0x0000123456789ABC, > + 0xDEF0123456789ABC) > + > + src_va_u128 = (vector unsigned __int128) {0xFEDCBA9876543210}; > + src_va_u128 = src_va_u128 << 64 > + | (vector unsigned __int128) {0xFEDCBA9876543210}; > + /* Note vb_sc is Endian specific. */ > + /* The left shift amount is 4 bytes, i.e. 4 * 8 bits. 
*/ > +#if __LITTLE_ENDIAN__ > + src_vb_sc = (vector signed char) {0x03<<3, 0x0, 0x0, 0x0, > + 0x0, 0x0, 0x0, 0x0, > + 0x0, 0x0, 0x0, 0x0, > + 0x00, 0x00, 0x00, 0x0}; > +#else > + src_vb_sc = (vector signed char) {0x0, 0x0, 0x0, 0x0, > + 0x0, 0x0, 0x0, 0x0, > + 0x0, 0x0, 0x0, 0x0, > + 0x00, 0x00, 0x00, 0x03<<3}; > +#endif > + TEST_2ARG_SIGNED(sro, unsigned, u128, 0x000000FEDCBA9876, > + 0x543210FEDCBA9876) > + > + src_va_u128 = (vector unsigned __int128) {0xFEDCBA9876543210}; > + src_va_u128 = src_va_u128 << 64 > + | (vector unsigned __int128) {0xFEDCBA9876543210}; > + /* Note vb_sc is Endian specific. */ > + /* The left shift amount is 4 bytes, i.e. 4 * 8 bits. */ > +#if __LITTLE_ENDIAN__ > + src_vb_uc = (vector unsigned char) {0x04<<3, 0x0, 0x0, 0x0, > + 0x0, 0x0, 0x0, 0x0, > + 0x0, 0x0, 0x0, 0x0, > + 0x0, 0x0, 0x00, 0x0}; > +#else > + src_vb_uc = (vector unsigned char) {0x0, 0x0, 0x0, 0x0, > + 0x0, 0x0, 0x0, 0x0, > + 0x0, 0x0, 0x0, 0x0, > + 0x00, 0x0, 0x0, 0x04<<3}; > +#endif > + TEST_2ARG_UNSIGNED(sro, unsigned, u128, 0x00000000FEDCBA98, > + 0x76543210FEDCBA98) > + > + /* 128-bit vector shift left tests, vec_sldw. 
*/ > + src_va_s128 = (vector signed __int128) {0x123456789ABCDEF0}; > + src_va_s128 = (src_va_s128 << 64) > + | (vector signed __int128) {0x123456789ABCDEF0}; > + src_vb_s128 = (vector signed __int128) {0xFEDCBA9876543210}; > + src_vb_s128 = (src_vb_s128 << 64) > + | (vector signed __int128) {0xFEDCBA9876543210}; > + TEST_3ARG(sldw, signed, s128, 1, 0x9ABCDEF012345678, 0x9ABCDEF0FEDCBA98) > + > + src_va_u128 = (vector unsigned __int128) {0x123456789ABCDEF0}; > + src_va_u128 = (src_va_u128 << 64) > + | (vector unsigned __int128) {0x123456789ABCDEF0}; > + src_vb_u128 = (vector unsigned __int128) {0xFEDCBA9876543210}; > + src_vb_u128 = (src_vb_u128 << 64) > + | (vector unsigned __int128) {0xFEDCBA9876543210}; > + TEST_3ARG(sldw, unsigned, u128, 2, 0x123456789ABCDEF0, 0xFEDCBA9876543210) > + > + > + return 0; > +} > + > +/* { dg-final { scan-assembler-times {\mvsrdbi\M} 2 } } */ > +/* { dg-final { scan-assembler-times {\mvsldbi\M} 2 } } */ > +/* { dg-final { scan-assembler-times {\mvsl\M} 2 } } */ > +/* { dg-final { scan-assembler-times {\mvsr\M} 2 } } */ > +/* { dg-final { scan-assembler-times {\mvslo\M} 4 } } */ > +/* { dg-final { scan-assembler-times {\mvsro\M} 4 } } */