https://gcc.gnu.org/g:083918a343d6cb9fd28c8b47dd1138220d95c820
commit r15-2795-g083918a343d6cb9fd28c8b47dd1138220d95c820 Author: Carl Love <c...@linux.ibm.com> Date: Wed Aug 7 10:55:03 2024 -0400 rs6000, Add new overloaded vector shift builtin int128 variants Add the signed __int128 and unsigned __int128 argument types for the overloaded built-ins vec_sld, vec_sldb, vec_sldw, vec_sll, vec_slo, vec_srdb, vec_srl, vec_sro. For each of the new argument types add a testcase and update the documentation for the built-in. gcc/ChangeLog: * config/rs6000/altivec.md (vs<SLDB_lr>db_<mode>): Change define_insn iterator to VEC_IC. * config/rs6000/rs6000-builtins.def (__builtin_altivec_vsldoi_v1ti, __builtin_vsx_xxsldwi_v1ti, __builtin_altivec_vsldb_v1ti, __builtin_altivec_vsrdb_v1ti): New builtin definitions. * config/rs6000/rs6000-overload.def (vec_sld, vec_sldb, vec_sldw, vec_sll, vec_slo, vec_srdb, vec_srl, vec_sro): New overloaded definitions. * doc/extend.texi (vec_sld, vec_sldb, vec_sldw, vec_sll, vec_slo, vec_srdb, vec_srl, vec_sro): Add documentation for new overloaded built-ins. gcc/testsuite/ChangeLog: * gcc.target/powerpc/vec-shift-double-runnable-int128.c: New test file. 
Diff: --- gcc/config/rs6000/altivec.md | 6 +- gcc/config/rs6000/rs6000-builtins.def | 12 + gcc/config/rs6000/rs6000-overload.def | 40 ++ gcc/doc/extend.texi | 43 +++ .../powerpc/vec-shift-double-runnable-int128.c | 419 +++++++++++++++++++++ 5 files changed, 517 insertions(+), 3 deletions(-) diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index aa9d8fffc901..1f5489b974f6 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -877,9 +877,9 @@ (define_int_iterator VSHIFT_DBL_LR [UNSPEC_SLDB UNSPEC_SRDB]) (define_insn "vs<SLDB_lr>db_<mode>" - [(set (match_operand:VI2 0 "register_operand" "=v") - (unspec:VI2 [(match_operand:VI2 1 "register_operand" "v") - (match_operand:VI2 2 "register_operand" "v") + [(set (match_operand:VEC_IC 0 "register_operand" "=v") + (unspec:VEC_IC [(match_operand:VEC_IC 1 "register_operand" "v") + (match_operand:VEC_IC 2 "register_operand" "v") (match_operand:QI 3 "const_0_to_12_operand" "n")] VSHIFT_DBL_LR))] "TARGET_POWER10" diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def index 0c3c884c1104..5b513a7ef2b8 100644 --- a/gcc/config/rs6000/rs6000-builtins.def +++ b/gcc/config/rs6000/rs6000-builtins.def @@ -963,6 +963,9 @@ const vss __builtin_altivec_vsldoi_8hi (vss, vss, const int<4>); VSLDOI_8HI altivec_vsldoi_v8hi {} + const vsq __builtin_altivec_vsldoi_v1ti (vsq, vsq, const int<4>); + VSLDOI_1TI altivec_vsldoi_v1ti {} + const vss __builtin_altivec_vslh (vss, vus); VSLH vashlv8hi3 {} @@ -1799,6 +1802,9 @@ const vsll __builtin_vsx_xxsldwi_2di (vsll, vsll, const int<2>); XXSLDWI_2DI vsx_xxsldwi_v2di {} + const vsq __builtin_vsx_xxsldwi_v1ti (vsq, vsq, const int<2>); + XXSLDWI_1TI vsx_xxsldwi_v1ti {} + const vf __builtin_vsx_xxsldwi_4sf (vf, vf, const int<2>); XXSLDWI_4SF vsx_xxsldwi_v4sf {} @@ -3267,6 +3273,9 @@ const vss __builtin_altivec_vsldb_v8hi (vss, vss, const int<3>); VSLDB_V8HI vsldb_v8hi {} + const vsq __builtin_altivec_vsldb_v1ti (vsq, vsq, const 
int<3>); + VSLDB_V1TI vsldb_v1ti {} + const vsq __builtin_altivec_vslq (vsq, vuq); VSLQ vashlv1ti3 {} @@ -3285,6 +3294,9 @@ const vss __builtin_altivec_vsrdb_v8hi (vss, vss, const int<3>); VSRDB_V8HI vsrdb_v8hi {} + const vsq __builtin_altivec_vsrdb_v1ti (vsq, vsq, const int<3>); + VSRDB_V1TI vsrdb_v1ti {} + const vsq __builtin_altivec_vsrq (vsq, vuq); VSRQ vlshrv1ti3 {} diff --git a/gcc/config/rs6000/rs6000-overload.def b/gcc/config/rs6000/rs6000-overload.def index c4ecafc6f7ef..87495aded49e 100644 --- a/gcc/config/rs6000/rs6000-overload.def +++ b/gcc/config/rs6000/rs6000-overload.def @@ -3399,6 +3399,10 @@ VSLDOI_4SF vd __builtin_vec_sld (vd, vd, const int); VSLDOI_2DF + vsq __builtin_vec_sld (vsq, vsq, const int); + VSLDOI_1TI VSLDOI_VSQ + vuq __builtin_vec_sld (vuq, vuq, const int); + VSLDOI_1TI VSLDOI_VUQ [VEC_SLDB, vec_sldb, __builtin_vec_sldb] vsc __builtin_vec_sldb (vsc, vsc, const int); @@ -3417,6 +3421,10 @@ VSLDB_V2DI VSLDB_VSLL vull __builtin_vec_sldb (vull, vull, const int); VSLDB_V2DI VSLDB_VULL + vsq __builtin_vec_sldb (vsq, vsq, const int); + VSLDB_V1TI VSLDB_VSQ + vuq __builtin_vec_sldb (vuq, vuq, const int); + VSLDB_V1TI VSLDB_VUQ [VEC_SLDW, vec_sldw, __builtin_vec_sldw] vsc __builtin_vec_sldw (vsc, vsc, const int); @@ -3439,6 +3447,10 @@ XXSLDWI_4SF XXSLDWI_VF vd __builtin_vec_sldw (vd, vd, const int); XXSLDWI_2DF XXSLDWI_VD + vsq __builtin_vec_sldw (vsq, vsq, const int); + XXSLDWI_1TI XXSLDWI_VSQ + vuq __builtin_vec_sldw (vuq, vuq, const int); + XXSLDWI_1TI XXSLDWI_VUQ [VEC_SLL, vec_sll, __builtin_vec_sll] vsc __builtin_vec_sll (vsc, vuc); @@ -3459,6 +3471,10 @@ VSL VSL_VSLL vull __builtin_vec_sll (vull, vuc); VSL VSL_VULL + vsq __builtin_vec_sll (vsq, vuc); + VSL VSL_VSQ + vuq __builtin_vec_sll (vuq, vuc); + VSL VSL_VUQ ; The following variants are deprecated. 
vsc __builtin_vec_sll (vsc, vus); VSL VSL_VSC_VUS @@ -3554,6 +3570,14 @@ VSLO VSLO_VFS vf __builtin_vec_slo (vf, vuc); VSLO VSLO_VFU + vsq __builtin_vec_slo (vsq, vsc); + VSLO VSLDO_VSQS + vsq __builtin_vec_slo (vsq, vuc); + VSLO VSLDO_VSQU + vuq __builtin_vec_slo (vuq, vsc); + VSLO VSLDO_VUQS + vuq __builtin_vec_slo (vuq, vuc); + VSLO VSLDO_VUQU [VEC_SLV, vec_slv, __builtin_vec_vslv] vuc __builtin_vec_vslv (vuc, vuc); @@ -3699,6 +3723,10 @@ VSRDB_V2DI VSRDB_VSLL vull __builtin_vec_srdb (vull, vull, const int); VSRDB_V2DI VSRDB_VULL + vsq __builtin_vec_srdb (vsq, vsq, const int); + VSRDB_V1TI VSRDB_VSQ + vuq __builtin_vec_srdb (vuq, vuq, const int); + VSRDB_V1TI VSRDB_VUQ [VEC_SRL, vec_srl, __builtin_vec_srl] vsc __builtin_vec_srl (vsc, vuc); @@ -3719,6 +3747,10 @@ VSR VSR_VSLL vull __builtin_vec_srl (vull, vuc); VSR VSR_VULL + vsq __builtin_vec_srl (vsq, vuc); + VSR VSR_VSQ + vuq __builtin_vec_srl (vuq, vuc); + VSR VSR_VUQ ; The following variants are deprecated. vsc __builtin_vec_srl (vsc, vus); VSR VSR_VSC_VUS @@ -3808,6 +3840,14 @@ VSRO VSRO_VFS vf __builtin_vec_sro (vf, vuc); VSRO VSRO_VFU + vsq __builtin_vec_sro (vsq, vsc); + VSRO VSRDO_VSQS + vsq __builtin_vec_sro (vsq, vuc); + VSRO VSRDO_VSQU + vuq __builtin_vec_sro (vuq, vsc); + VSRO VSRDO_VUQS + vuq __builtin_vec_sro (vuq, vuc); + VSRO VSRDO_VUQU [VEC_SRV, vec_srv, __builtin_vec_vsrv] vuc __builtin_vec_vsrv (vuc, vuc); diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 48b27ff9f390..89fe5db7aed0 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -23596,6 +23596,10 @@ const unsigned int); vector signed long long, const unsigned int); @exdent vector unsigned long long vec_sldb (vector unsigned long long, vector unsigned long long, const unsigned int); +@exdent vector signed __int128 vec_sldb (vector signed __int128, +vector signed __int128, const unsigned int); +@exdent vector unsigned __int128 vec_sldb (vector unsigned __int128, +vector unsigned __int128, const unsigned int); @end 
smallexample Shift the combined input vectors left by the amount specified by the low-order @@ -23623,6 +23627,10 @@ const unsigned int); vector signed long long, const unsigned int); @exdent vector unsigned long long vec_srdb (vector unsigned long long, vector unsigned long long, const unsigned int); +@exdent vector signed __int128 vec_srdb (vector signed __int128, +vector signed __int128, const unsigned int); +@exdent vector unsigned __int128 vec_srdb (vector unsigned __int128, +vector unsigned __int128, const unsigned int); @end smallexample Shift the combined input vectors right by the amount specified by the low-order @@ -24118,6 +24126,41 @@ int vec_any_le (vector unsigned __int128, vector unsigned __int128); @end smallexample +The following instances are extensions of the existing overloaded built-ins +@code{vec_sld}, @code{vec_sldw}, @code{vec_slo}, @code{vec_sro}, @code{vec_srl} +that are documented in the PVIPR. + +@smallexample +@exdent vector signed __int128 vec_sld (vector signed __int128, +vector signed __int128, const unsigned int); +@exdent vector unsigned __int128 vec_sld (vector unsigned __int128, +vector unsigned __int128, const unsigned int); +@exdent vector signed __int128 vec_sldw (vector signed __int128, +vector signed __int128, const unsigned int); +@exdent vector unsigned __int128 vec_sldw (vector unsigned __int128, +vector unsigned __int128, const unsigned int); +@exdent vector signed __int128 vec_slo (vector signed __int128, +vector signed char); +@exdent vector signed __int128 vec_slo (vector signed __int128, +vector unsigned char); +@exdent vector unsigned __int128 vec_slo (vector unsigned __int128, +vector signed char); +@exdent vector unsigned __int128 vec_slo (vector unsigned __int128, +vector unsigned char); +@exdent vector signed __int128 vec_sro (vector signed __int128, +vector signed char); +@exdent vector signed __int128 vec_sro (vector signed __int128, +vector unsigned char); +@exdent vector unsigned __int128 vec_sro (vector 
unsigned __int128, +vector signed char); +@exdent vector unsigned __int128 vec_sro (vector unsigned __int128, +vector unsigned char); +@exdent vector signed __int128 vec_srl (vector signed __int128, +vector unsigned char); +@exdent vector unsigned __int128 vec_srl (vector unsigned __int128, +vector unsigned char); +@end smallexample + @node PowerPC Hardware Transactional Memory Built-in Functions @subsection PowerPC Hardware Transactional Memory Built-in Functions GCC provides two interfaces for accessing the Hardware Transactional diff --git a/gcc/testsuite/gcc.target/powerpc/vec-shift-double-runnable-int128.c b/gcc/testsuite/gcc.target/powerpc/vec-shift-double-runnable-int128.c new file mode 100644 index 000000000000..0a329fa0e9c8 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec-shift-double-runnable-int128.c @@ -0,0 +1,419 @@ +/* { dg-do run { target power10_hw } } */ +/* { dg-do compile { target { ! power10_hw } } } */ +/* { dg-require-effective-target int128 } */ + +/* Need -save-temps for dg-final scan-assembler-times at end of test. 
*/ +/* { dg-options "-mdejagnu-cpu=power10 -save-temps" } */ + +#include <altivec.h> + +#define DEBUG 0 + +#if DEBUG +#include <stdio.h> + +void print_i128 (unsigned __int128 val) +{ + printf(" 0x%016llx%016llx", + (unsigned long long)(val >> 64), + (unsigned long long)(val & 0xFFFFFFFFFFFFFFFF)); +} +#endif + +extern void abort (void); + +#if DEBUG +#define ACTION_2ARG_UNSIGNED(NAME, TYPE_NAME) \ + printf ("vec_%s (vector TYPE __int128, vector TYPE __int128) \n", #NAME); \ + printf(" src_va_s128[0] = "); \ + print_i128 ((unsigned __int128) src_va_##TYPE_NAME[0]); \ + printf("\n"); \ + printf(" src_vb_uc = 0x"); \ + for (i = 0; i < 16; i++) \ + printf("%02x", src_vb_uc[i]); \ + printf("\n"); \ + printf(" vresult[0] = "); \ + print_i128 ((unsigned __int128) vresult[0]); \ + printf("\n"); \ + printf(" expected_vresult[0] = "); \ + print_i128 ((unsigned __int128) expected_vresult[0]); \ + printf("\n"); + +#define ACTION_2ARG_SIGNED(NAME, TYPE_NAME) \ + printf ("vec_%s (vector TYPE __int128, vector TYPE __int128) \n", #NAME); \ + printf(" src_va_s128[0] = "); \ + print_i128 ((unsigned __int128) src_va_##TYPE_NAME[0]); \ + printf("\n"); \ + printf(" src_vb_sc = 0x"); \ + for (i = 0; i < 16; i++) \ + printf("%02x", src_vb_sc[i]); \ + printf("\n"); \ + printf(" vresult[0] = "); \ + print_i128 ((unsigned __int128) vresult[0]); \ + printf("\n"); \ + printf(" expected_vresult[0] = "); \ + print_i128 ((unsigned __int128) expected_vresult[0]); \ + printf("\n"); + +#define ACTION_3ARG(NAME, TYPE_NAME, CONST) \ + printf ("vec_%s (vector TYPE __int128, vector TYPE __int128, %s) \n", \ + #NAME, #CONST); \ + printf(" src_va_s128[0] = "); \ + print_i128 ((unsigned __int128) src_va_##TYPE_NAME[0]); \ + printf("\n"); \ + printf(" src_vb_s128[0] = "); \ + print_i128 ((unsigned __int128) src_vb_##TYPE_NAME[0]); \ + printf("\n"); \ + printf(" vresult[0] = "); \ + print_i128 ((unsigned __int128) vresult[0]); \ + printf("\n"); \ + printf(" expected_vresult[0] = "); \ + print_i128 
((unsigned __int128) expected_vresult[0]); \ + printf("\n"); + +#else +#define ACTION_2ARG_UNSIGNED(NAME, TYPE_NAME) \ + abort(); + +#define ACTION_2ARG_SIGNED(NAME, TYPE_NAME) \ + abort(); + +#define ACTION_2ARG(NAME, TYPE_NAME) \ + abort(); + +#define ACTION_3ARG(NAME, TYPE_NAME, CONST) \ + abort(); +#endif + +/* Second argument of the builtin is vector unsigned char. */ +#define TEST_2ARG_UNSIGNED(NAME, TYPE, TYPE_NAME, EXP_RESULT_HI, EXP_RESULT_LO) \ + { \ + vector TYPE __int128 vresult; \ + vector TYPE __int128 expected_vresult; \ + int i; \ + \ + expected_vresult = (vector TYPE __int128) { EXP_RESULT_HI }; \ + expected_vresult = (expected_vresult << 64) | \ + (vector TYPE __int128) { EXP_RESULT_LO }; \ + vresult = vec_##NAME (src_va_##TYPE_NAME, src_vb_uc); \ + \ + if (!vec_all_eq (vresult, expected_vresult)) { \ + ACTION_2ARG_UNSIGNED(NAME, TYPE_NAME) \ + } \ + } + +/* Second argument of the builtin is vector signed char. */ +#define TEST_2ARG_SIGNED(NAME, TYPE, TYPE_NAME, EXP_RESULT_HI, EXP_RESULT_LO) \ + { \ + vector TYPE __int128 vresult; \ + vector TYPE __int128 expected_vresult; \ + int i; \ + \ + expected_vresult = (vector TYPE __int128) { EXP_RESULT_HI }; \ + expected_vresult = (expected_vresult << 64) | \ + (vector TYPE __int128) { EXP_RESULT_LO }; \ + vresult = vec_##NAME (src_va_##TYPE_NAME, src_vb_sc); \ + \ + if (!vec_all_eq (vresult, expected_vresult)) { \ + ACTION_2ARG_SIGNED(NAME, TYPE_NAME) \ + } \ + } + +#define TEST_3ARG(NAME, TYPE, TYPE_NAME, CONST, EXP_RESULT_HI, EXP_RESULT_LO) \ + { \ + vector TYPE __int128 vresult; \ + vector TYPE __int128 expected_vresult; \ + \ + expected_vresult = (vector TYPE __int128) { EXP_RESULT_HI }; \ + expected_vresult = (expected_vresult << 64) | \ + (vector TYPE __int128) { EXP_RESULT_LO }; \ + vresult = vec_##NAME (src_va_##TYPE_NAME, src_vb_##TYPE_NAME, CONST); \ + \ + if (!vec_all_eq (vresult, expected_vresult)) { \ + ACTION_3ARG(NAME, TYPE_NAME, CONST) \ + } \ + } + +int +main (int argc, char *argv []) 
+{ + vector signed __int128 vresult_s128; + vector signed __int128 expected_vresult_s128; + vector signed __int128 src_va_s128; + vector signed __int128 src_vb_s128; + vector unsigned __int128 vresult_u128; + vector unsigned __int128 expected_vresult_u128; + vector unsigned __int128 src_va_u128; + vector unsigned __int128 src_vb_u128; + vector signed char src_vb_sc; + vector unsigned char src_vb_uc; + + /* 128-bit vector shift right tests, vec_srdb. */ + src_va_s128 = (vector signed __int128) {0x12345678}; + src_vb_s128 = (vector signed __int128) {0xFEDCBA90}; + TEST_3ARG(srdb, signed, s128, 4, 0x8000000000000000, 0xFEDCBA9) + + src_va_u128 = (vector unsigned __int128) { 0xFEDCBA98 }; + src_vb_u128 = (vector unsigned __int128) { 0x76543210}; + TEST_3ARG(srdb, unsigned, u128, 4, 0x8000000000000000, 0x07654321) + + /* 128-bit vector shift left tests, vec_sldb. */ + src_va_s128 = (vector signed __int128) {0x123456789ABCDEF0}; + src_va_s128 = (src_va_s128 << 64) + | (vector signed __int128) {0x123456789ABCDEF0}; + src_vb_s128 = (vector signed __int128) {0xFEDCBA9876543210}; + src_vb_s128 = (src_vb_s128 << 64) + | (vector signed __int128) {0xFEDCBA9876543210}; + TEST_3ARG(sldb, signed, s128, 4, 0x23456789ABCDEF01, 0x23456789ABCDEF0F) + + src_va_u128 = (vector unsigned __int128) {0xFEDCBA9876543210}; + src_va_u128 = src_va_u128 << 64 + | (vector unsigned __int128) {0xFEDCBA9876543210}; + src_vb_u128 = (vector unsigned __int128) {0x123456789ABCDEF0}; + src_vb_u128 = src_vb_u128 << 64 + | (vector unsigned __int128) {0x123456789ABCDEF0}; + TEST_3ARG(sldb, unsigned, u128, 4, 0xEDCBA9876543210F, 0xEDCBA98765432101) + + /* Shift left by octect tests, vec_sld. Shift is by immediate value + times 8. 
*/ + src_va_s128 = (vector signed __int128) {0x123456789ABCDEF0}; + src_va_s128 = (src_va_s128 << 64) + | (vector signed __int128) {0x123456789ABCDEF0}; + src_vb_s128 = (vector signed __int128) {0xFEDCBA9876543210}; + src_vb_s128 = (src_vb_s128 << 64) + | (vector signed __int128) {0xFEDCBA9876543210}; + TEST_3ARG(sld, signed, s128, 4, 0x9abcdef012345678, 0x9abcdef0fedcba98) + + src_va_u128 = (vector unsigned __int128) {0xFEDCBA9876543210}; + src_va_u128 = src_va_u128 << 64 + | (vector unsigned __int128) {0xFEDCBA9876543210}; + src_vb_u128 = (vector unsigned __int128) {0x123456789ABCDEF0}; + src_vb_u128 = src_vb_u128 << 64 + | (vector unsigned __int128) {0x123456789ABCDEF0}; + TEST_3ARG(sld, unsigned, u128, 4, 0x76543210fedcba98, 0x7654321012345678) + + /* Vector left shift bytes within the vector, vec_sll. */ + src_va_s128 = (vector signed __int128) {0x123456789ABCDEF0}; + src_va_s128 = (src_va_s128 << 64) + | (vector signed __int128) {0x123456789ABCDEF0}; + src_vb_uc = (vector unsigned char) {0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01}; + TEST_2ARG_UNSIGNED(sll, signed, s128, 0x2468acf13579bde0, + 0x2468acf13579bde0) + + src_va_u128 = (vector unsigned __int128) {0x123456789ABCDEF0}; + src_va_u128 = src_va_u128 << 64 + | (vector unsigned __int128) {0x123456789ABCDEF0}; + src_vb_uc = (vector unsigned char) {0x02, 0x02, 0x02, 0x02, + 0x02, 0x02, 0x02, 0x02, + 0x02, 0x02, 0x02, 0x02, + 0x02, 0x02, 0x02, 0x02}; + TEST_2ARG_UNSIGNED(sll, unsigned, u128, 0x48d159e26af37bc0, + 0x48d159e26af37bc0) + + /* Vector right shift bytes within the vector, vec_srl. 
*/ + src_va_s128 = (vector signed __int128) {0x123456789ABCDEF0}; + src_va_s128 = (src_va_s128 << 64) + | (vector signed __int128) {0x123456789ABCDEF0}; + src_vb_uc = (vector unsigned char) {0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01}; + TEST_2ARG_UNSIGNED(srl, signed, s128, 0x091a2b3c4d5e6f78, + 0x091a2b3c4d5e6f78) + + src_va_u128 = (vector unsigned __int128) {0x123456789ABCDEF0}; + src_va_u128 = src_va_u128 << 64 + | (vector unsigned __int128) {0x123456789ABCDEF0}; + src_vb_uc = (vector unsigned char) {0x02, 0x02, 0x02, 0x02, + 0x02, 0x02, 0x02, 0x02, + 0x02, 0x02, 0x02, 0x02, + 0x02, 0x02, 0x02, 0x02}; + TEST_2ARG_UNSIGNED(srl, unsigned, u128, 0x48d159e26af37bc, + 0x48d159e26af37bc) + + /* Shift left by octect tests, vec_slo. Shift is by immediate value + bytes. Shift amount in bits 121:124. */ + src_va_s128 = (vector signed __int128) {0x123456789ABCDEF0}; + src_va_s128 = (src_va_s128 << 64) + | (vector signed __int128) {0x123456789ABCDEF0}; + /* Note vb_sc is Endian specific. */ + /* The left shift amount is 1 byte, i.e. 1 * 8 bits. */ +#if __LITTLE_ENDIAN__ + src_vb_sc = (vector signed char) {0x1 << 3, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0}; +#else + src_vb_sc = (vector signed char) {0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x1 << 3}; +#endif + + TEST_2ARG_SIGNED(slo, signed, s128, 0x3456789ABCDEF012, + 0x3456789ABCDEF000) + src_va_s128 = (vector signed __int128) {0x123456789ABCDEF0}; + src_va_s128 = (src_va_s128 << 64) + | (vector signed __int128) {0x123456789ABCDEF0}; + + /* Note vb_sc is Endian specific. */ + /* The left shift amount is 2 bytes, i.e. 2 * 8 bits. 
*/ +#if __LITTLE_ENDIAN__ + src_vb_uc = (vector unsigned char) {0x2 << 3, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0}; +#else + src_vb_uc = (vector unsigned char) {0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x2 << 3}; +#endif + TEST_2ARG_UNSIGNED(slo, signed, s128, 0x56789ABCDEF01234, + 0x56789ABCDEF00000) + + src_va_u128 = (vector unsigned __int128) {0xFEDCBA9876543210}; + src_va_u128 = src_va_u128 << 64 + | (vector unsigned __int128) {0xFEDCBA9876543210}; + /* Note vb_sc is Endian specific. */ + /* The left shift amount is 3 bytes, i.e. 3 * 8 bits. */ +#if __LITTLE_ENDIAN__ + src_vb_sc = (vector signed char) {0x03<<3, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x00, 0x00, 0x00, 0x0}; +#else + src_vb_sc = (vector signed char) {0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x00, 0x00, 0x00, 0x03<<3}; +#endif + TEST_2ARG_SIGNED(slo, unsigned, u128, 0x9876543210FEDCBA, + 0x9876543210000000) + + src_va_u128 = (vector unsigned __int128) {0xFEDCBA9876543210}; + src_va_u128 = src_va_u128 << 64 + | (vector unsigned __int128) {0xFEDCBA9876543210}; + /* Note vb_sc is Endian specific. */ + /* The left shift amount is 4 bytes, i.e. 4 * 8 bits. */ +#if __LITTLE_ENDIAN__ + src_vb_uc = (vector unsigned char) {0x04<<3, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0}; +#else + src_vb_uc = (vector unsigned char) {0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x00, 0x00, 0x00, 0x04<<3}; +#endif + TEST_2ARG_UNSIGNED(slo, unsigned, u128, 0x76543210FEDCBA98, + 0x7654321000000000) + + /* Shift right by octect tests, vec_sro. Shift is by immediate value + times 8. Shift amount in bits 121:124. */ + src_va_s128 = (vector signed __int128) {0x123456789ABCDEF0}; + src_va_s128 = (src_va_s128 << 64) + | (vector signed __int128) {0x123456789ABCDEF0}; + /* Note vb_sc is Endian specific. */ + /* The left shift amount is 1 byte, i.e. 
1 * 8 bits. */ +#if __LITTLE_ENDIAN__ + src_vb_sc = (vector signed char) {0x1 << 3, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0}; +#else + src_vb_sc = (vector signed char) {0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x1 << 3}; +#endif + TEST_2ARG_SIGNED(sro, signed, s128, 0x00123456789ABCDE, 0xF0123456789ABCDE) + + src_va_s128 = (vector signed __int128) {0x123456789ABCDEF0}; + src_va_s128 = (src_va_s128 << 64) + | (vector signed __int128) {0x123456789ABCDEF0}; + /* Note vb_sc is Endian specific. */ + /* The left shift amount is 1 byte, i.e. 1 * 8 bits. */ +#if __LITTLE_ENDIAN__ + src_vb_uc = (vector unsigned char) {0x2 << 3, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0}; +#else + src_vb_uc = (vector unsigned char) {0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x2 << 3}; +#endif + TEST_2ARG_UNSIGNED(sro, signed, s128, 0x0000123456789ABC, + 0xDEF0123456789ABC) + + src_va_u128 = (vector unsigned __int128) {0xFEDCBA9876543210}; + src_va_u128 = src_va_u128 << 64 + | (vector unsigned __int128) {0xFEDCBA9876543210}; + /* Note vb_sc is Endian specific. */ + /* The left shift amount is 4 bytes, i.e. 4 * 8 bits. */ +#if __LITTLE_ENDIAN__ + src_vb_sc = (vector signed char) {0x03<<3, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x00, 0x00, 0x00, 0x0}; +#else + src_vb_sc = (vector signed char) {0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x00, 0x00, 0x00, 0x03<<3}; +#endif + TEST_2ARG_SIGNED(sro, unsigned, u128, 0x000000FEDCBA9876, + 0x543210FEDCBA9876) + + src_va_u128 = (vector unsigned __int128) {0xFEDCBA9876543210}; + src_va_u128 = src_va_u128 << 64 + | (vector unsigned __int128) {0xFEDCBA9876543210}; + /* Note vb_sc is Endian specific. */ + /* The left shift amount is 4 bytes, i.e. 4 * 8 bits. 
*/ +#if __LITTLE_ENDIAN__ + src_vb_uc = (vector unsigned char) {0x04<<3, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x00, 0x0}; +#else + src_vb_uc = (vector unsigned char) {0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x00, 0x0, 0x0, 0x04<<3}; +#endif + TEST_2ARG_UNSIGNED(sro, unsigned, u128, 0x00000000FEDCBA98, + 0x76543210FEDCBA98) + + /* 128-bit vector shift left tests, vec_sldw. */ + src_va_s128 = (vector signed __int128) {0x123456789ABCDEF0}; + src_va_s128 = (src_va_s128 << 64) + | (vector signed __int128) {0x123456789ABCDEF0}; + src_vb_s128 = (vector signed __int128) {0xFEDCBA9876543210}; + src_vb_s128 = (src_vb_s128 << 64) + | (vector signed __int128) {0xFEDCBA9876543210}; + TEST_3ARG(sldw, signed, s128, 1, 0x9ABCDEF012345678, 0x9ABCDEF0FEDCBA98) + + src_va_u128 = (vector unsigned __int128) {0x123456789ABCDEF0}; + src_va_u128 = (src_va_u128 << 64) + | (vector unsigned __int128) {0x123456789ABCDEF0}; + src_vb_u128 = (vector unsigned __int128) {0xFEDCBA9876543210}; + src_vb_u128 = (src_vb_u128 << 64) + | (vector unsigned __int128) {0xFEDCBA9876543210}; + TEST_3ARG(sldw, unsigned, u128, 2, 0x123456789ABCDEF0, 0xFEDCBA9876543210) + + + return 0; +} + +/* { dg-final { scan-assembler-times {\mvsrdbi\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mvsldbi\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mvsl\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mvsr\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mvslo\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mvsro\M} 4 } } */