[PATCH 5/6] rs6000, Add vector splat builtin support ---------------------------------- v4 fixes:
Rebased on mainline. Changed FUTURE to P10. define_predicate "s32bit_cint_operand": removed unnecessary cast in definition. Changed define_expand "xxsplti32dx_v4si" to use "0" for constraint of operand 1. Changed define_insn "xxsplti32dx_v4si_inst" to use "0" for constraint of operand 1. Removed define_predicate "f32bit_const_operand". Use const_double_operand instead. *** Please provide feedback for the following change: (define_insn "xxspltidp_v2df_inst"): Added a print statement to warn of possible undefined behavior. The xxspltidp instruction result is undefined for subnormal inputs. I added a test for subnormal input with an fprintf to stderr to warn the "user" if the constant input is a subnormal value. I tried assert initially, but that causes GCC to exit ungracefully with no information as to why. I really didn't like that behavior. A subnormal input is not really a fatal error but the "user" needs to be told it is not a good idea. Not sure if using an fprintf statement in a define_insn is an acceptable thing either. But it does give the user the needed information and GCC exits normally. Let me know if there is a better option here. -------------------- v3 fixes: Minor cleanup in the ChangeLog description. ------------------------------------------------- v2 fixes: change log fixes. gcc/config/rs6000/altivec.md: changed name of define_insn and define_expand for vxxspltiw... to xxspltiw... Fixed spaces in gen_xxsplti32dx_v4sf_inst (operands[0], GEN_INT ...). gcc/config/rs6000/rs6000-builtin.def: propagated name changes above where they are used. Updated the s32bit_cint_operand, c32bit_cint_operand, f32bit_const_operand predicate definitions. Changed name of rs6000_constF32toI32 to rs6000_const_f32_to_i32, propagated name change as needed. Replaced if test with gcc_assert(). Fixed description of vec_splatid() in documentation. ----------------------- GCC maintainers: The following patch adds support for the vec_splati, vec_splatid and vec_splati_ins builtins. 
This patch adds support for instructions that take a 32-bit immediate value that represents a floating point value. This support adds new predicates and a support function to properly handle the immediate value. The patch has been compiled and tested on powerpc64le-unknown-linux-gnu (Power 9 LE) with no regression errors. The test case was compiled on a Power 9 system and then tested on Mambo. Please let me know if this patch is acceptable for the mainline branch. Thanks. Carl Love -------------------------------------------------------- gcc/ChangeLog 2020-07-06 Carl Love <c...@us.ibm.com> * config/rs6000/altivec.h (vec_splati, vec_splatid, vec_splati_ins): Add defines. * config/rs6000/altivec.md (UNSPEC_XXSPLTIW, UNSPEC_XXSPLTID, UNSPEC_XXSPLTI32DX): New. (xxspltiw_v4si, xxspltiw_v4sf_inst, xxspltidp_v2df_inst, xxsplti32dx_v4si_inst, xxsplti32dx_v4sf_inst): New define_insns. (xxspltiw_v4sf, xxspltidp_v2df, xxsplti32dx_v4si, xxsplti32dx_v4sf): New define_expands. * config/rs6000/predicates.md (u1bit_cint_operand, s32bit_cint_operand, c32bit_cint_operand): New predicates. * config/rs6000/rs6000-builtin.def (VXXSPLTIW_V4SI, VXXSPLTIW_V4SF, VXXSPLTID): New definitions. (VXXSPLTI32DX_V4SI, VXXSPLTI32DX_V4SF): New BU_P10V_3 definitions. (XXSPLTIW, XXSPLTID): New definitions. (XXSPLTI32DX): Add definitions. * config/rs6000/rs6000-call.c (P10_BUILTIN_VEC_XXSPLTIW, P10_BUILTIN_VEC_XXSPLTID, P10_BUILTIN_VEC_XXSPLTI32DX): New definitions. * config/rs6000/rs6000-protos.h (rs6000_const_f32_to_i32): New extern declaration. * config/rs6000/rs6000.c (rs6000_const_f32_to_i32): New function. * doc/extend.texi: Add documentation for vec_splati, vec_splatid, and vec_splati_ins. gcc/testsuite/ChangeLog 2020-07-06 Carl Love <c...@us.ibm.com> * gcc.target/powerpc/vec-splati-runnable.c: New test. 
--- gcc/config/rs6000/altivec.h | 3 + gcc/config/rs6000/altivec.md | 116 ++++++++++++++ gcc/config/rs6000/predicates.md | 15 ++ gcc/config/rs6000/rs6000-builtin.def | 12 ++ gcc/config/rs6000/rs6000-call.c | 19 +++ gcc/config/rs6000/rs6000-protos.h | 1 + gcc/config/rs6000/rs6000.c | 11 ++ gcc/doc/extend.texi | 35 +++++ .../gcc.target/powerpc/vec-splati-runnable.c | 145 ++++++++++++++++++ 9 files changed, 357 insertions(+) create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-splati-runnable.c diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h index c202fcf25da..126409c168b 100644 --- a/gcc/config/rs6000/altivec.h +++ b/gcc/config/rs6000/altivec.h @@ -705,6 +705,9 @@ __altivec_scalar_pred(vec_any_nle, #define vec_replace_unaligned(a, b, c) __builtin_vec_replace_un (a, b, c) #define vec_sldb(a, b, c) __builtin_vec_sldb (a, b, c) #define vec_srdb(a, b, c) __builtin_vec_srdb (a, b, c) +#define vec_splati(a) __builtin_vec_xxspltiw (a) +#define vec_splatid(a) __builtin_vec_xxspltid (a) +#define vec_splati_ins(a, b, c) __builtin_vec_xxsplti32dx (a, b, c) #define vec_gnb(a, b) __builtin_vec_gnb (a, b) #define vec_clrl(a, b) __builtin_vec_clrl (a, b) diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index c58fb3961e0..f6858b5bf2a 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -174,6 +174,9 @@ UNSPEC_VSTRIL UNSPEC_SLDB UNSPEC_SRDB + UNSPEC_XXSPLTIW + UNSPEC_XXSPLTID + UNSPEC_XXSPLTI32DX ]) (define_c_enum "unspecv" @@ -800,6 +803,119 @@ "vs<SLDB_lr>dbi %0,%1,%2,%3" [(set_attr "type" "vecsimple")]) +(define_insn "xxspltiw_v4si" + [(set (match_operand:V4SI 0 "register_operand" "=wa") + (unspec:V4SI [(match_operand:SI 1 "s32bit_cint_operand" "n")] + UNSPEC_XXSPLTIW))] + "TARGET_POWER10" + "xxspltiw %x0,%1" + [(set_attr "type" "vecsimple")]) + +(define_expand "xxspltiw_v4sf" + [(set (match_operand:V4SF 0 "register_operand" "=wa") + (unspec:V4SF [(match_operand:SF 1 "const_double_operand" "n")] + 
UNSPEC_XXSPLTIW))] + "TARGET_POWER10" +{ + long long value = rs6000_const_f32_to_i32 (operands[1]); + emit_insn (gen_xxspltiw_v4sf_inst (operands[0], GEN_INT (value))); + DONE; +}) + +(define_insn "xxspltiw_v4sf_inst" + [(set (match_operand:V4SF 0 "register_operand" "=wa") + (unspec:V4SF [(match_operand:SI 1 "c32bit_cint_operand" "n")] + UNSPEC_XXSPLTIW))] + "TARGET_POWER10" + "xxspltiw %x0,%c1" + [(set_attr "type" "vecsimple")]) + +(define_expand "xxspltidp_v2df" + [(set (match_operand:V2DF 0 "register_operand" ) + (unspec:V2DF [(match_operand:SF 1 "const_double_operand")] + UNSPEC_XXSPLTID))] + "TARGET_POWER10" +{ + long value = rs6000_const_f32_to_i32 (operands[1]); + emit_insn (gen_xxspltidp_v2df_inst (operands[0], GEN_INT (value))); + DONE; +}) + +(define_insn "xxspltidp_v2df_inst" + [(set (match_operand:V2DF 0 "register_operand" "=wa") + (unspec:V2DF [(match_operand:SI 1 "c32bit_cint_operand" "n")] + UNSPEC_XXSPLTID))] + "TARGET_POWER10" +{ + /* Note, the xxspltidp gives undefined results if the operand is a single + precision subnormal number. 
*/ + int value = INTVAL (operands[1]); + + if (((value & 0x7F800000) == 0) && ((value & 0x7FFFFF) != 0)) + /* value is subnormal */ + fprintf (stderr, "WARNING: Result for the xxspltidp instruction is undefined for subnormal input values.\n"); + + return "xxspltidp %x0,%c1"; +} + [(set_attr "type" "vecsimple")]) + +(define_expand "xxsplti32dx_v4si" + [(set (match_operand:V4SI 0 "register_operand" "=wa") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") + (match_operand:QI 2 "u1bit_cint_operand" "n") + (match_operand:SI 3 "s32bit_cint_operand" "n")] + UNSPEC_XXSPLTI32DX))] + "TARGET_POWER10" +{ + int index = INTVAL (operands[2]); + + if (!BYTES_BIG_ENDIAN) + index = 1 - index; + + emit_insn (gen_xxsplti32dx_v4si_inst (operands[0], operands[1], + GEN_INT (index), operands[3])); + DONE; +} + [(set_attr "type" "vecsimple")]) + +(define_insn "xxsplti32dx_v4si_inst" + [(set (match_operand:V4SI 0 "register_operand" "=wa") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") + (match_operand:QI 2 "u1bit_cint_operand" "n") + (match_operand:SI 3 "s32bit_cint_operand" "n")] + UNSPEC_XXSPLTI32DX))] + "TARGET_POWER10" + "xxsplti32dx %x0,%2,%3" + [(set_attr "type" "vecsimple")]) + +(define_expand "xxsplti32dx_v4sf" + [(set (match_operand:V4SF 0 "register_operand" "=wa") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0") + (match_operand:QI 2 "u1bit_cint_operand" "n") + (match_operand:SF 3 "const_double_operand" "n")] + UNSPEC_XXSPLTI32DX))] + "TARGET_POWER10" +{ + int index = INTVAL (operands[2]); + long value = rs6000_const_f32_to_i32 (operands[3]); + if (!BYTES_BIG_ENDIAN) + index = 1 - index; + + emit_insn (gen_xxsplti32dx_v4sf_inst (operands[0], operands[1], + GEN_INT (index), GEN_INT (value))); + DONE; +}) + +(define_insn "xxsplti32dx_v4sf_inst" + [(set (match_operand:V4SF 0 "register_operand" "=wa") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0") + (match_operand:QI 2 "u1bit_cint_operand" "n") + (match_operand:SI 3 
"s32bit_cint_operand" "n")] + UNSPEC_XXSPLTI32DX))] + "TARGET_POWER10" + "xxsplti32dx %x0,%2,%3" + [(set_attr "type" "vecsimple")]) + (define_expand "vstrir_<mode>" [(set (match_operand:VIshort 0 "altivec_register_operand") (unspec:VIshort [(match_operand:VIshort 1 "altivec_register_operand")] diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 9762855d76d..e9f7f143159 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -214,6 +214,11 @@ (and (match_code "const_int") (match_test "INTVAL (op) >= -16 && INTVAL (op) <= 15"))) +;; Return 1 if op is a unsigned 1-bit constant integer. +(define_predicate "u1bit_cint_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 1"))) + ;; Return 1 if op is a unsigned 3-bit constant integer. (define_predicate "u3bit_cint_operand" (and (match_code "const_int") @@ -272,6 +277,16 @@ (match_test "(unsigned HOST_WIDE_INT) (INTVAL (op) + 0x8000) >= 0x10000"))) +;; Return 1 if op is a 32-bit constant signed integer +(define_predicate "s32bit_cint_operand" + (and (match_code "const_int") + (match_test "(0x80000000 + UINTVAL (op)) >> 32 == 0"))) + +;; Return 1 if op is a constant 32-bit unsigned +(define_predicate "c32bit_cint_operand" + (and (match_code "const_int") + (match_test "((UINTVAL (op) >> 32) == 0)"))) + ;; Return 1 if op is a positive constant integer that is an exact power of 2. 
(define_predicate "exact_log2_cint_operand" (and (match_code "const_int") diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index c6fdfadeda8..ddfe287efc8 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -2748,6 +2748,14 @@ BU_P10V_3 (VSRDB_V8HI, "vsrdb_v8hi", CONST, vsrdb_v8hi) BU_P10V_3 (VSRDB_V4SI, "vsrdb_v4si", CONST, vsrdb_v4si) BU_P10V_3 (VSRDB_V2DI, "vsrdb_v2di", CONST, vsrdb_v2di) +BU_P10V_1 (VXXSPLTIW_V4SI, "vxxspltiw_v4si", CONST, xxspltiw_v4si) +BU_P10V_1 (VXXSPLTIW_V4SF, "vxxspltiw_v4sf", CONST, xxspltiw_v4sf) + +BU_P10V_1 (VXXSPLTID, "vxxspltidp", CONST, xxspltidp_v2df) + +BU_P10V_3 (VXXSPLTI32DX_V4SI, "vxxsplti32dx_v4si", CONST, xxsplti32dx_v4si) +BU_P10V_3 (VXXSPLTI32DX_V4SF, "vxxsplti32dx_v4sf", CONST, xxsplti32dx_v4sf) + BU_P10V_1 (VSTRIBR, "vstribr", CONST, vstrir_v16qi) BU_P10V_1 (VSTRIHR, "vstrihr", CONST, vstrir_v8hi) BU_P10V_1 (VSTRIBL, "vstribl", CONST, vstril_v16qi) @@ -2779,6 +2787,10 @@ BU_P10_OVERLOAD_1 (VSTRIL, "stril") BU_P10_OVERLOAD_1 (VSTRIR_P, "strir_p") BU_P10_OVERLOAD_1 (VSTRIL_P, "stril_p") + +BU_P10_OVERLOAD_1 (XXSPLTIW, "xxspltiw") +BU_P10_OVERLOAD_1 (XXSPLTID, "xxspltid") +BU_P10_OVERLOAD_3 (XXSPLTI32DX, "xxsplti32dx") /* 1 argument crypto functions. 
*/ BU_CRYPTO_1 (VSBOX, "vsbox", CONST, crypto_vsbox_v2di) diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c index edc67fafd88..06320279138 100644 --- a/gcc/config/rs6000/rs6000-call.c +++ b/gcc/config/rs6000/rs6000-call.c @@ -5688,6 +5688,22 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_UINTQI }, + { P10_BUILTIN_VEC_XXSPLTIW, P10_BUILTIN_VXXSPLTIW_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_INTSI, 0, 0 }, + { P10_BUILTIN_VEC_XXSPLTIW, P10_BUILTIN_VXXSPLTIW_V4SF, + RS6000_BTI_V4SF, RS6000_BTI_float, 0, 0 }, + + { P10_BUILTIN_VEC_XXSPLTID, P10_BUILTIN_VXXSPLTID, + RS6000_BTI_V2DF, RS6000_BTI_float, 0, 0 }, + + { P10_BUILTIN_VEC_XXSPLTI32DX, P10_BUILTIN_VXXSPLTI32DX_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_UINTQI, RS6000_BTI_INTSI }, + { P10_BUILTIN_VEC_XXSPLTI32DX, P10_BUILTIN_VXXSPLTI32DX_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_UINTQI, + RS6000_BTI_UINTSI }, + { P10_BUILTIN_VEC_XXSPLTI32DX, P10_BUILTIN_VXXSPLTI32DX_V4SF, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_UINTQI, RS6000_BTI_float }, + { P10_BUILTIN_VEC_SRDB, P10_BUILTIN_VSRDB_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_UINTQI }, @@ -14036,6 +14052,9 @@ builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0, case ALTIVEC_BUILTIN_VSRH: case ALTIVEC_BUILTIN_VSRW: case P8V_BUILTIN_VSRD: + /* Vector splat immediate insert */ + case P10_BUILTIN_VXXSPLTI32DX_V4SI: + case P10_BUILTIN_VXXSPLTI32DX_V4SF: h.uns_p[2] = 1; break; diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 5508484ba19..c6158874ce9 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -274,6 +274,7 @@ extern void rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label); extern void rs6000_asm_output_dwarf_datarel (FILE *file, int size, 
const char *label); +extern long long rs6000_const_f32_to_i32 (rtx operand); /* Declare functions in rs6000-c.c */ diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index fef72884b31..046adc02dfc 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -26767,6 +26767,17 @@ rs6000_invalid_conversion (const_tree fromtype, const_tree totype) return NULL; } +long long +rs6000_const_f32_to_i32 (rtx operand) +{ + long long value; + const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (operand); + + gcc_assert (GET_MODE (operand) == SFmode); + REAL_VALUE_TO_TARGET_SINGLE (*rv, value); + return value; +} + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-rs6000.h" diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 1c39be37c1d..e9aa06553aa 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -21165,6 +21165,41 @@ using this built-in must be endian-aware. @findex vec_srdb +Vector Splat + +@smallexample +@exdent vector signed int vec_splati (const signed int); +@exdent vector float vec_splati (const float); +@end smallexample + +Splat a 32-bit immediate into a vector of words. + +@findex vec_splati + +@smallexample +@exdent vector double vec_splatid (const float); +@end smallexample + +Convert a single precision floating-point value to double-precision and splat +the result to a vector of double-precision floats. + +@findex vec_splatid + +@smallexample +@exdent vector signed int vec_splati_ins (vector signed int, +const unsigned int, const signed int); +@exdent vector unsigned int vec_splati_ins (vector unsigned int, +const unsigned int, const unsigned int); +@exdent vector float vec_splati_ins (vector float, const unsigned int, +const float); +@end smallexample + +Argument 2 must be either 0 or 1. Splat the value of argument 3 into the word +identified by argument 2 of each doubleword of argument 1 and return the +result. The other words of argument 1 are unchanged. 
+ +@findex vec_splati_ins + @smallexample @exdent vector unsigned long long int @exdent vec_pext (vector unsigned long long int, vector unsigned long long int) diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splati-runnable.c b/gcc/testsuite/gcc.target/powerpc/vec-splati-runnable.c new file mode 100644 index 00000000000..a0ce456c6fd --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec-splati-runnable.c @@ -0,0 +1,145 @@ +/* { dg-do run } */ +/* { dg-require-effective-target power10_hw } */ +/* { dg-options "-mdejagnu-cpu=power10" } */ +#include <altivec.h> + +#define DEBUG 0 + +#ifdef DEBUG +#include <stdio.h> +#endif + +extern void abort (void); + +int +main (int argc, char *argv []) +{ + int i; + vector int vsrc_a_int; + vector int vresult_int; + vector int expected_vresult_int; + int src_a_int = 13; + + vector unsigned int vsrc_a_uint; + vector unsigned int vresult_uint; + vector unsigned int expected_vresult_uint; + unsigned int src_a_uint = 7; + + vector float vresult_f; + vector float expected_vresult_f; + vector float vsrc_a_f; + float src_a_f = 23.0; + + vector double vsrc_a_d; + vector double vresult_d; + vector double expected_vresult_d; + + /* Vector splati word */ + vresult_int = (vector signed int) { 1, 2, 3, 4 }; + expected_vresult_int = (vector signed int) { -13, -13, -13, -13 }; + + vresult_int = vec_splati ( -13 ); + + if (!vec_all_eq (vresult_int, expected_vresult_int)) { +#if DEBUG + printf("ERROR, vec_splati (src_a_int)\n"); + for(i = 0; i < 4; i++) + printf(" vresult_int[%d] = %d, expected_vresult_int[%d] = %d\n", + i, vresult_int[i], i, expected_vresult_int[i]); +#else + abort(); +#endif + } + + vresult_f = (vector float) { 1.0, 2.0, 3.0, 4.0 }; + expected_vresult_f = (vector float) { 23.0, 23.0, 23.0, 23.0 }; + + vresult_f = vec_splati (23.0f); + + if (!vec_all_eq (vresult_f, expected_vresult_f)) { +#if DEBUG + printf("ERROR, vec_splati (src_a_f)\n"); + for(i = 0; i < 4; i++) + printf(" vresult_f[%d] = %f, expected_vresult_f[%d] = 
%f\n", + i, vresult_f[i], i, expected_vresult_f[i]); +#else + abort(); +#endif + } + + /* Vector splati double */ + vresult_d = (vector double) { 2.0, 3.0 }; + expected_vresult_d = (vector double) { -31.0, -31.0 }; + + vresult_d = vec_splatid (-31.0f); + + if (!vec_all_eq (vresult_d, expected_vresult_d)) { +#if DEBUG + printf("ERROR, vec_splati (-31.0f)\n"); + for(i = 0; i < 2; i++) + printf(" vresult_d[%i] = %f, expected_vresult_d[%i] = %f\n", + i, vresult_d[i], i, expected_vresult_d[i]); +#else + abort(); +#endif + } + + /* Vector splat immediate */ + vsrc_a_int = (vector int) { 2, 3, 4, 5 }; + vresult_int = (vector int) { 1, 1, 1, 1 }; + expected_vresult_int = (vector int) { 2, 20, 4, 20 }; + + vresult_int = vec_splati_ins (vsrc_a_int, 1, 20); + + if (!vec_all_eq (vresult_int, expected_vresult_int)) { +#if DEBUG + printf("ERROR, vec_splati_ins (vsrc_a_int, 1, 20)\n"); + for(i = 0; i < 4; i++) + printf(" vresult_int[%i] = %d, expected_vresult_int[%i] = %d\n", + i, vresult_int[i], i, expected_vresult_int[i]); +#else + abort(); +#endif + } + + vsrc_a_uint = (vector unsigned int) { 4, 5, 6, 7 }; + vresult_uint = (vector unsigned int) { 1, 1, 1, 1 }; + expected_vresult_uint = (vector unsigned int) { 4, 40, 6, 40 }; + + vresult_uint = vec_splati_ins (vsrc_a_uint, 1, 40); + + if (!vec_all_eq (vresult_uint, expected_vresult_uint)) { +#if DEBUG + printf("ERROR, vec_splati_ins (vsrc_a_uint, 1, 40)\n"); + for(i = 0; i < 4; i++) + printf(" vresult_uint[%i] = %d, expected_vresult_uint[%i] = %d\n", + i, vresult_uint[i], i, expected_vresult_uint[i]); +#else + abort(); +#endif + } + + vsrc_a_f = (vector float) { 2.0, 3.0, 4.0, 5.0 }; + vresult_f = (vector float) { 1.0, 1.0, 1.0, 1.0 }; + expected_vresult_f = (vector float) { 2.0, 20.1, 4.0, 20.1 }; + + vresult_f = vec_splati_ins (vsrc_a_f, 1, 20.1f); + + if (!vec_all_eq (vresult_f, expected_vresult_f)) { +#if DEBUG + printf("ERROR, vec_splati_ins (vsrc_a_f, 1, 20.1)\n"); + for(i = 0; i < 4; i++) + printf(" vresult_f[%i] = %f, 
expected_vresult_f[%i] = %f\n", + i, vresult_f[i], i, expected_vresult_f[i]); +#else + abort(); +#endif + } + + return 0; +} + +/* { dg-final { scan-assembler-times {\msplati\M} 6 } } */ +/* { dg-final { scan-assembler-times {\msrdbi\M} 6 } } */ + + -- 2.17.1