GCC maintainers: The following patch adds support for some missing instances of the builtins vec_mergee, vec_mergeo, and vec_float2. This patch adds the missing GCC functionality and test cases for the builtins. The patch has been tested on:
powerpc64le-unknown-linux-gnu (Power 8 LE) powerpc64-unknown-linux-gnu (Power 8 BE) powerpc64le-unknown-linux-gnu (Power 9 LE) without regressions. Please let me know if the following patch is acceptable. Thanks. Carl Love ____________________________________________________________________________________ gcc/ChangeLog: 2017-12-18 Carl Love <c...@us.ibm.com> * config/rs6000/altivec.md (p8_vmrgow): Add support for V2DI, V2DF, V4SI, V4SF types. (p8_vmrgew): Add support for V2DI, V2DF, V4SF types. * config/rs6000/rs6000-builtin.def: Add definitions for FLOAT2_V2DF, VMRGEW_V2DI, VMRGEW_V2DF, VMRGEW_V4SF, VMRGOW_V4SI, VMRGOW_V4SF, VMRGOW_V2DI, VMRGOW_V2DF. Remove definition for VMRGOW. * config/rs6000/rs6000-c.c (VSX_BUILTIN_VEC_FLOAT2, P8V_BUILTIN_VEC_VMRGEW, P8V_BUILTIN_VEC_VMRGOW): Add definitions. * config/rs6000/rs6000-protos.h: Add extern declaration for rs6000_generate_float2_double_code. * config/rs6000/rs6000.c (rs6000_generate_float2_double_code): Add function. * config/rs6000/vsx.md (vsx_xvcdpsp): Add define_insn. (float2_v2df): Add define_expand. gcc/testsuite/ChangeLog: 2017-12-18 Carl Love <c...@us.ibm.com> * gcc.target/powerpc/builtins-1.c (main): Add tests for vec_mergee and vec_mergeo builtins with float, double, long long, unsigned long long, bool long long arguments. * gcc.target/powerpc/builtins-3-runnable.c (main): Add test for vec_float2 with double arguments. * gcc.target/powerpc/builtins-mergew-mergow.c: New runnable test for the vec_mergee and vec_mergeo builtins. 
--- gcc/config/rs6000/altivec.md | 48 +++- gcc/config/rs6000/rs6000-builtin.def | 9 +- gcc/config/rs6000/rs6000-c.c | 32 ++- gcc/config/rs6000/rs6000-protos.h | 1 + gcc/config/rs6000/rs6000.c | 37 +++ gcc/config/rs6000/vsx.md | 27 +++ gcc/testsuite/gcc.target/powerpc/builtins-1.c | 12 + .../gcc.target/powerpc/builtins-3-runnable.c | 4 + .../gcc.target/powerpc/builtins-mergew-mergow.c | 263 +++++++++++++++++++++ 9 files changed, 423 insertions(+), 10 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/builtins-mergew-mergow.c diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 7122f99..8e0dfcf 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -1,3 +1,4 @@ + ;; AltiVec patterns. ;; Copyright (C) 2002-2017 Free Software Foundation, Inc. ;; Contributed by Aldy Hernandez (a...@quesejoda.com) @@ -1318,6 +1319,24 @@ } [(set_attr "type" "vecperm")]) +;; Power8 vector merge two V2DF/V2DI even words to V2DF +(define_expand "p8_vmrgew_<mode>" + [(use (match_operand:VSX_D 0 "vsx_register_operand" "")) + (use (match_operand:VSX_D 1 "vsx_register_operand" "")) + (use (match_operand:VSX_D 2 "vsx_register_operand" ""))] + "VECTOR_MEM_VSX_P (<MODE>mode)" +{ + rtvec v; + rtx x; + + v = gen_rtvec (2, GEN_INT (0), GEN_INT (2)); + x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]); + + x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v)); + emit_insn (gen_rtx_SET (operands[0], x)); + DONE; +}) + ;; Power8 vector merge two V4SF/V4SI even words to V4SF (define_insn "p8_vmrgew_<mode>" [(set (match_operand:VSX_W 0 "register_operand" "=v") @@ -1336,12 +1355,12 @@ } [(set_attr "type" "vecperm")]) -(define_insn "p8_vmrgow" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (vec_select:V4SI - (vec_concat:V8SI - (match_operand:V4SI 1 "register_operand" "v") - (match_operand:V4SI 2 "register_operand" "v")) +(define_insn "p8_vmrgow_<mode>" + [(set (match_operand:VSX_W 0 "register_operand" 
"=v") + (vec_select:VSX_W + (vec_concat:<VS_double> + (match_operand:VSX_W 1 "register_operand" "v") + (match_operand:VSX_W 2 "register_operand" "v")) (parallel [(const_int 1) (const_int 5) (const_int 3) (const_int 7)])))] "TARGET_P8_VECTOR" @@ -1353,6 +1372,23 @@ } [(set_attr "type" "vecperm")]) +(define_expand "p8_vmrgow_<mode>" + [(use (match_operand:VSX_D 0 "vsx_register_operand" "")) + (use (match_operand:VSX_D 1 "vsx_register_operand" "")) + (use (match_operand:VSX_D 2 "vsx_register_operand" ""))] + "VECTOR_MEM_VSX_P (<MODE>mode)" +{ + rtvec v; + rtx x; + + v = gen_rtvec (2, GEN_INT (1), GEN_INT (3)); + x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]); + + x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v)); + emit_insn (gen_rtx_SET (operands[0], x)); + DONE; +}) + (define_insn "p8_vmrgew_<mode>_direct" [(set (match_operand:VSX_W 0 "register_operand" "=v") (unspec:VSX_W [(match_operand:VSX_W 1 "register_operand" "v") diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index cfb6e55..4b6d58c 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -1649,6 +1649,7 @@ BU_VSX_2 (CMPLE_U16QI, "cmple_u16qi", CONST, vector_ngtuv16qi) BU_VSX_2 (CMPLE_U8HI, "cmple_u8hi", CONST, vector_ngtuv8hi) BU_VSX_2 (CMPLE_U4SI, "cmple_u4si", CONST, vector_ngtuv4si) BU_VSX_2 (CMPLE_U2DI, "cmple_u2di", CONST, vector_ngtuv2di) +BU_VSX_2 (FLOAT2_V2DF, "float2_v2df", CONST, float2_v2df) BU_VSX_2 (FLOAT2_V2DI, "float2_v2di", CONST, float2_v2di) BU_VSX_2 (UNS_FLOAT2_V2DI, "uns_float2_v2di", CONST, uns_float2_v2di) @@ -1923,8 +1924,14 @@ BU_P8V_AV_2 (VMINSD, "vminsd", CONST, sminv2di3) BU_P8V_AV_2 (VMAXSD, "vmaxsd", CONST, smaxv2di3) BU_P8V_AV_2 (VMINUD, "vminud", CONST, uminv2di3) BU_P8V_AV_2 (VMAXUD, "vmaxud", CONST, umaxv2di3) +BU_P8V_AV_2 (VMRGEW_V2DI, "vmrgew_v2di", CONST, p8_vmrgew_v2di) +BU_P8V_AV_2 (VMRGEW_V2DF, "vmrgew_v2df", CONST, p8_vmrgew_v2df) BU_P8V_AV_2 
(VMRGEW_V4SI, "vmrgew_v4si", CONST, p8_vmrgew_v4si) -BU_P8V_AV_2 (VMRGOW, "vmrgow", CONST, p8_vmrgow) +BU_P8V_AV_2 (VMRGEW_V4SF, "vmrgew_v4sf", CONST, p8_vmrgew_v4sf) +BU_P8V_AV_2 (VMRGOW_V4SI, "vmrgow_v4si", CONST, p8_vmrgow_v4si) +BU_P8V_AV_2 (VMRGOW_V4SF, "vmrgow_v4sf", CONST, p8_vmrgow_v4sf) +BU_P8V_AV_2 (VMRGOW_V2DI, "vmrgow_v2di", CONST, p8_vmrgow_v2di) +BU_P8V_AV_2 (VMRGOW_V2DF, "vmrgow_v2df", CONST, p8_vmrgow_v2df) BU_P8V_AV_2 (VBPERMQ, "vbpermq", CONST, altivec_vbpermq) BU_P8V_AV_2 (VBPERMQ2, "vbpermq2", CONST, altivec_vbpermq2) BU_P8V_AV_2 (VPKUDUM, "vpkudum", CONST, altivec_vpkudum) diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c index 645260a..eb646ff 100644 --- a/gcc/config/rs6000/rs6000-c.c +++ b/gcc/config/rs6000/rs6000-c.c @@ -1523,6 +1523,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SF, RS6000_BTI_V4SI, 0, 0 }, { VSX_BUILTIN_VEC_FLOAT, VSX_BUILTIN_XVCVUXWSP_V4SF, RS6000_BTI_V4SF, RS6000_BTI_unsigned_V4SI, 0, 0 }, + { VSX_BUILTIN_VEC_FLOAT2, VSX_BUILTIN_FLOAT2_V2DF, + RS6000_BTI_V4SF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, { VSX_BUILTIN_VEC_FLOAT2, VSX_BUILTIN_FLOAT2_V2DI, RS6000_BTI_V4SF, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, { VSX_BUILTIN_VEC_FLOAT2, VSX_BUILTIN_UNS_FLOAT2_V2DI, @@ -5480,6 +5482,17 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMRGEW, P8V_BUILTIN_VMRGEW_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMRGEW, P8V_BUILTIN_VMRGEW_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMRGEW, P8V_BUILTIN_VMRGEW_V2DI, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMRGEW, P8V_BUILTIN_VMRGEW_V4SF, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { P8V_BUILTIN_VEC_VMRGEW, P8V_BUILTIN_VMRGEW_V2DF, 
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, { P8V_BUILTIN_VEC_VMRGEW, P8V_BUILTIN_VMRGEW_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, { P8V_BUILTIN_VEC_VMRGEW, P8V_BUILTIN_VMRGEW_V4SI, @@ -5488,13 +5501,26 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { P8V_BUILTIN_VEC_VMRGEW, P8V_BUILTIN_VMRGEW_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, - { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW, + { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, - { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW, + { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, - { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW, + { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW_V2DI, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW_V4SF, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { P8V_BUILTIN_VEC_VPMSUM, P8V_BUILTIN_VPMSUMB, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI, diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 0728800..325aadb 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -73,6 +73,7 @@ extern void 
rs6000_expand_extract_even (rtx, rtx, rtx); extern void rs6000_expand_interleave (rtx, rtx, rtx, bool); extern void rs6000_scale_v2df (rtx, rtx, int); extern void rs6000_generate_float2_code (bool, rtx, rtx, rtx); +extern void rs6000_generate_float2_double_code (rtx, rtx, rtx); extern void rs6000_generate_vsigned2_code (bool, rtx, rtx, rtx); extern int expand_block_clear (rtx[]); extern int expand_block_move (rtx[]); diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index d19a4cf..ab45220 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -39473,6 +39473,43 @@ rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) } void +rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2) +{ + rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3; + + rtx_tmp0 = gen_reg_rtx (V2DFmode); + rtx_tmp1 = gen_reg_rtx (V2DFmode); + + /* The destination of the vmrgew instruction layout is: + rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1]. + Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the + vmrgew instruction will be correct. 
*/ + if (VECTOR_ELT_ORDER_BIG) + { + emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2, + GEN_INT (0))); + emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2, + GEN_INT (3))); + } + else + { + emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3))); + emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0))); + } + + rtx_tmp2 = gen_reg_rtx (V4SFmode); + rtx_tmp3 = gen_reg_rtx (V4SFmode); + + emit_insn (gen_vsx_xvcdpsp (rtx_tmp2, rtx_tmp0)); + emit_insn (gen_vsx_xvcdpsp (rtx_tmp3, rtx_tmp1)); + + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3)); + else + emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2)); +} + +void rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2) { rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3; diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 00d7656..8356075 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -372,6 +372,7 @@ UNSPEC_VSX_XVCVSXDDP UNSPEC_VSX_XVCVUXDDP UNSPEC_VSX_XVCVDPSXDS + UNSPEC_VSX_XVCDPSP UNSPEC_VSX_XVCVDPUXDS UNSPEC_VSX_SIGN_EXTEND UNSPEC_VSX_XVCVSPSXWS @@ -2171,6 +2172,14 @@ "xvcvuxdsp %x0,%x1" [(set_attr "type" "vecdouble")]) +(define_insn "vsx_xvcdpsp" + [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa") + (unspec:V4SF [(match_operand:V2DF 1 "vsx_register_operand" "wf,wa")] + UNSPEC_VSX_XVCDPSP))] + "VECTOR_UNIT_VSX_P (V2DFmode)" + "xvcvdpsp %x0,%x1" + [(set_attr "type" "vecdouble")]) + ;; Convert from 32-bit to 64-bit types ;; Provide both vector and scalar targets (define_insn "vsx_xvcvsxwdp" @@ -2237,6 +2246,24 @@ "xvcvuxwsp %x0,%x1" [(set_attr "type" "vecfloat")]) +;; Generate float2 double +;; convert two double to float +(define_expand "float2_v2df" + [(use (match_operand:V4SF 0 "register_operand" "=wa")) + (use (match_operand:V2DF 1 "register_operand" "wa")) + (use (match_operand:V2DF 2 "register_operand" "wa"))] + "VECTOR_UNIT_VSX_P (V4SFmode)" +{ + rtx 
rtx_src1, rtx_src2, rtx_dst; + + rtx_dst = operands[0]; + rtx_src1 = operands[1]; + rtx_src2 = operands[2]; + + rs6000_generate_float2_double_code (rtx_dst, rtx_src1, rtx_src2); + DONE; +}) + ;; Generate float2 ;; convert two long long signed ints to float (define_expand "float2_v2di" diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-1.c b/gcc/testsuite/gcc.target/powerpc/builtins-1.c index 8d0b7c9..776955b 100644 --- a/gcc/testsuite/gcc.target/powerpc/builtins-1.c +++ b/gcc/testsuite/gcc.target/powerpc/builtins-1.c @@ -151,17 +151,29 @@ int main () vector unsigned long long u4 = vec_splat (u2, 1); vector bool long long l5 = vec_splat (ld, 0); vector bool long long l6 = vec_splat (ld, 1); + vector bool long long l10 = vec_mergee (ld, ld); + vector bool long long l11 = vec_mergeo (ld, ld); vector long long l7 = vec_div (l3, l4); vector unsigned long long u5 = vec_div (u3, u4); vector long long l8 = vec_mul (l3, l4); vector unsigned long long u6 = vec_mul (u3, u4); + vector long long l12 = vec_mergee (la, lb); + vector long long l13 = vec_mergeo (la, lb); + vector unsigned long long u8 = vec_mergee (u3, u4); + vector unsigned long long u9 = vec_mergeo (u3, u4); vector double dh = vec_ctf (la, -2); vector double di = vec_ctf (ua, 2); vector long long l9 = vec_cts (dh, -2); vector unsigned long long u7 = vec_ctu (di, 2); + vector float f1 = vec_mergee (fa, fb); + vector float f2 = vec_mergeo (fa, fb); + + vector double d1 = vec_mergee (da, db); + vector double d2 = vec_mergeo (da, db); + return 0; } diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-3-runnable.c b/gcc/testsuite/gcc.target/powerpc/builtins-3-runnable.c index 17bb9b3..d0a8254 100644 --- a/gcc/testsuite/gcc.target/powerpc/builtins-3-runnable.c +++ b/gcc/testsuite/gcc.target/powerpc/builtins-3-runnable.c @@ -266,6 +266,10 @@ int main() vec_flt_result = vec_float2 (vec_ll_uns_int0, vec_ll_uns_int1); test_result_sp(ALL, vec_flt_result, vec_flt_expected); + vec_flt_expected = (vector float){34.0, 
97.0, 214.0, -5.5}; + vec_flt_result = vec_float2 (vec_dble0, vec_dble1); + test_result_sp(ALL, vec_flt_result, vec_flt_expected); + /* conversion of even words in double precision vector to single precision vector */ vec_flt_expected = (vector float){-12.00, 00.00, -12345678901234.00, 0.00}; vec_flt_result = vec_floate (vec_ll_int0); diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-mergew-mergow.c b/gcc/testsuite/gcc.target/powerpc/builtins-mergew-mergow.c new file mode 100644 index 0000000..24df2cf --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/builtins-mergew-mergow.c @@ -0,0 +1,263 @@ +/* { dg-do run } */ +/* { dg-require-effective-target vsx_hw } */ +/* { dg-options "-maltivec -mvsx" } */ + +#include <altivec.h> // vector +#include <stdlib.h> + +#ifdef DEBUG +#include <stdio.h> +#endif + +void abort (void); + +int main() { + vector signed int vec_si_arg1, vec_si_arg2; + vector signed int vec_si_expected, vec_si_result; + vector unsigned int vec_ui_arg1, vec_ui_arg2; + vector unsigned int vec_ui_expected, vec_ui_result; + vector signed long long vec_sll_arg1, vec_sll_arg2; + vector signed long long vec_sll_expected, vec_sll_result; + vector unsigned long long vec_ull_arg1, vec_ull_arg2; + vector unsigned long long vec_ull_expected, vec_ull_result; + vector bool long long vec_bll_arg1, vec_bll_arg2; + vector bool long long vec_bll_expected, vec_bll_result; + vector float vec_f_arg1, vec_f_arg2; + vector float vec_f_expected, vec_f_result; + vector double vec_d_arg1, vec_d_arg2; + vector double vec_d_expected, vec_d_result; + int i; + unsigned long long int value; + + /* Tests for vec_mergeo, Merges the odd-numbered halves of two vectors. 
*/ + vec_si_arg1 = (vector int){-100, -101, 102, 103}; + vec_si_arg2 = (vector int){200, 201, 202, 203}; + + vec_si_result = vec_mergeo (vec_si_arg1, vec_si_arg2); + + vec_si_expected = (vector int){-101, 201, 103, 203}; + + for (i = 0; i < 4; i++) + if (vec_si_result[i] != vec_si_expected[i]) +#ifdef DEBUG + printf("ERROR vec_mergeo(): vec_si_result[%d] = %d, vec_si_expected[%d] = %d\n", + i, vec_si_result[i], i, vec_si_expected[i]); +#else + abort(); +#endif + + vec_ui_arg1 = (vector unsigned int){100, 101, 102, 103}; + vec_ui_arg2 = (vector unsigned int){200, 201, 202, 203}; + + vec_ui_result = vec_mergeo (vec_ui_arg1, vec_ui_arg2); + + vec_ui_expected = (vector unsigned int){101, 201, 103, 203}; + + for (i = 0; i < 4; i++) + if (vec_ui_result[i] != vec_ui_expected[i]) +#ifdef DEBUG + printf("ERROR vec_mergeo(): vec_ui_result[%d] = %d, vec_ui_expected[%d] = %d\n", + i, vec_ui_result[i], i, vec_ui_expected[i]); +#else + abort(); +#endif + + vec_sll_arg1 = (vector long long int){-300, -301}; + vec_sll_arg2 = (vector long long int){400, 401}; + + vec_sll_result = vec_mergeo (vec_sll_arg1, vec_sll_arg2); + + vec_sll_expected = (vector long long int){-301, 401}; + + for (i = 0; i < 2; i++) + if (vec_sll_result[i] != vec_sll_expected[i]) +#ifdef DEBUG + printf("ERROR vec_mergeo(): vec_sll_result[%d] = %lld, vec_sll_expected[%d] = %lld\n", + i, (long long int)vec_sll_result[i], + i, (long long int)vec_sll_expected[i]); +#else + abort(); +#endif + + vec_ull_arg1 = (vector unsigned long long int){500, 501}; + vec_ull_arg2 = (vector unsigned long long int){600, 601}; + + vec_ull_result = vec_mergeo (vec_ull_arg1, vec_ull_arg2); + + vec_ull_expected = (vector unsigned long long int){501, 601}; + + for (i = 0; i < 2; i++) + if (vec_ull_result[i] != vec_ull_expected[i]) +#ifdef DEBUG + printf("ERROR vec_mergeo(): vec_ull_result[%d] = %lld, vec_ull_expected[%d] = %lld\n", + i, (unsigned long long int)vec_ull_result[i], + i, (unsigned long long int)vec_ull_expected[i]); +#else 
+ abort(); +#endif + + vec_bll_arg1 = (vector bool long long){0, 0}; + vec_bll_arg2 = (vector bool long long){1, 1}; + + vec_bll_result = vec_mergeo (vec_bll_arg1, vec_bll_arg2); + + vec_bll_expected = (vector bool long long){0, 1}; + + for (i = 0; i < 2; i++) + if (vec_bll_result[i] != vec_bll_expected[i]) +#ifdef DEBUG + printf("ERROR vec_mergeo(): vec_bll_result[%d] = %lld, vec_bll_expected[%d] = %lld\n", + i, vec_ull_result[i], + i, vec_ull_expected[i]); +#else + abort(); +#endif + + vec_f_arg1 = (vector float){100.0, 101.1, 102.2, 103.3}; + vec_f_arg2 = (vector float){200.0, 201.1, 202.2, 203.3}; + + vec_f_result = vec_mergeo (vec_f_arg1, vec_f_arg2); + + vec_f_expected = (vector float){101.1, 201.1, 103.3, 203.3}; + + for (i = 0; i < 4; i++) + if (vec_f_result[i] != vec_f_expected[i]) +#ifdef DEBUG + printf("ERROR vec_mergeo(): vec_f_result[%d] = %f, vec_f_expected[%d] = %f\n", + i, vec_f_result[i], i, vec_f_expected[i]); +#else + abort(); +#endif + + vec_d_arg1 = (vector double){300.0, 301.1}; + vec_d_arg2 = (vector double){400.0, 401.1}; + + vec_d_result = vec_mergeo (vec_d_arg1, vec_d_arg2); + + vec_d_expected = (vector double){301.1, 401.1}; + + for (i = 0; i < 2; i++) + if (vec_d_result[i] != vec_d_expected[i]) +#ifdef DEBUG + printf("ERROR vec_mergeo(): vec_d_result[%d] = %f, vec_d_expected[%d] = %f\n", + i, vec_d_result[i], i, vec_d_expected[i]); +#else + abort(); +#endif + + /* Tests for vec_mergee, Merges the even-numbered halves of two vectors. 
*/ + vec_si_arg1 = (vector int){-100, -101, 102, 103}; + vec_si_arg2 = (vector int){200, 201, 202, 203}; + + vec_si_result = vec_mergee (vec_si_arg1, vec_si_arg2); + + vec_si_expected = (vector int){-100, 200, 102, 202}; + + for (i = 0; i < 4; i++) + if (vec_si_result[i] != vec_si_expected[i]) +#ifdef DEBUG + printf("ERROR vec_mergee(): vec_si_result[%d] = %d, vec_si_expected[%d] = %d\n", + i, vec_si_result[i], i, vec_si_expected[i]); +#else + abort(); +#endif + + vec_ui_arg1 = (vector unsigned int){100, 101, 102, 103}; + vec_ui_arg2 = (vector unsigned int){200, 201, 202, 203}; + + vec_ui_result = vec_mergee (vec_ui_arg1, vec_ui_arg2); + + vec_ui_expected = (vector unsigned int){100, 200, 102, 202}; + + for (i = 0; i < 4; i++) + if (vec_ui_result[i] != vec_ui_expected[i]) +#ifdef DEBUG + printf("ERROR vec_mergee(): vec_ui_result[%d] = %d, vec_ui_expected[%d] = %d\n", + i, vec_ui_result[i], i, vec_ui_expected[i]); +#else + abort(); +#endif + + vec_sll_arg1 = (vector signed long long int){-300, -301}; + vec_sll_arg2 = (vector signed long long int){400, 401}; + + vec_sll_result = vec_mergee (vec_sll_arg1, vec_sll_arg2); + + vec_sll_expected = (vector signed long long int){-300, 400}; + + for (i = 0; i < 2; i++) + if (vec_sll_result[i] != vec_sll_expected[i]) +#ifdef DEBUG + printf("ERROR vec_mergee(): vec_sll_result[%d] = %lld, vec_sll_expected[%d] = %lld\n", + i, (signed long long int)vec_sll_result[i], + i, (signed long long int)vec_sll_expected[i]); +#else + abort(); +#endif + + vec_ull_arg1 = (vector unsigned long long int){500, 501}; + vec_ull_arg2 = (vector unsigned long long int){600, 601}; + + vec_ull_result = vec_mergee (vec_ull_arg1, vec_ull_arg2); + + vec_ull_expected = (vector unsigned long long int){500, 600}; + + for (i = 0; i < 2; i++) + if (vec_ull_result[i] != vec_ull_expected[i]) +#ifdef DEBUG + printf("ERROR vec_mergee(): vec_ull_result[%d] = %lld, vec_ull_expected[%d] = %lld\n", + i, (unsigned long long int)vec_ull_result[i], + i, (unsigned long 
long int)vec_ull_expected[i]); +#else + abort(); +#endif + + vec_bll_arg1 = (vector bool long long){0, 0}; + vec_bll_arg2 = (vector bool long long){1, 1}; + + vec_bll_result = vec_mergee (vec_bll_arg1, vec_bll_arg2); + + vec_bll_expected = (vector bool long long){0, 1}; + + for (i = 0; i < 2; i++) + if (vec_bll_result[i] != vec_bll_expected[i]) +#ifdef DEBUG + printf("ERROR vec_mergee(): vec_bll_result[%d] = %lld, vec_bll_expected[%d] = %lld\n", + i, vec_ull_result[i], + i, vec_ull_expected[i]); +#else + abort(); +#endif + + vec_f_arg1 = (vector float){100.0, 101.1, 102.2, 103.3}; + vec_f_arg2 = (vector float){200.0, 201.1, 202.2, 203.3}; + + vec_f_result = vec_mergee (vec_f_arg1, vec_f_arg2); + + vec_f_expected = (vector float){100.0, 200.0, 102.2, 202.2}; + + for (i = 0; i < 4; i++) + if (vec_f_result[i] != vec_f_expected[i]) +#ifdef DEBUG + printf("ERROR vec_mergee(): vec_f_result[%d] = %f, vec_f_expected[%d] = %f\n", + i, vec_f_result[i], i, vec_f_expected[i]); +#else + abort(); +#endif + + vec_d_arg1 = (vector double){300.0, 301.1}; + vec_d_arg2 = (vector double){400.0, 401.1}; + + vec_d_result = vec_mergee (vec_d_arg1, vec_d_arg2); + + vec_d_expected = (vector double){300.0, 400.0}; + + for (i = 0; i < 2; i++) + if (vec_d_result[i] != vec_d_expected[i]) +#ifdef DEBUG + printf("ERROR vec_mergee(): vec_d_result[%d] = %f, vec_d_expected[%d] = %f\n", + i, vec_d_result[i], i, vec_d_expected[i]); +#else + abort(); +#endif +} -- 2.7.4