On Tue, Apr 5, 2016 at 3:36 PM, Bill Seurer <[email protected]> wrote:
> This patch adds support for the signed and unsigned int versions of the
> vec_adde altivec builtins from the Power Architecture 64-Bit ELF V2 ABI
> OpenPOWER ABI for Linux Supplement (16 July 2015 Version 1.1). Many of
> these builtins are still missing, and this is the first of a series
> of patches to add them.
>
> There are no instructions for the int versions of vec_adde, so the
> output code is built from other built-ins that do have instructions;
> in this case that is just two vec_adds.
>
> The new test cases are executable tests which verify that the generated
> code produces expected values. C macros were used so that the same
> test case could be used for both the signed and unsigned versions. An
> extra executable test case is also included to ensure that the modified
> support for the __int128 versions of vec_adde is not broken. The same
> test case could not be used for both int and __int128 because of some
> differences in loading and storing the vectors.
>
> Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
> regressions. Is this ok for trunk?
>
> [gcc]
>
> 2016-04-06 Bill Seurer <[email protected]>
>
> * config/rs6000/rs6000-builtin.def (vec_adde): Change vec_adde to a
> special case builtin.
> * config/rs6000/rs6000-c.c (altivec_overloaded_builtins,
> altivec_resolve_overloaded_builtin): Remove ALTIVEC_BUILTIN_VEC_ADDE
> from altivec_overloaded_builtins structure. Add support for it to
> altivec_resolve_overloaded_builtin function.
> * config/rs6000/rs6000.c (altivec_init_builtins): Add definition
> for __builtin_vec_adde.
>
> [gcc/testsuite]
>
> 2016-04-06 Bill Seurer <[email protected]>
>
> * gcc.target/powerpc/vec-adde.c: New test.
> * gcc.target/powerpc/vec-adde-int128.c: New test.
>
> Index: gcc/config/rs6000/rs6000-builtin.def
> ===================================================================
> --- gcc/config/rs6000/rs6000-builtin.def (revision 234745)
> +++ gcc/config/rs6000/rs6000-builtin.def (working copy)
> @@ -951,7 +951,6 @@ BU_ALTIVEC_X (VEC_EXT_V4SF, "vec_ext_v4sf", CO
> before we get to the point about classifying the builtin type. */
>
> /* 3 argument Altivec overloaded builtins. */
> -BU_ALTIVEC_OVERLOAD_3 (ADDE, "adde")
> BU_ALTIVEC_OVERLOAD_3 (ADDEC, "addec")
> BU_ALTIVEC_OVERLOAD_3 (MADD, "madd")
> BU_ALTIVEC_OVERLOAD_3 (MADDS, "madds")
> @@ -1137,6 +1136,7 @@ BU_ALTIVEC_OVERLOAD_P (VCMPGT_P, "vcmpgt_p")
> BU_ALTIVEC_OVERLOAD_P (VCMPGE_P, "vcmpge_p")
>
> /* Overloaded Altivec builtins that are handled as special cases. */
> +BU_ALTIVEC_OVERLOAD_X (ADDE, "adde")
> BU_ALTIVEC_OVERLOAD_X (CTF, "ctf")
> BU_ALTIVEC_OVERLOAD_X (CTS, "cts")
> BU_ALTIVEC_OVERLOAD_X (CTU, "ctu")
> Index: gcc/config/rs6000/rs6000-c.c
> ===================================================================
> --- gcc/config/rs6000/rs6000-c.c (revision 234745)
> +++ gcc/config/rs6000/rs6000-c.c (working copy)
> @@ -842,11 +842,6 @@ const struct altivec_builtin_types altivec_overloa
> RS6000_BTI_unsigned_V1TI, 0 },
> { ALTIVEC_BUILTIN_VEC_ADDC, P8V_BUILTIN_VADDCUQ,
> RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 },
> - { ALTIVEC_BUILTIN_VEC_ADDE, P8V_BUILTIN_VADDEUQM,
> - RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI,
> - RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI },
> - { ALTIVEC_BUILTIN_VEC_ADDE, P8V_BUILTIN_VADDEUQM,
> - RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI },
> { ALTIVEC_BUILTIN_VEC_ADDEC, P8V_BUILTIN_VADDECUQ,
> RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI,
> RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI },
> @@ -4515,6 +4510,59 @@ assignment for unaligned loads and stores");
> warning (OPT_Wdeprecated, "vec_lvsr is deprecated for little endian; use \
> assignment for unaligned loads and stores");
>
> + if (fcode == ALTIVEC_BUILTIN_VEC_ADDE)
> + {
> + /* vec_adde needs to be special cased because there is no instruction
> + for the {un}signed int version */
End the comment sentence with a period followed by two spaces before the closing */.
> + if (nargs != 3)
> + {
> + error ("vec_adde only accepts 3 arguments");
> + return error_mark_node;
> + }
> +
> + tree arg0 = (*arglist)[0];
> + tree arg0_type = TREE_TYPE (arg0);
> + tree arg1 = (*arglist)[1];
> + tree arg1_type = TREE_TYPE (arg1);
> + tree arg2 = (*arglist)[2];
> + tree arg2_type = TREE_TYPE (arg2);
> +
> + /* All 3 arguments must be vectors of (signed or unsigned) (int or
> + __int128) and the types must match */
Same here: end the comment with a period and two spaces before the closing */.
> + if ((arg0_type != arg1_type) || (arg1_type != arg2_type))
> + goto bad;
> + if (TREE_CODE (arg0_type) != VECTOR_TYPE)
> + goto bad;
> +
> + switch (TYPE_MODE (TREE_TYPE (arg0_type)))
> + {
> + /* for {un}signed ints,
> + vec_adde (va, vb, carryv) == vec_add (vec_add (va, vb), carryv) */
Same here: end the comment with a period and two spaces before the closing */.
> + case SImode:
> + {
> + vec<tree, va_gc> *params = make_tree_vector();
> + vec_safe_push (params, arg0);
> + vec_safe_push (params, arg1);
> + tree call = altivec_resolve_overloaded_builtin
> + (loc, rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_ADD], params);
> + params = make_tree_vector();
> + vec_safe_push (params, call);
> + vec_safe_push (params, arg2);
> + return altivec_resolve_overloaded_builtin
> + (loc, rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_ADD], params);
> + }
> + /* for {un}signed __int128s use the vaddeuqm instruction directly */
Same here: end the comment with a period and two spaces before the closing */.
> + case TImode:
> + return altivec_resolve_overloaded_builtin
> + (loc, rs6000_builtin_decls[P8V_BUILTIN_VEC_VADDEUQM], arglist);
> +
> + /* Types other than {un}signed int and {un}signed __int128
> + are errors */
Same here: end the comment with a period and two spaces before the closing */.
> + default:
> + goto bad;
> + }
> + }
> +
> /* For now treat vec_splats and vec_promote as the same. */
> if (fcode == ALTIVEC_BUILTIN_VEC_SPLATS
> || fcode == ALTIVEC_BUILTIN_VEC_PROMOTE)
> Index: gcc/config/rs6000/rs6000.c
> ===================================================================
> --- gcc/config/rs6000/rs6000.c (revision 234745)
> +++ gcc/config/rs6000/rs6000.c (working copy)
> @@ -15582,6 +15582,10 @@ altivec_init_builtins (void)
> = build_function_type_list (opaque_V4SI_type_node,
> opaque_V4SI_type_node, opaque_V4SI_type_node,
> integer_type_node, NULL_TREE);
> + tree opaque_ftype_opaque_opaque_opaque
> + = build_function_type_list (opaque_V4SI_type_node,
> + opaque_V4SI_type_node, opaque_V4SI_type_node,
> + opaque_V4SI_type_node, NULL_TREE);
> tree int_ftype_int_opaque_opaque
> = build_function_type_list (integer_type_node,
> integer_type_node, opaque_V4SI_type_node,
> @@ -15818,6 +15822,8 @@ altivec_init_builtins (void)
> def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int,
> ALTIVEC_BUILTIN_VEC_CTS);
> def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int,
> ALTIVEC_BUILTIN_VEC_CTU);
>
> + def_builtin ("__builtin_vec_adde", opaque_ftype_opaque_opaque_opaque, ALTIVEC_BUILTIN_VEC_ADDE);
> +
> /* Cell builtins. */
> def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid,
> ALTIVEC_BUILTIN_LVLX);
> def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid,
> ALTIVEC_BUILTIN_LVLXL);
> Index: gcc/testsuite/gcc.target/powerpc/vec-adde-int128.c
> ===================================================================
> --- gcc/testsuite/gcc.target/powerpc/vec-adde-int128.c (revision 0)
> +++ gcc/testsuite/gcc.target/powerpc/vec-adde-int128.c (working copy)
> @@ -0,0 +1,78 @@
> +/* { dg-do run { target { powerpc64le-*-* } } } */
> +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
> +/* { dg-options "-mcpu=power8 -O3" } */
> +
> +/* Test that the vec_adde builtin works as expected */
Same here: end the comment with a period and two spaces before the closing */.
> +
> +#include "altivec.h"
> +
> +#define N 4096
> +
> +void abort ();
> +
> +#define define_test_functions(STYPE, NAMESUFFIX) \
> +\
> +STYPE result_##NAMESUFFIX[N]; \
> +STYPE addend1_##NAMESUFFIX[N]; \
> +STYPE addend2_##NAMESUFFIX[N]; \
> +STYPE carry_##NAMESUFFIX[N]; \
> +STYPE expected_##NAMESUFFIX[N]; \
> +\
> +__attribute__((noinline)) void vector_tests_##NAMESUFFIX () \
> +{ \
> + int i; \
> + vector STYPE v1, v2, v3, tmp; \
> + for (i = 0; i < N; i+=16/sizeof(STYPE)) { \
> + /* result=addend1+addend2+carry */ \
> + v1 = (vector STYPE) { addend1_##NAMESUFFIX[i] }; \
> + v2 = (vector STYPE) { addend2_##NAMESUFFIX[i] }; \
> + v3 = (vector STYPE) { carry_##NAMESUFFIX[i] }; \
> +\
> + tmp = vec_adde (v1, v2, v3); \
> + result_##NAMESUFFIX[i] = tmp[0]; \
> + } \
> +} \
> +\
> +__attribute__((noinline)) void init_##NAMESUFFIX () \
> +{ \
> + int i; \
> + for (i = 0; i < N; ++i) { \
> + result_##NAMESUFFIX[i] = 0; \
> + addend1_##NAMESUFFIX[i] = 1; \
> + addend2_##NAMESUFFIX[i] = 2; \
> + carry_##NAMESUFFIX[i] = (i%2==0)? 1: 0; \
> + expected_##NAMESUFFIX[i] = addend1_##NAMESUFFIX[i] + \
> + addend2_##NAMESUFFIX[i] + carry_##NAMESUFFIX[i]; \
> + } \
> +} \
> +\
> +__attribute__((noinline)) void verify_results_##NAMESUFFIX () \
> +{ \
> + int i; \
> + for (i = 0; i < N; ++i) { \
> + if (result_##NAMESUFFIX[i] != expected_##NAMESUFFIX[i]) \
> + abort(); \
> + } \
> +}
> +
> +
> +#define execute_test_functions(STYPE, NAMESUFFIX) \
> +{ \
> + init_##NAMESUFFIX (); \
> + vector_tests_##NAMESUFFIX (); \
> + verify_results_##NAMESUFFIX (); \
> +}
> +
> +
> +define_test_functions(signed __int128, si128);
> +define_test_functions(unsigned __int128, ui128);
> +
> +int main ()
> +{
> + execute_test_functions(signed __int128, si128);
> + execute_test_functions(unsigned __int128, ui128);
> +
> + return 0;
> +}
> +
> +
> Index: gcc/testsuite/gcc.target/powerpc/vec-adde.c
> ===================================================================
> --- gcc/testsuite/gcc.target/powerpc/vec-adde.c (revision 0)
> +++ gcc/testsuite/gcc.target/powerpc/vec-adde.c (working copy)
> @@ -0,0 +1,78 @@
> +/* { dg-do run { target { powerpc64le-*-* } } } */
> +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
> +/* { dg-options "-mcpu=power8 -O3" } */
> +
> +/* Test that the vec_adde builtin works as expected */
Same here: end the comment with a period and two spaces before the closing */.
> +
> +#include "altivec.h"
> +
> +#define N 4096
> +
> +void abort ();
> +
> +#define define_test_functions(STYPE, NAMESUFFIX) \
> +\
> +STYPE result_##NAMESUFFIX[N]; \
> +STYPE addend1_##NAMESUFFIX[N]; \
> +STYPE addend2_##NAMESUFFIX[N]; \
> +STYPE carry_##NAMESUFFIX[N]; \
> +STYPE expected_##NAMESUFFIX[N]; \
> +\
> +__attribute__((noinline)) void vector_tests_##NAMESUFFIX () \
> +{ \
> + int i; \
> + vector STYPE v1, v2, v3, tmp; \
> + for (i = 0; i < N; i+=16/sizeof(STYPE)) { \
> + /* result=addend1+addend2+carry */ \
> + v1 = vec_vsx_ld (0, &addend1_##NAMESUFFIX[i]); \
> + v2 = vec_vsx_ld (0, &addend2_##NAMESUFFIX[i]); \
> + v3 = vec_vsx_ld (0, &carry_##NAMESUFFIX[i]); \
> +\
> + tmp = vec_adde (v1, v2, v3); \
> + vec_vsx_st (tmp, 0, &result_##NAMESUFFIX[i]); \
> + } \
> +} \
> +\
> +__attribute__((noinline)) void init_##NAMESUFFIX () \
> +{ \
> + int i; \
> + for (i = 0; i < N; ++i) { \
> + result_##NAMESUFFIX[i] = 0; \
> + addend1_##NAMESUFFIX[i] = 1; \
> + addend2_##NAMESUFFIX[i] = 2; \
> + carry_##NAMESUFFIX[i] = (i%2==0)? 1: 0; \
> + expected_##NAMESUFFIX[i] = addend1_##NAMESUFFIX[i] + \
> + addend2_##NAMESUFFIX[i] + carry_##NAMESUFFIX[i]; \
> + } \
> +} \
> +\
> +__attribute__((noinline)) void verify_results_##NAMESUFFIX () \
> +{ \
> + int i; \
> + for (i = 0; i < N; ++i) { \
> + if (result_##NAMESUFFIX[i] != expected_##NAMESUFFIX[i]) \
> + abort(); \
> + } \
> +}
> +
> +
> +#define execute_test_functions(STYPE, NAMESUFFIX) \
> +{ \
> + init_##NAMESUFFIX (); \
> + vector_tests_##NAMESUFFIX (); \
> + verify_results_##NAMESUFFIX (); \
> +}
> +
> +
> +define_test_functions(signed int, si);
> +define_test_functions(unsigned int, ui);
> +
> +int main ()
> +{
> + execute_test_functions(signed int, si);
> + execute_test_functions(unsigned int, ui);
> +
> + return 0;
> +}
> +
> +
> --
>
> -Bill Seurer
>