This patch makes the AltiVec vec_insert and vec_extract intrinsics interpret element numbers in big-endian order (left to right in a vector register) when targeting a little-endian machine with -maltivec=be specified. New test cases are added to exercise this functionality on all supported vector types.
Bootstrapped and tested with no new regressions on powerpc64{,le}-unknown-linux-gnu. Ok for trunk? Thanks, Bill gcc: 2014-01-12 Bill Schmidt <wschm...@linux.vnet.ibm.com> * config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin): Implement -maltivec=be for vec_insert and vec_extract. gcc/testsuite: 2014-01-12 Bill Schmidt <wschm...@linux.vnet.ibm.com> * gcc.dg/vmx/insert.c: New. * gcc.dg/vmx/insert-be-order.c: New. * gcc.dg/vmx/extract.c: New. * gcc.dg/vmx/extract-be-order.c: New. Index: gcc/testsuite/gcc.dg/vmx/insert-be-order.c =================================================================== --- gcc/testsuite/gcc.dg/vmx/insert-be-order.c (revision 0) +++ gcc/testsuite/gcc.dg/vmx/insert-be-order.c (revision 0) @@ -0,0 +1,65 @@ +/* { dg-options "-maltivec=be -mabi=altivec -std=gnu99 -mno-vsx" } */ + +#include "harness.h" + +static void test() +{ + vector unsigned char va = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; + vector signed char vb = {-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7}; + vector unsigned short vc = {0,1,2,3,4,5,6,7}; + vector signed short vd = {-4,-3,-2,-1,0,1,2,3}; + vector unsigned int ve = {0,1,2,3}; + vector signed int vf = {-2,-1,0,1}; + vector float vg = {-2.0f,-1.0f,0.0f,1.0f}; + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + check (vec_all_eq (vec_insert (16, va, 5), + ((vector unsigned char) + {0,1,2,3,4,5,6,7,8,9,16,11,12,13,14,15})), + "vec_insert (va LE)"); + check (vec_all_eq (vec_insert (-16, vb, 0), + ((vector signed char) + {-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,-16})), + "vec_insert (vb LE)"); + check (vec_all_eq (vec_insert (16, vc, 7), + ((vector unsigned short){16,1,2,3,4,5,6,7})), + "vec_insert (vc LE)"); + check (vec_all_eq (vec_insert (-16, vd, 3), + ((vector signed short){-4,-3,-2,-1,-16,1,2,3})), + "vec_insert (vd LE)"); + check (vec_all_eq (vec_insert (16, ve, 2), + ((vector unsigned int){0,16,2,3})), + "vec_insert (ve LE)"); + check (vec_all_eq (vec_insert (-16, vf, 1), + ((vector signed int){-2,-1,-16,1})), + 
"vec_insert (vf LE)"); + check (vec_all_eq (vec_insert (-16.0f, vg, 0), + ((vector float){-2.0f,-1.0f,0.0f,-16.0f})), + "vec_insert (vg LE)"); +#else + check (vec_all_eq (vec_insert (16, va, 5), + ((vector unsigned char) + {0,1,2,3,4,16,6,7,8,9,10,11,12,13,14,15})), + "vec_insert (va BE)"); + check (vec_all_eq (vec_insert (-16, vb, 0), + ((vector signed char) + {-16,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7})), + "vec_insert (vb BE)"); + check (vec_all_eq (vec_insert (16, vc, 7), + ((vector unsigned short){0,1,2,3,4,5,6,16})), + "vec_insert (vc BE)"); + check (vec_all_eq (vec_insert (-16, vd, 3), + ((vector signed short){-4,-3,-2,-16,0,1,2,3})), + "vec_insert (vd BE)"); + check (vec_all_eq (vec_insert (16, ve, 2), + ((vector unsigned int){0,1,16,3})), + "vec_insert (ve BE)"); + check (vec_all_eq (vec_insert (-16, vf, 1), + ((vector signed int){-2,-16,0,1})), + "vec_insert (vf BE)"); + check (vec_all_eq (vec_insert (-16.0f, vg, 0), + ((vector float){-16.0f,-1.0f,0.0f,1.0f})), + "vec_insert (vg BE)"); +#endif +} + Index: gcc/testsuite/gcc.dg/vmx/extract-be-order.c =================================================================== --- gcc/testsuite/gcc.dg/vmx/extract-be-order.c (revision 0) +++ gcc/testsuite/gcc.dg/vmx/extract-be-order.c (revision 0) @@ -0,0 +1,33 @@ +/* { dg-options "-maltivec=be -mabi=altivec -std=gnu99 -mno-vsx" } */ + +#include "harness.h" + +static void test() +{ + vector unsigned char va = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; + vector signed char vb = {-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7}; + vector unsigned short vc = {0,1,2,3,4,5,6,7}; + vector signed short vd = {-4,-3,-2,-1,0,1,2,3}; + vector unsigned int ve = {0,1,2,3}; + vector signed int vf = {-2,-1,0,1}; + vector float vg = {-2.0f,-1.0f,0.0f,1.0f}; + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + check (vec_extract (va, 5) == 10, "vec_extract (va, 5)"); + check (vec_extract (vb, 0) == 7, "vec_extract (vb, 0)"); + check (vec_extract (vc, 7) == 0, "vec_extract (vc, 7)"); + check 
(vec_extract (vd, 3) == 0, "vec_extract (vd, 3)"); + check (vec_extract (ve, 2) == 1, "vec_extract (ve, 2)"); + check (vec_extract (vf, 1) == 0, "vec_extract (vf, 1)"); + check (vec_extract (vg, 0) == 1.0f, "vec_extract (vg, 0)"); +#else + check (vec_extract (va, 5) == 5, "vec_extract (va, 5)"); + check (vec_extract (vb, 0) == -8, "vec_extract (vb, 0)"); + check (vec_extract (vc, 7) == 7, "vec_extract (vc, 7)"); + check (vec_extract (vd, 3) == -1, "vec_extract (vd, 3)"); + check (vec_extract (ve, 2) == 2, "vec_extract (ve, 2)"); + check (vec_extract (vf, 1) == -1, "vec_extract (vf, 1)"); + check (vec_extract (vg, 0) == -2.0f, "vec_extract (vg, 0)"); +#endif +} + Index: gcc/testsuite/gcc.dg/vmx/extract.c =================================================================== --- gcc/testsuite/gcc.dg/vmx/extract.c (revision 0) +++ gcc/testsuite/gcc.dg/vmx/extract.c (revision 0) @@ -0,0 +1,21 @@ +#include "harness.h" + +static void test() +{ + vector unsigned char va = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; + vector signed char vb = {-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7}; + vector unsigned short vc = {0,1,2,3,4,5,6,7}; + vector signed short vd = {-4,-3,-2,-1,0,1,2,3}; + vector unsigned int ve = {0,1,2,3}; + vector signed int vf = {-2,-1,0,1}; + vector float vg = {-2.0f,-1.0f,0.0f,1.0f}; + + check (vec_extract (va, 5) == 5, "vec_extract (va, 5)"); + check (vec_extract (vb, 0) == -8, "vec_extract (vb, 0)"); + check (vec_extract (vc, 7) == 7, "vec_extract (vc, 7)"); + check (vec_extract (vd, 3) == -1, "vec_extract (vd, 3)"); + check (vec_extract (ve, 2) == 2, "vec_extract (ve, 2)"); + check (vec_extract (vf, 1) == -1, "vec_extract (vf, 1)"); + check (vec_extract (vg, 0) == -2.0f, "vec_extract (vg, 0)"); +} + Index: gcc/testsuite/gcc.dg/vmx/insert.c =================================================================== --- gcc/testsuite/gcc.dg/vmx/insert.c (revision 0) +++ gcc/testsuite/gcc.dg/vmx/insert.c (revision 0) @@ -0,0 +1,37 @@ +#include "harness.h" + +static void 
test() +{ + vector unsigned char va = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; + vector signed char vb = {-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7}; + vector unsigned short vc = {0,1,2,3,4,5,6,7}; + vector signed short vd = {-4,-3,-2,-1,0,1,2,3}; + vector unsigned int ve = {0,1,2,3}; + vector signed int vf = {-2,-1,0,1}; + vector float vg = {-2.0f,-1.0f,0.0f,1.0f}; + + check (vec_all_eq (vec_insert (16, va, 5), + ((vector unsigned char) + {0,1,2,3,4,16,6,7,8,9,10,11,12,13,14,15})), + "vec_insert (va)"); + check (vec_all_eq (vec_insert (-16, vb, 0), + ((vector signed char) + {-16,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7})), + "vec_insert (vb)"); + check (vec_all_eq (vec_insert (16, vc, 7), + ((vector unsigned short){0,1,2,3,4,5,6,16})), + "vec_insert (vc)"); + check (vec_all_eq (vec_insert (-16, vd, 3), + ((vector signed short){-4,-3,-2,-16,0,1,2,3})), + "vec_insert (vd)"); + check (vec_all_eq (vec_insert (16, ve, 2), + ((vector unsigned int){0,1,16,3})), + "vec_insert (ve)"); + check (vec_all_eq (vec_insert (-16, vf, 1), + ((vector signed int){-2,-16,0,1})), + "vec_insert (vf)"); + check (vec_all_eq (vec_insert (-16.0f, vg, 0), + ((vector float){-16.0f,-1.0f,0.0f,1.0f})), + "vec_insert (vg)"); +} + Index: gcc/config/rs6000/rs6000-c.c =================================================================== --- gcc/config/rs6000/rs6000-c.c (revision 206571) +++ gcc/config/rs6000/rs6000-c.c (working copy) @@ -4172,7 +4172,7 @@ altivec_resolve_overloaded_builtin (location_t loc return build_constructor (type, vec); } - /* For now use pointer tricks to do the extaction, unless we are on VSX + /* For now use pointer tricks to do the extraction, unless we are on VSX extracting a double from a constant offset. 
*/ if (fcode == ALTIVEC_BUILTIN_VEC_EXTRACT) { @@ -4200,6 +4200,19 @@ altivec_resolve_overloaded_builtin (location_t loc if (!INTEGRAL_TYPE_P (TREE_TYPE (arg2))) goto bad; + /* If we are targeting little-endian, but -maltivec=be has been + specified to override the element order, adjust the element + number accordingly. */ + if (!BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 2) + { + int last_elem = TYPE_VECTOR_SUBPARTS (arg1_type) - 1; + double_int di_last_elem = double_int::from_uhwi (last_elem); + arg2 = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (arg2), + double_int_to_tree (TREE_TYPE (arg2), + di_last_elem), + arg2); + } + /* If we can use the VSX xxpermdi instruction, use that for extract. */ mode = TYPE_MODE (arg1_type); if ((mode == V2DFmode || mode == V2DImode) && VECTOR_MEM_VSX_P (mode) @@ -4256,7 +4269,7 @@ altivec_resolve_overloaded_builtin (location_t loc return stmt; } - /* For now use pointer tricks to do the insertation, unless we are on VSX + /* For now use pointer tricks to do the insertion, unless we are on VSX inserting a double to a constant offset.. */ if (fcode == ALTIVEC_BUILTIN_VEC_INSERT) { @@ -4286,6 +4299,19 @@ altivec_resolve_overloaded_builtin (location_t loc if (!INTEGRAL_TYPE_P (TREE_TYPE (arg2))) goto bad; + /* If we are targeting little-endian, but -maltivec=be has been + specified to override the element order, adjust the element + number accordingly. */ + if (!BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 2) + { + int last_elem = TYPE_VECTOR_SUBPARTS (arg1_type) - 1; + double_int di_last_elem = double_int::from_uhwi (last_elem); + arg2 = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (arg2), + double_int_to_tree (TREE_TYPE (arg2), + di_last_elem), + arg2); + } + /* If we can use the VSX xxpermdi instruction, use that for insert. */ mode = TYPE_MODE (arg1_type); if ((mode == V2DFmode || mode == V2DImode) && VECTOR_UNIT_VSX_P (mode)