Hi, This patch adds logic for -maltivec=be with a little endian target when generating code for the vec_sums builtin. The builtin maps to the vsumsws instruction, which adds the four elements in the first input vector operand to element 3 of the second input vector operand, placing the result in element 3 of the destination vector operand.
For little endian, element 3 is the leftmost (most significant) word in the vector register, while the instruction treats element 3 as the rightmost (least significant) word. Since there is not a vector shift-immediate or rotate-immediate instruction in VMX, we use a splat instruction to get LE element 3 (BE element 0) into BE element 3 of a scratch register for input to the vsumsws instruction. Similarly, the result of the vsumsws instruction is then splatted from BE element 3 into BE element 0 (LE element 3), where any builtin that consumes that value expects to find it. The destination register is reused for this purpose. As with other patches in this series, an altivec_vsumsws_direct pattern is added for uses of vsumsws internal to GCC. Two new test cases are added that demonstrate how the vec_sums builtin is expected to behave for BE, LE, and LE with -maltivec=be. Bootstrapped and tested on powerpc64{,le}-unknown-linux-gnu with no regressions. Is this ok for trunk? Thanks, Bill gcc: 2014-01-30 Bill Schmidt <wschm...@linux.vnet.ibm.com> * config/rs6000/altivec.md (UNSPEC_VSUMSWS_DIRECT): New unspec. (altivec_vsumsws): Add handling for -maltivec=be with a little endian target. (altivec_vsumsws_direct): New. (reduc_splus_<mode>): Call gen_altivec_vsumsws_direct instead of gen_altivec_vsumsws. gcc/testsuite: 2014-01-30 Bill Schmidt <wschm...@linux.vnet.ibm.com> * gcc.dg/vmx/vsums.c: New. * gcc.dg/vmx/vsums-be-order.c: New. 
Index: gcc/testsuite/gcc.dg/vmx/vsums.c =================================================================== --- gcc/testsuite/gcc.dg/vmx/vsums.c (revision 0) +++ gcc/testsuite/gcc.dg/vmx/vsums.c (revision 0) @@ -0,0 +1,12 @@ +#include "harness.h" + +static void test() +{ + vector signed int va = {-7,11,-13,17}; + vector signed int vb = {0,0,0,128}; + + vector signed int vd = vec_sums (va, vb); + signed int r = vec_extract (vd, 3); + + check (r == 136, "sums"); +} Index: gcc/testsuite/gcc.dg/vmx/vsums-be-order.c =================================================================== --- gcc/testsuite/gcc.dg/vmx/vsums-be-order.c (revision 0) +++ gcc/testsuite/gcc.dg/vmx/vsums-be-order.c (revision 0) @@ -0,0 +1,19 @@ +/* { dg-options "-maltivec=be -mabi=altivec -std=gnu99 -mno-vsx" } */ + +#include "harness.h" + +static void test() +{ + vector signed int va = {-7,11,-13,17}; + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + vector signed int vb = {128,0,0,0}; +#else + vector signed int vb = {0,0,0,128}; +#endif + + vector signed int vd = vec_sums (va, vb); + signed int r = vec_extract (vd, 3); + + check (r == 136, "sums"); +} Index: gcc/config/rs6000/altivec.md =================================================================== --- gcc/config/rs6000/altivec.md (revision 207326) +++ gcc/config/rs6000/altivec.md (working copy) @@ -132,6 +132,7 @@ UNSPEC_VMRGH_DIRECT UNSPEC_VMRGL_DIRECT UNSPEC_VSPLT_DIRECT + UNSPEC_VSUMSWS_DIRECT ]) (define_c_enum "unspecv" @@ -1601,6 +1602,27 @@ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") (match_operand:V4SI 2 "register_operand" "v")] UNSPEC_VSUMSWS)) + (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR)) + (clobber (match_scratch:V4SI 3 "=v"))] + "TARGET_ALTIVEC" +{ + if (BYTES_BIG_ENDIAN || VECTOR_ELT_ORDER_BIG) + return "vsumsws %0,%1,%2"; + else + return "vspltw %3,%2,0\n\tvsumsws %3,%1,%3\n\tvspltw %0,%3,3"; +} + [(set_attr "type" "veccomplex") + (set (attr "length") + (if_then_else + (match_test 
"(BYTES_BIG_ENDIAN || VECTOR_ELT_ORDER_BIG)") + (const_string "4") + (const_string "12")))]) + +(define_insn "altivec_vsumsws_direct" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")] + UNSPEC_VSUMSWS_DIRECT)) (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] "TARGET_ALTIVEC" "vsumsws %0,%1,%2" @@ -2337,7 +2359,7 @@ emit_insn (gen_altivec_vspltisw (vzero, const0_rtx)); emit_insn (gen_altivec_vsum4s<VI_char>s (vtmp1, operands[1], vzero)); - emit_insn (gen_altivec_vsumsws (dest, vtmp1, vzero)); + emit_insn (gen_altivec_vsumsws_direct (dest, vtmp1, vzero)); DONE; })