Hi,

This patch adds logic for -maltivec=be with a little endian target when
generating code for the vec_sums builtin.  This implements the vsumsws
instruction, which adds the four elements in the first input vector
operand to element 3 of the second input vector operand, placing the
result in element 3 of the destination vector operand.

For little endian, element 3 is the leftmost (most significant) word in
the vector register, while the instruction treats element 3 as the
rightmost (least significant) word.  Since there is no vector
shift-immediate or rotate-immediate instruction in VMX, we use a splat
instruction to get LE element 3 (BE element 0) into BE element 3 of a
scratch register for input to the vsumsws instruction.  Similarly, the
result of the vsumsws instruction is then splatted from BE element 3
into BE element 0 (LE element 3) where it is expected to be by any
builtin that consumes that value.  The destination register is reused
for this purpose.

As with other patches in this series, an altivec_vsumsws_direct pattern
is added for uses of vsumsws internal to GCC.

Two new test cases are added that demonstrate how the vec_sums builtin
is expected to behave for BE, LE, and LE with -maltivec=be.

Bootstrapped and tested on powerpc64{,le}-unknown-linux-gnu with no
regressions.  Is this ok for trunk?

Thanks,
Bill


gcc:

2014-01-30  Bill Schmidt  <wschm...@linux.vnet.ibm.com>

        * config/rs6000/altivec.md (UNSPEC_VSUMSWS_DIRECT): New unspec.
        (altivec_vsumsws): Add handling for -maltivec=be with a little
        endian target.
        (altivec_vsumsws_direct): New.
        (reduc_splus_<mode>): Call gen_altivec_vsumsws_direct instead of
        gen_altivec_vsumsws.

gcc/testsuite:

2014-01-30  Bill Schmidt  <wschm...@linux.vnet.ibm.com>

        * gcc.dg/vmx/vsums.c: New.
        * gcc.dg/vmx/vsums-be-order.c: New.


Index: gcc/testsuite/gcc.dg/vmx/vsums.c
===================================================================
--- gcc/testsuite/gcc.dg/vmx/vsums.c    (revision 0)
+++ gcc/testsuite/gcc.dg/vmx/vsums.c    (revision 0)
@@ -0,0 +1,12 @@
+#include "harness.h"
+
+static void test()
+{
+  vector signed int va = {-7,11,-13,17};
+  vector signed int vb = {0,0,0,128};
+
+  vector signed int vd = vec_sums (va, vb);
+  signed int r = vec_extract (vd, 3);
+
+  check (r == 136, "sums");
+}
Index: gcc/testsuite/gcc.dg/vmx/vsums-be-order.c
===================================================================
--- gcc/testsuite/gcc.dg/vmx/vsums-be-order.c   (revision 0)
+++ gcc/testsuite/gcc.dg/vmx/vsums-be-order.c   (revision 0)
@@ -0,0 +1,19 @@
+/* { dg-options "-maltivec=be -mabi=altivec -std=gnu99 -mno-vsx" } */
+
+#include "harness.h"
+
+static void test()
+{
+  vector signed int va = {-7,11,-13,17};
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+  vector signed int vb = {128,0,0,0};
+#else
+  vector signed int vb = {0,0,0,128};
+#endif
+
+  vector signed int vd = vec_sums (va, vb);
+  signed int r = vec_extract (vd, 3);
+
+  check (r == 136, "sums");
+}
Index: gcc/config/rs6000/altivec.md
===================================================================
--- gcc/config/rs6000/altivec.md        (revision 207326)
+++ gcc/config/rs6000/altivec.md        (working copy)
@@ -132,6 +132,7 @@
    UNSPEC_VMRGH_DIRECT
    UNSPEC_VMRGL_DIRECT
    UNSPEC_VSPLT_DIRECT
+   UNSPEC_VSUMSWS_DIRECT
 ])
 
 (define_c_enum "unspecv"
@@ -1601,6 +1602,27 @@
         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
                       (match_operand:V4SI 2 "register_operand" "v")]
                     UNSPEC_VSUMSWS))
+   (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))
+   (clobber (match_scratch:V4SI 3 "=v"))]
+  "TARGET_ALTIVEC"
+{
+  if (BYTES_BIG_ENDIAN || VECTOR_ELT_ORDER_BIG)
+    return "vsumsws %0,%1,%2";
+  else
+    return "vspltw %3,%2,0\n\tvsumsws %3,%1,%3\n\tvspltw %0,%3,3";
+}
+  [(set_attr "type" "veccomplex")
+   (set (attr "length")
+     (if_then_else
+       (match_test "(BYTES_BIG_ENDIAN || VECTOR_ELT_ORDER_BIG)")
+       (const_string "4")
+       (const_string "12")))])
+
+(define_insn "altivec_vsumsws_direct"
+  [(set (match_operand:V4SI 0 "register_operand" "=v")
+        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
+                      (match_operand:V4SI 2 "register_operand" "v")]
+                    UNSPEC_VSUMSWS_DIRECT))
    (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
   "TARGET_ALTIVEC"
   "vsumsws %0,%1,%2"
@@ -2337,7 +2359,7 @@
 
   emit_insn (gen_altivec_vspltisw (vzero, const0_rtx));
   emit_insn (gen_altivec_vsum4s<VI_char>s (vtmp1, operands[1], vzero));
-  emit_insn (gen_altivec_vsumsws (dest, vtmp1, vzero));
+  emit_insn (gen_altivec_vsumsws_direct (dest, vtmp1, vzero));
   DONE;
 })
 


Reply via email to