Hi,

This patch continues the series of changes to the Altivec builtins to
accommodate big-endian element order when targeting a little-endian
machine.  Here the focus is on the vector merge-high and merge-low
operations.
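As a quick refresher on the semantics (this sketch is mine, not part
of the patch): in big-endian element order, where element 0 is the
leftmost element, merge-high interleaves the first halves of the two
inputs and merge-low interleaves the second halves.  In plain C, for
four-element vectors:

#include <stdio.h>

#define N 4  /* four 32-bit elements per vector */

/* Merge-high: interleave the first (leftmost) halves of a and b.  */
static void merge_high (const int *a, const int *b, int *out)
{
  for (int i = 0; i < N / 2; i++)
    {
      out[2 * i] = a[i];
      out[2 * i + 1] = b[i];
    }
}

/* Merge-low: interleave the last (rightmost) halves of a and b.  */
static void merge_low (const int *a, const int *b, int *out)
{
  for (int i = 0; i < N / 2; i++)
    {
      out[2 * i] = a[N / 2 + i];
      out[2 * i + 1] = b[N / 2 + i];
    }
}

int main (void)
{
  int a[N] = { 0, 1, 2, 3 }, b[N] = { 4, 5, 6, 7 }, h[N], l[N];
  merge_high (a, b, h);  /* h = {0,4,1,5} */
  merge_low (a, b, l);   /* l = {2,6,3,7} */
  for (int i = 0; i < N; i++)
    printf ("%d %d\n", h[i], l[i]);
  return 0;
}

The expected results in the new merge.c test below follow this model.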
The primary change is in altivec.md.  As an example, look at the
pattern altivec_vmrghw.  Previously this was represented with a single
define_insn.  Now it's been split into a define_expand to create the
RTL, and a define_insn (*altivec_vmrghw_endian) to generate the
hardware instruction.  This is because we need a different selection
vector when using -maltivec=be and targeting LE.  (Normally LE and BE
can use the same selection vector, and GCC takes care of interpreting
the indices as left-to-right or right-to-left.)  The new define_insn
also substitutes vmrglw with swapped operands for vmrghw in
little-endian mode, since the hardware instruction has "big-endian
bias" in its interpretation of "high" and "low."

Because -maltivec=be applies only to programmer-specified builtins, we
need to adjust internal uses of altivec_vmrghw and friends.  Thus we
have a new define_insn altivec_vmrghw_internal that generates the
hardware instruction directly, with none of the above transformations.
New unspecs are needed for these internal forms.

The VSX flavors of merge-high and merge-low are a little simpler (see
vsx.md).  Here we already had a define_expand where the instructions
are generated by separate xxpermdi patterns, and there are no internal
uses to worry about.  So we only need to change the selection vector
in the generated RTL.

There are four new test cases that cover all of the supported data
types.  Tests are divided between those that require only VMX
instructions and those that require VSX instructions.  There are also
variants for -maltivec and -maltivec=be.

Bootstrapped and tested on powerpc64{,le}-unknown-linux-gnu with no
regressions.  Ok for trunk?

Thanks,
Bill


gcc:

2014-01-23  Bill Schmidt  <wschm...@linux.vnet.ibm.com>

	* config/rs6000/rs6000.c (altivec_expand_vec_perm_const): Use
	CODE_FOR_altivec_vmrg*_internal rather than CODE_FOR_altivec_vmrg*.
	* config/rs6000/vsx.md (vsx_mergel_<mode>): Adjust for
	-maltivec=be with LE targets.
	(vsx_mergeh_<mode>): Likewise.
	* config/rs6000/altivec.md (UNSPEC_VMRG[HL][BHW]_INTERNAL): New
	unspecs.
	(mulv8hi3): Use gen_altivec_vmrg[hl]w_internal.
	(altivec_vmrghb): Replace with define_expand and new
	*altivec_vmrghb_endian insn; adjust for -maltivec=be with LE
	targets.
	(altivec_vmrghb_internal): New define_insn.
	(altivec_vmrghh): Replace with define_expand and new
	*altivec_vmrghh_endian insn; adjust for -maltivec=be with LE
	targets.
	(altivec_vmrghh_internal): New define_insn.
	(altivec_vmrghw): Replace with define_expand and new
	*altivec_vmrghw_endian insn; adjust for -maltivec=be with LE
	targets.
	(altivec_vmrghw_internal): New define_insn.
	(*altivec_vmrghsf): Adjust for endianness.
	(altivec_vmrglb): Replace with define_expand and new
	*altivec_vmrglb_endian insn; adjust for -maltivec=be with LE
	targets.
	(altivec_vmrglb_internal): New define_insn.
	(altivec_vmrglh): Replace with define_expand and new
	*altivec_vmrglh_endian insn; adjust for -maltivec=be with LE
	targets.
	(altivec_vmrglh_internal): New define_insn.
	(altivec_vmrglw): Replace with define_expand and new
	*altivec_vmrglw_endian insn; adjust for -maltivec=be with LE
	targets.
	(altivec_vmrglw_internal): New define_insn.
	(*altivec_vmrglsf): Adjust for endianness.
	(vec_widen_umult_hi_v16qi): Use gen_altivec_vmrghh_internal.
	(vec_widen_umult_lo_v16qi): Use gen_altivec_vmrglh_internal.
	(vec_widen_smult_hi_v16qi): Use gen_altivec_vmrghh_internal.
	(vec_widen_smult_lo_v16qi): Use gen_altivec_vmrglh_internal.
	(vec_widen_umult_hi_v8hi): Use gen_altivec_vmrghw_internal.
	(vec_widen_umult_lo_v8hi): Use gen_altivec_vmrglw_internal.
	(vec_widen_smult_hi_v8hi): Use gen_altivec_vmrghw_internal.
	(vec_widen_smult_lo_v8hi): Use gen_altivec_vmrglw_internal.

gcc/testsuite:

2014-01-23  Bill Schmidt  <wschm...@linux.vnet.ibm.com>

	* gcc.dg/vmx/merge-be-order.c: New.
	* gcc.dg/vmx/merge.c: New.
	* gcc.dg/vmx/merge-vsx-be-order.c: New.
	* gcc.dg/vmx/merge-vsx.c: New.

Index: gcc/testsuite/gcc.dg/vmx/merge-be-order.c
===================================================================
--- gcc/testsuite/gcc.dg/vmx/merge-be-order.c	(revision 0)
+++ gcc/testsuite/gcc.dg/vmx/merge-be-order.c	(revision 0)
@@ -0,0 +1,96 @@
+/* { dg-options "-maltivec=be -mabi=altivec -std=gnu99 -mno-vsx" } */
+
+#include "harness.h"
+
+static void test()
+{
+  /* Input vectors.  */
+  vector unsigned char vuca = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+  vector unsigned char vucb
+    = {16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
+  vector signed char vsca
+    = {-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1};
+  vector signed char vscb = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+  vector unsigned short vusa = {0,1,2,3,4,5,6,7};
+  vector unsigned short vusb = {8,9,10,11,12,13,14,15};
+  vector signed short vssa = {-8,-7,-6,-5,-4,-3,-2,-1};
+  vector signed short vssb = {0,1,2,3,4,5,6,7};
+  vector unsigned int vuia = {0,1,2,3};
+  vector unsigned int vuib = {4,5,6,7};
+  vector signed int vsia = {-4,-3,-2,-1};
+  vector signed int vsib = {0,1,2,3};
+  vector float vfa = {-4.0,-3.0,-2.0,-1.0};
+  vector float vfb = {0.0,1.0,2.0,3.0};
+
+  /* Result vectors.  */
+  vector unsigned char vuch, vucl;
+  vector signed char vsch, vscl;
+  vector unsigned short vush, vusl;
+  vector signed short vssh, vssl;
+  vector unsigned int vuih, vuil;
+  vector signed int vsih, vsil;
+  vector float vfh, vfl;
+
+  /* Expected result vectors.  */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+  vector unsigned char vucrh = {24,8,25,9,26,10,27,11,28,12,29,13,30,14,31,15};
+  vector unsigned char vucrl = {16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7};
+  vector signed char vscrh = {8,-8,9,-7,10,-6,11,-5,12,-4,13,-3,14,-2,15,-1};
+  vector signed char vscrl = {0,-16,1,-15,2,-14,3,-13,4,-12,5,-11,6,-10,7,-9};
+  vector unsigned short vusrh = {12,4,13,5,14,6,15,7};
+  vector unsigned short vusrl = {8,0,9,1,10,2,11,3};
+  vector signed short vssrh = {4,-4,5,-3,6,-2,7,-1};
+  vector signed short vssrl = {0,-8,1,-7,2,-6,3,-5};
+  vector unsigned int vuirh = {6,2,7,3};
+  vector unsigned int vuirl = {4,0,5,1};
+  vector signed int vsirh = {2,-2,3,-1};
+  vector signed int vsirl = {0,-4,1,-3};
+  vector float vfrh = {2.0,-2.0,3.0,-1.0};
+  vector float vfrl = {0.0,-4.0,1.0,-3.0};
+#else
+  vector unsigned char vucrh = {0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23};
+  vector unsigned char vucrl = {8,24,9,25,10,26,11,27,12,28,13,29,14,30,15,31};
+  vector signed char vscrh = {-16,0,-15,1,-14,2,-13,3,-12,4,-11,5,-10,6,-9,7};
+  vector signed char vscrl = {-8,8,-7,9,-6,10,-5,11,-4,12,-3,13,-2,14,-1,15};
+  vector unsigned short vusrh = {0,8,1,9,2,10,3,11};
+  vector unsigned short vusrl = {4,12,5,13,6,14,7,15};
+  vector signed short vssrh = {-8,0,-7,1,-6,2,-5,3};
+  vector signed short vssrl = {-4,4,-3,5,-2,6,-1,7};
+  vector unsigned int vuirh = {0,4,1,5};
+  vector unsigned int vuirl = {2,6,3,7};
+  vector signed int vsirh = {-4,0,-3,1};
+  vector signed int vsirl = {-2,2,-1,3};
+  vector float vfrh = {-4.0,0.0,-3.0,1.0};
+  vector float vfrl = {-2.0,2.0,-1.0,3.0};
+#endif
+
+  vuch = vec_mergeh (vuca, vucb);
+  vucl = vec_mergel (vuca, vucb);
+  vsch = vec_mergeh (vsca, vscb);
+  vscl = vec_mergel (vsca, vscb);
+  vush = vec_mergeh (vusa, vusb);
+  vusl = vec_mergel (vusa, vusb);
+  vssh = vec_mergeh (vssa, vssb);
+  vssl = vec_mergel (vssa, vssb);
+  vuih = vec_mergeh (vuia, vuib);
+  vuil = vec_mergel (vuia, vuib);
+  vsih = vec_mergeh (vsia, vsib);
+  vsil = vec_mergel (vsia, vsib);
+  vfh = vec_mergeh (vfa, vfb);
+  vfl = vec_mergel (vfa, vfb);
+
+  check (vec_all_eq (vuch, vucrh), "vuch");
+  check (vec_all_eq (vucl, vucrl), "vucl");
+  check (vec_all_eq (vsch, vscrh), "vsch");
+  check (vec_all_eq (vscl, vscrl), "vscl");
+  check (vec_all_eq (vush, vusrh), "vush");
+  check (vec_all_eq (vusl, vusrl), "vusl");
+  check (vec_all_eq (vssh, vssrh), "vssh");
+  check (vec_all_eq (vssl, vssrl), "vssl");
+  check (vec_all_eq (vuih, vuirh), "vuih");
+  check (vec_all_eq (vuil, vuirl), "vuil");
+  check (vec_all_eq (vsih, vsirh), "vsih");
+  check (vec_all_eq (vsil, vsirl), "vsil");
+  check (vec_all_eq (vfh, vfrh), "vfh");
+  check (vec_all_eq (vfl, vfrl), "vfl");
+}
Index: gcc/testsuite/gcc.dg/vmx/merge.c
===================================================================
--- gcc/testsuite/gcc.dg/vmx/merge.c	(revision 0)
+++ gcc/testsuite/gcc.dg/vmx/merge.c	(revision 0)
@@ -0,0 +1,77 @@
+#include "harness.h"
+
+static void test()
+{
+  /* Input vectors.  */
+  vector unsigned char vuca = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+  vector unsigned char vucb
+    = {16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
+  vector signed char vsca
+    = {-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1};
+  vector signed char vscb = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+  vector unsigned short vusa = {0,1,2,3,4,5,6,7};
+  vector unsigned short vusb = {8,9,10,11,12,13,14,15};
+  vector signed short vssa = {-8,-7,-6,-5,-4,-3,-2,-1};
+  vector signed short vssb = {0,1,2,3,4,5,6,7};
+  vector unsigned int vuia = {0,1,2,3};
+  vector unsigned int vuib = {4,5,6,7};
+  vector signed int vsia = {-4,-3,-2,-1};
+  vector signed int vsib = {0,1,2,3};
+  vector float vfa = {-4.0,-3.0,-2.0,-1.0};
+  vector float vfb = {0.0,1.0,2.0,3.0};
+
+  /* Result vectors.  */
+  vector unsigned char vuch, vucl;
+  vector signed char vsch, vscl;
+  vector unsigned short vush, vusl;
+  vector signed short vssh, vssl;
+  vector unsigned int vuih, vuil;
+  vector signed int vsih, vsil;
+  vector float vfh, vfl;
+
+  /* Expected result vectors.  */
+  vector unsigned char vucrh = {0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23};
+  vector unsigned char vucrl = {8,24,9,25,10,26,11,27,12,28,13,29,14,30,15,31};
+  vector signed char vscrh = {-16,0,-15,1,-14,2,-13,3,-12,4,-11,5,-10,6,-9,7};
+  vector signed char vscrl = {-8,8,-7,9,-6,10,-5,11,-4,12,-3,13,-2,14,-1,15};
+  vector unsigned short vusrh = {0,8,1,9,2,10,3,11};
+  vector unsigned short vusrl = {4,12,5,13,6,14,7,15};
+  vector signed short vssrh = {-8,0,-7,1,-6,2,-5,3};
+  vector signed short vssrl = {-4,4,-3,5,-2,6,-1,7};
+  vector unsigned int vuirh = {0,4,1,5};
+  vector unsigned int vuirl = {2,6,3,7};
+  vector signed int vsirh = {-4,0,-3,1};
+  vector signed int vsirl = {-2,2,-1,3};
+  vector float vfrh = {-4.0,0.0,-3.0,1.0};
+  vector float vfrl = {-2.0,2.0,-1.0,3.0};
+
+  vuch = vec_mergeh (vuca, vucb);
+  vucl = vec_mergel (vuca, vucb);
+  vsch = vec_mergeh (vsca, vscb);
+  vscl = vec_mergel (vsca, vscb);
+  vush = vec_mergeh (vusa, vusb);
+  vusl = vec_mergel (vusa, vusb);
+  vssh = vec_mergeh (vssa, vssb);
+  vssl = vec_mergel (vssa, vssb);
+  vuih = vec_mergeh (vuia, vuib);
+  vuil = vec_mergel (vuia, vuib);
+  vsih = vec_mergeh (vsia, vsib);
+  vsil = vec_mergel (vsia, vsib);
+  vfh = vec_mergeh (vfa, vfb);
+  vfl = vec_mergel (vfa, vfb);
+
+  check (vec_all_eq (vuch, vucrh), "vuch");
+  check (vec_all_eq (vucl, vucrl), "vucl");
+  check (vec_all_eq (vsch, vscrh), "vsch");
+  check (vec_all_eq (vscl, vscrl), "vscl");
+  check (vec_all_eq (vush, vusrh), "vush");
+  check (vec_all_eq (vusl, vusrl), "vusl");
+  check (vec_all_eq (vssh, vssrh), "vssh");
+  check (vec_all_eq (vssl, vssrl), "vssl");
+  check (vec_all_eq (vuih, vuirh), "vuih");
+  check (vec_all_eq (vuil, vuirl), "vuil");
+  check (vec_all_eq (vsih, vsirh), "vsih");
+  check (vec_all_eq (vsil, vsirl), "vsil");
+  check (vec_all_eq (vfh, vfrh), "vfh");
+  check (vec_all_eq (vfl, vfrl), "vfl");
+}
Index: gcc/testsuite/gcc.dg/vmx/merge-vsx-be-order.c
===================================================================
--- gcc/testsuite/gcc.dg/vmx/merge-vsx-be-order.c	(revision 0)
+++ gcc/testsuite/gcc.dg/vmx/merge-vsx-be-order.c	(revision 0)
@@ -0,0 +1,46 @@
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-maltivec=be -mabi=altivec -std=gnu99 -mvsx" } */
+
+#include "harness.h"
+
+static int vec_long_eq (vector long x, vector long y)
+{
+  return (x[0] == y[0] && x[1] == y[1]);
+}
+
+static void test()
+{
+  /* Input vectors.  */
+  vector long vla = {-2,-1};
+  vector long vlb = {0,1};
+  vector double vda = {-2.0,-1.0};
+  vector double vdb = {0.0,1.0};
+
+  /* Result vectors.  */
+  vector long vlh, vll;
+  vector double vdh, vdl;
+
+  /* Expected result vectors.  */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+  vector long vlrh = {1,-1};
+  vector long vlrl = {0,-2};
+  vector double vdrh = {1.0,-1.0};
+  vector double vdrl = {0.0,-2.0};
+#else
+  vector long vlrh = {-2,0};
+  vector long vlrl = {-1,1};
+  vector double vdrh = {-2.0,0.0};
+  vector double vdrl = {-1.0,1.0};
+#endif
+
+  vlh = vec_mergeh (vla, vlb);
+  vll = vec_mergel (vla, vlb);
+  vdh = vec_mergeh (vda, vdb);
+  vdl = vec_mergel (vda, vdb);
+
+  check (vec_long_eq (vlh, vlrh), "vlh");
+  check (vec_long_eq (vll, vlrl), "vll");
+  check (vec_all_eq (vdh, vdrh), "vdh");
+  check (vec_all_eq (vdl, vdrl), "vdl");
+}
Index: gcc/testsuite/gcc.dg/vmx/merge-vsx.c
===================================================================
--- gcc/testsuite/gcc.dg/vmx/merge-vsx.c	(revision 0)
+++ gcc/testsuite/gcc.dg/vmx/merge-vsx.c	(revision 0)
@@ -0,0 +1,39 @@
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-maltivec -mabi=altivec -std=gnu99 -mvsx" } */
+
+#include "harness.h"
+
+static int vec_long_eq (vector long x, vector long y)
+{
+  return (x[0] == y[0] && x[1] == y[1]);
+}
+
+static void test()
+{
+  /* Input vectors.  */
+  vector long vla = {-2,-1};
+  vector long vlb = {0,1};
+  vector double vda = {-2.0,-1.0};
+  vector double vdb = {0.0,1.0};
+
+  /* Result vectors.  */
+  vector long vlh, vll;
+  vector double vdh, vdl;
+
+  /* Expected result vectors.  */
+  vector long vlrh = {-2,0};
+  vector long vlrl = {-1,1};
+  vector double vdrh = {-2.0,0.0};
+  vector double vdrl = {-1.0,1.0};
+
+  vlh = vec_mergeh (vla, vlb);
+  vll = vec_mergel (vla, vlb);
+  vdh = vec_mergeh (vda, vdb);
+  vdl = vec_mergel (vda, vdb);
+
+  check (vec_long_eq (vlh, vlrh), "vlh");
+  check (vec_long_eq (vll, vlrl), "vll");
+  check (vec_all_eq (vdh, vdrh), "vdh");
+  check (vec_all_eq (vdl, vdrl), "vdl");
+}
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c	(revision 206889)
+++ gcc/config/rs6000/rs6000.c	(working copy)
@@ -29870,22 +29870,28 @@ altivec_expand_vec_perm_const (rtx operands[4])
     { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum,
       { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
     { OPTION_MASK_ALTIVEC,
-      BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb : CODE_FOR_altivec_vmrglb,
+      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_internal
+       : CODE_FOR_altivec_vmrglb_internal),
      { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
     { OPTION_MASK_ALTIVEC,
-      BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh : CODE_FOR_altivec_vmrglh,
+      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_internal
+       : CODE_FOR_altivec_vmrglh_internal),
      { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
     { OPTION_MASK_ALTIVEC,
-      BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw : CODE_FOR_altivec_vmrglw,
+      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_internal
+       : CODE_FOR_altivec_vmrglw_internal),
      { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
     { OPTION_MASK_ALTIVEC,
-      BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb : CODE_FOR_altivec_vmrghb,
+      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_internal
+       : CODE_FOR_altivec_vmrghb_internal),
      { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
     { OPTION_MASK_ALTIVEC,
-      BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh : CODE_FOR_altivec_vmrghh,
+      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_internal
+       : CODE_FOR_altivec_vmrghh_internal),
      { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
     { OPTION_MASK_ALTIVEC,
-      BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw : CODE_FOR_altivec_vmrghw,
+      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_internal
+       : CODE_FOR_altivec_vmrghw_internal),
      { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
     { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew,
      { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
Index: gcc/config/rs6000/vsx.md
===================================================================
--- gcc/config/rs6000/vsx.md	(revision 206889)
+++ gcc/config/rs6000/vsx.md	(working copy)
@@ -1655,25 +1672,55 @@
 ;; Expanders for builtins
 (define_expand "vsx_mergel_<mode>"
-  [(set (match_operand:VSX_D 0 "vsx_register_operand" "")
-	(vec_select:VSX_D
-	  (vec_concat:<VS_double>
-	    (match_operand:VSX_D 1 "vsx_register_operand" "")
-	    (match_operand:VSX_D 2 "vsx_register_operand" ""))
-	  (parallel [(const_int 1) (const_int 3)])))]
+  [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
+   (use (match_operand:VSX_D 1 "vsx_register_operand" ""))
+   (use (match_operand:VSX_D 2 "vsx_register_operand" ""))]
   "VECTOR_MEM_VSX_P (<MODE>mode)"
-  "")
+{
+  rtvec v;
+  rtx x;
+  /* Special handling for LE with -maltivec=be.  */
+  if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
+    {
+      v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
+      x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
+    }
+  else
+    {
+      v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
+      x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
+    }
+
+  x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
+  emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
+})
+
 (define_expand "vsx_mergeh_<mode>"
-  [(set (match_operand:VSX_D 0 "vsx_register_operand" "")
-	(vec_select:VSX_D
-	  (vec_concat:<VS_double>
-	    (match_operand:VSX_D 1 "vsx_register_operand" "")
-	    (match_operand:VSX_D 2 "vsx_register_operand" ""))
-	  (parallel [(const_int 0) (const_int 2)])))]
+  [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
+   (use (match_operand:VSX_D 1 "vsx_register_operand" ""))
+   (use (match_operand:VSX_D 2 "vsx_register_operand" ""))]
   "VECTOR_MEM_VSX_P (<MODE>mode)"
-  "")
+{
+  rtvec v;
+  rtx x;
+  /* Special handling for LE with -maltivec=be.  */
+  if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
+    {
+      v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
+      x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
+    }
+  else
+    {
+      v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
+      x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
+    }
+
+  x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
+  emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
+})
+
 ;; V2DF/V2DI splat
 (define_insn "vsx_splat_<mode>"
   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,wd,wd,?wa,?wa,?wa")
Index: gcc/config/rs6000/altivec.md
===================================================================
--- gcc/config/rs6000/altivec.md	(revision 206889)
+++ gcc/config/rs6000/altivec.md	(working copy)
@@ -129,6 +129,12 @@
    UNSPEC_VUPKHU_V4SF
    UNSPEC_VUPKLU_V4SF
    UNSPEC_VGBBD
+   UNSPEC_VMRGHB_INTERNAL
+   UNSPEC_VMRGHH_INTERNAL
+   UNSPEC_VMRGHW_INTERNAL
+   UNSPEC_VMRGLB_INTERNAL
+   UNSPEC_VMRGLH_INTERNAL
+   UNSPEC_VMRGLW_INTERNAL
 ])
 
 (define_c_enum "unspecv"
@@ -677,8 +683,8 @@
     {
       emit_insn (gen_altivec_vmulesh (even, operands[1], operands[2]));
      emit_insn (gen_altivec_vmulosh (odd, operands[1], operands[2]));
-      emit_insn (gen_altivec_vmrghw (high, even, odd));
-      emit_insn (gen_altivec_vmrglw (low, even, odd));
+      emit_insn (gen_altivec_vmrghw_internal (high, even, odd));
+      emit_insn (gen_altivec_vmrglw_internal (low, even, odd));
      emit_insn (gen_altivec_vpkuwum (operands[0], high, low));
    }
  else
@@ -839,9 +845,40 @@
   "vmladduhm %0,%1,%2,%3"
   [(set_attr "type" "veccomplex")])
 
-(define_insn "altivec_vmrghb"
+(define_expand "altivec_vmrghb"
+  [(use (match_operand:V16QI 0 "register_operand" ""))
+   (use (match_operand:V16QI 1 "register_operand" ""))
+   (use (match_operand:V16QI 2 "register_operand" ""))]
+  "TARGET_ALTIVEC"
+{
+  rtvec v;
+  rtx x;
+
+  /* Special handling for LE with -maltivec=be.  */
+  if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
+    {
+      v = gen_rtvec (16, GEN_INT (8), GEN_INT (24), GEN_INT (9), GEN_INT (25),
+                     GEN_INT (10), GEN_INT (26), GEN_INT (11), GEN_INT (27),
+                     GEN_INT (12), GEN_INT (28), GEN_INT (13), GEN_INT (29),
+                     GEN_INT (14), GEN_INT (30), GEN_INT (15), GEN_INT (31));
+      x = gen_rtx_VEC_CONCAT (V32QImode, operands[2], operands[1]);
+    }
+  else
+    {
+      v = gen_rtvec (16, GEN_INT (0), GEN_INT (16), GEN_INT (1), GEN_INT (17),
+                     GEN_INT (2), GEN_INT (18), GEN_INT (3), GEN_INT (19),
+                     GEN_INT (4), GEN_INT (20), GEN_INT (5), GEN_INT (21),
+                     GEN_INT (6), GEN_INT (22), GEN_INT (7), GEN_INT (23));
+      x = gen_rtx_VEC_CONCAT (V32QImode, operands[1], operands[2]);
+    }
+
+  x = gen_rtx_VEC_SELECT (V16QImode, x, gen_rtx_PARALLEL (VOIDmode, v));
+  emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
+})
+
+(define_insn "*altivec_vmrghb_endian"
   [(set (match_operand:V16QI 0 "register_operand" "=v")
-        (vec_select:V16QI
+	(vec_select:V16QI
         (vec_concat:V32QI
           (match_operand:V16QI 1 "register_operand" "v")
           (match_operand:V16QI 2 "register_operand" "v"))
@@ -854,12 +891,53 @@
                      (const_int 6) (const_int 22)
                      (const_int 7) (const_int 23)])))]
   "TARGET_ALTIVEC"
+{
+  if (BYTES_BIG_ENDIAN)
+    return "vmrghb %0,%1,%2";
+  else
+    return "vmrglb %0,%2,%1";
+}
+  [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vmrghb_internal"
+  [(set (match_operand:V16QI 0 "register_operand" "=v")
+        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")
+                       (match_operand:V16QI 2 "register_operand" "v")]
+                      UNSPEC_VMRGHB_INTERNAL))]
+  "TARGET_ALTIVEC"
   "vmrghb %0,%1,%2"
   [(set_attr "type" "vecperm")])
 
-(define_insn "altivec_vmrghh"
+(define_expand "altivec_vmrghh"
+  [(use (match_operand:V8HI 0 "register_operand" ""))
+   (use (match_operand:V8HI 1 "register_operand" ""))
+   (use (match_operand:V8HI 2 "register_operand" ""))]
+  "TARGET_ALTIVEC"
+{
+  rtvec v;
+  rtx x;
+
+  /* Special handling for LE with -maltivec=be.  */
+  if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
+    {
+      v = gen_rtvec (8, GEN_INT (4), GEN_INT (12), GEN_INT (5), GEN_INT (13),
+                     GEN_INT (6), GEN_INT (14), GEN_INT (7), GEN_INT (15));
+      x = gen_rtx_VEC_CONCAT (V16HImode, operands[2], operands[1]);
+    }
+  else
+    {
+      v = gen_rtvec (8, GEN_INT (0), GEN_INT (8), GEN_INT (1), GEN_INT (9),
+                     GEN_INT (2), GEN_INT (10), GEN_INT (3), GEN_INT (11));
+      x = gen_rtx_VEC_CONCAT (V16HImode, operands[1], operands[2]);
+    }
+
+  x = gen_rtx_VEC_SELECT (V8HImode, x, gen_rtx_PARALLEL (VOIDmode, v));
+  emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
+})
+
+(define_insn "*altivec_vmrghh_endian"
   [(set (match_operand:V8HI 0 "register_operand" "=v")
-        (vec_select:V8HI
+	(vec_select:V8HI
         (vec_concat:V16HI
           (match_operand:V8HI 1 "register_operand" "v")
           (match_operand:V8HI 2 "register_operand" "v"))
@@ -868,10 +946,49 @@
                      (const_int 2) (const_int 10)
                      (const_int 3) (const_int 11)])))]
   "TARGET_ALTIVEC"
+{
+  if (BYTES_BIG_ENDIAN)
+    return "vmrghh %0,%1,%2";
+  else
+    return "vmrglh %0,%2,%1";
+}
+  [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vmrghh_internal"
+  [(set (match_operand:V8HI 0 "register_operand" "=v")
+        (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v")
+                      (match_operand:V8HI 2 "register_operand" "v")]
+                     UNSPEC_VMRGHH_INTERNAL))]
+  "TARGET_ALTIVEC"
   "vmrghh %0,%1,%2"
   [(set_attr "type" "vecperm")])
 
-(define_insn "altivec_vmrghw"
+(define_expand "altivec_vmrghw"
+  [(use (match_operand:V4SI 0 "register_operand" ""))
+   (use (match_operand:V4SI 1 "register_operand" ""))
+   (use (match_operand:V4SI 2 "register_operand" ""))]
+  "VECTOR_MEM_ALTIVEC_P (V4SImode)"
+{
+  rtvec v;
+  rtx x;
+
+  /* Special handling for LE with -maltivec=be.  */
+  if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
+    {
+      v = gen_rtvec (4, GEN_INT (2), GEN_INT (6), GEN_INT (3), GEN_INT (7));
+      x = gen_rtx_VEC_CONCAT (V8SImode, operands[2], operands[1]);
+    }
+  else
+    {
+      v = gen_rtvec (4, GEN_INT (0), GEN_INT (4), GEN_INT (1), GEN_INT (5));
+      x = gen_rtx_VEC_CONCAT (V8SImode, operands[1], operands[2]);
+    }
+
+  x = gen_rtx_VEC_SELECT (V4SImode, x, gen_rtx_PARALLEL (VOIDmode, v));
+  emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
+})
+
+(define_insn "*altivec_vmrghw_endian"
   [(set (match_operand:V4SI 0 "register_operand" "=v")
         (vec_select:V4SI
          (vec_concat:V8SI
@@ -880,6 +997,20 @@
           (parallel [(const_int 0) (const_int 4)
                      (const_int 1) (const_int 5)])))]
   "VECTOR_MEM_ALTIVEC_P (V4SImode)"
+{
+  if (BYTES_BIG_ENDIAN)
+    return "vmrghw %0,%1,%2";
+  else
+    return "vmrglw %0,%2,%1";
+}
+  [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vmrghw_internal"
+  [(set (match_operand:V4SI 0 "register_operand" "=v")
+        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
+                      (match_operand:V4SI 2 "register_operand" "v")]
+                     UNSPEC_VMRGHW_INTERNAL))]
+  "TARGET_ALTIVEC"
   "vmrghw %0,%1,%2"
   [(set_attr "type" "vecperm")])
 
@@ -892,10 +1023,46 @@
           (parallel [(const_int 0) (const_int 4)
                      (const_int 1) (const_int 5)])))]
   "VECTOR_MEM_ALTIVEC_P (V4SFmode)"
-  "vmrghw %0,%1,%2"
+{
+  if (BYTES_BIG_ENDIAN)
+    return "vmrghw %0,%1,%2";
+  else
+    return "vmrglw %0,%2,%1";
+}
   [(set_attr "type" "vecperm")])
 
-(define_insn "altivec_vmrglb"
+(define_expand "altivec_vmrglb"
+  [(use (match_operand:V16QI 0 "register_operand" ""))
+   (use (match_operand:V16QI 1 "register_operand" ""))
+   (use (match_operand:V16QI 2 "register_operand" ""))]
+  "TARGET_ALTIVEC"
+{
+  rtvec v;
+  rtx x;
+
+  /* Special handling for LE with -maltivec=be.  */
+  if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
+    {
+      v = gen_rtvec (16, GEN_INT (0), GEN_INT (16), GEN_INT (1), GEN_INT (17),
+                     GEN_INT (2), GEN_INT (18), GEN_INT (3), GEN_INT (19),
+                     GEN_INT (4), GEN_INT (20), GEN_INT (5), GEN_INT (21),
+                     GEN_INT (6), GEN_INT (22), GEN_INT (7), GEN_INT (23));
+      x = gen_rtx_VEC_CONCAT (V32QImode, operands[2], operands[1]);
+    }
+  else
+    {
+      v = gen_rtvec (16, GEN_INT (8), GEN_INT (24), GEN_INT (9), GEN_INT (25),
+                     GEN_INT (10), GEN_INT (26), GEN_INT (11), GEN_INT (27),
+                     GEN_INT (12), GEN_INT (28), GEN_INT (13), GEN_INT (29),
+                     GEN_INT (14), GEN_INT (30), GEN_INT (15), GEN_INT (31));
+      x = gen_rtx_VEC_CONCAT (V32QImode, operands[1], operands[2]);
+    }
+
+  x = gen_rtx_VEC_SELECT (V16QImode, x, gen_rtx_PARALLEL (VOIDmode, v));
+  emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
+})
+
+(define_insn "*altivec_vmrglb_endian"
   [(set (match_operand:V16QI 0 "register_operand" "=v")
         (vec_select:V16QI
          (vec_concat:V32QI
@@ -910,10 +1077,51 @@
                      (const_int 14) (const_int 30)
                      (const_int 15) (const_int 31)])))]
   "TARGET_ALTIVEC"
+{
+  if (BYTES_BIG_ENDIAN)
+    return "vmrglb %0,%1,%2";
+  else
+    return "vmrghb %0,%2,%1";
+}
+  [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vmrglb_internal"
+  [(set (match_operand:V16QI 0 "register_operand" "=v")
+        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")
+                       (match_operand:V16QI 2 "register_operand" "v")]
+                      UNSPEC_VMRGLB_INTERNAL))]
+  "TARGET_ALTIVEC"
   "vmrglb %0,%1,%2"
   [(set_attr "type" "vecperm")])
 
-(define_insn "altivec_vmrglh"
+(define_expand "altivec_vmrglh"
+  [(use (match_operand:V8HI 0 "register_operand" ""))
+   (use (match_operand:V8HI 1 "register_operand" ""))
+   (use (match_operand:V8HI 2 "register_operand" ""))]
+  "TARGET_ALTIVEC"
+{
+  rtvec v;
+  rtx x;
+
+  /* Special handling for LE with -maltivec=be.  */
+  if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
+    {
+      v = gen_rtvec (8, GEN_INT (0), GEN_INT (8), GEN_INT (1), GEN_INT (9),
+                     GEN_INT (2), GEN_INT (10), GEN_INT (3), GEN_INT (11));
+      x = gen_rtx_VEC_CONCAT (V16HImode, operands[2], operands[1]);
+    }
+  else
+    {
+      v = gen_rtvec (8, GEN_INT (4), GEN_INT (12), GEN_INT (5), GEN_INT (13),
+                     GEN_INT (6), GEN_INT (14), GEN_INT (7), GEN_INT (15));
+      x = gen_rtx_VEC_CONCAT (V16HImode, operands[1], operands[2]);
+    }
+
+  x = gen_rtx_VEC_SELECT (V8HImode, x, gen_rtx_PARALLEL (VOIDmode, v));
+  emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
+})
+
+(define_insn "*altivec_vmrglh_endian"
   [(set (match_operand:V8HI 0 "register_operand" "=v")
         (vec_select:V8HI
          (vec_concat:V16HI
@@ -924,10 +1132,49 @@
                      (const_int 6) (const_int 14)
                      (const_int 7) (const_int 15)])))]
   "TARGET_ALTIVEC"
+{
+  if (BYTES_BIG_ENDIAN)
+    return "vmrglh %0,%1,%2";
+  else
+    return "vmrghh %0,%2,%1";
+}
+  [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vmrglh_internal"
+  [(set (match_operand:V8HI 0 "register_operand" "=v")
+        (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v")
+                      (match_operand:V8HI 2 "register_operand" "v")]
+                     UNSPEC_VMRGLH_INTERNAL))]
+  "TARGET_ALTIVEC"
   "vmrglh %0,%1,%2"
   [(set_attr "type" "vecperm")])
 
-(define_insn "altivec_vmrglw"
+(define_expand "altivec_vmrglw"
+  [(use (match_operand:V4SI 0 "register_operand" ""))
+   (use (match_operand:V4SI 1 "register_operand" ""))
+   (use (match_operand:V4SI 2 "register_operand" ""))]
+  "VECTOR_MEM_ALTIVEC_P (V4SImode)"
+{
+  rtvec v;
+  rtx x;
+
+  /* Special handling for LE with -maltivec=be.  */
+  if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
+    {
+      v = gen_rtvec (4, GEN_INT (0), GEN_INT (4), GEN_INT (1), GEN_INT (5));
+      x = gen_rtx_VEC_CONCAT (V8SImode, operands[2], operands[1]);
+    }
+  else
+    {
+      v = gen_rtvec (4, GEN_INT (2), GEN_INT (6), GEN_INT (3), GEN_INT (7));
+      x = gen_rtx_VEC_CONCAT (V8SImode, operands[1], operands[2]);
+    }
+
+  x = gen_rtx_VEC_SELECT (V4SImode, x, gen_rtx_PARALLEL (VOIDmode, v));
+  emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
+})
+
+(define_insn "*altivec_vmrglw_endian"
   [(set (match_operand:V4SI 0 "register_operand" "=v")
         (vec_select:V4SI
          (vec_concat:V8SI
@@ -936,6 +1183,37 @@
           (parallel [(const_int 2) (const_int 6)
                      (const_int 3) (const_int 7)])))]
   "VECTOR_MEM_ALTIVEC_P (V4SImode)"
+{
+  if (BYTES_BIG_ENDIAN)
+    return "vmrglw %0,%1,%2";
+  else
+    return "vmrghw %0,%2,%1";
+}
+  [(set_attr "type" "vecperm")])
+
+;(define_insn "altivec_vmrglw"
+;  [(set (match_operand:V4SI 0 "register_operand" "=v")
+;        (vec_select:V4SI
+;         (vec_concat:V8SI
+;           (match_operand:V4SI 1 "register_operand" "v")
+;           (match_operand:V4SI 2 "register_operand" "v"))
+;         (parallel [(const_int 2) (const_int 6)
+;                    (const_int 3) (const_int 7)])))]
+;  "VECTOR_MEM_ALTIVEC_P (V4SImode)"
+;{
+;  if (VECTOR_ELT_ORDER_BIG)
+;    return "vmrglw %0,%1,%2";
+;  else
+;    return "vmrghw %0,%2,%1";
+;}
+;  [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vmrglw_internal"
+  [(set (match_operand:V4SI 0 "register_operand" "=v")
+        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
+                      (match_operand:V4SI 2 "register_operand" "v")]
+                     UNSPEC_VMRGLW_INTERNAL))]
+  "TARGET_ALTIVEC"
   "vmrglw %0,%1,%2"
   [(set_attr "type" "vecperm")])
 
@@ -948,7 +1226,12 @@
           (parallel [(const_int 2) (const_int 6)
                      (const_int 3) (const_int 7)])))]
   "VECTOR_MEM_ALTIVEC_P (V4SFmode)"
-  "vmrglw %0,%1,%2"
+{
+  if (BYTES_BIG_ENDIAN)
+    return "vmrglw %0,%1,%2";
+  else
+    return "vmrghw %0,%2,%1";
+}
   [(set_attr "type" "vecperm")])
 
 ;; Power8 vector merge even/odd
@@ -2225,13 +2508,13 @@
     {
      emit_insn (gen_altivec_vmuleub (ve, operands[1], operands[2]));
      emit_insn (gen_altivec_vmuloub (vo, operands[1], operands[2]));
-      emit_insn (gen_altivec_vmrghh (operands[0], ve, vo));
+      emit_insn (gen_altivec_vmrghh_internal (operands[0], ve, vo));
    }
  else
    {
      emit_insn (gen_altivec_vmuloub (ve, operands[1], operands[2]));
      emit_insn (gen_altivec_vmuleub (vo, operands[1], operands[2]));
-      emit_insn (gen_altivec_vmrghh (operands[0], vo, ve));
+      emit_insn (gen_altivec_vmrghh_internal (operands[0], vo, ve));
    }
  DONE;
}")
@@ -2251,13 +2534,13 @@
    {
      emit_insn (gen_altivec_vmuleub (ve, operands[1], operands[2]));
      emit_insn (gen_altivec_vmuloub (vo, operands[1], operands[2]));
-      emit_insn (gen_altivec_vmrglh (operands[0], ve, vo));
+      emit_insn (gen_altivec_vmrglh_internal (operands[0], ve, vo));
    }
  else
    {
      emit_insn (gen_altivec_vmuloub (ve, operands[1], operands[2]));
      emit_insn (gen_altivec_vmuleub (vo, operands[1], operands[2]));
-      emit_insn (gen_altivec_vmrglh (operands[0], vo, ve));
+      emit_insn (gen_altivec_vmrglh_internal (operands[0], vo, ve));
    }
  DONE;
}")
@@ -2277,13 +2560,13 @@
    {
      emit_insn (gen_altivec_vmulesb (ve, operands[1], operands[2]));
      emit_insn (gen_altivec_vmulosb (vo, operands[1], operands[2]));
-      emit_insn (gen_altivec_vmrghh (operands[0], ve, vo));
+      emit_insn (gen_altivec_vmrghh_internal (operands[0], ve, vo));
    }
  else
    {
      emit_insn (gen_altivec_vmulosb (ve, operands[1], operands[2]));
      emit_insn (gen_altivec_vmulesb (vo, operands[1], operands[2]));
-      emit_insn (gen_altivec_vmrghh (operands[0], vo, ve));
+      emit_insn (gen_altivec_vmrghh_internal (operands[0], vo, ve));
    }
  DONE;
}")
@@ -2303,13 +2586,13 @@
    {
      emit_insn (gen_altivec_vmulesb (ve, operands[1], operands[2]));
      emit_insn (gen_altivec_vmulosb (vo, operands[1], operands[2]));
-      emit_insn (gen_altivec_vmrglh (operands[0], ve, vo));
+      emit_insn (gen_altivec_vmrglh_internal (operands[0], ve, vo));
    }
  else
    {
      emit_insn (gen_altivec_vmulosb (ve, operands[1], operands[2]));
      emit_insn (gen_altivec_vmulesb (vo, operands[1], operands[2]));
-      emit_insn (gen_altivec_vmrglh (operands[0], vo, ve));
+      emit_insn (gen_altivec_vmrglh_internal (operands[0], vo, ve));
    }
  DONE;
}")
@@ -2329,13 +2612,13 @@
    {
      emit_insn (gen_altivec_vmuleuh (ve, operands[1], operands[2]));
      emit_insn (gen_altivec_vmulouh (vo, operands[1], operands[2]));
-      emit_insn (gen_altivec_vmrghw (operands[0], ve, vo));
+      emit_insn (gen_altivec_vmrghw_internal (operands[0], ve, vo));
    }
  else
    {
      emit_insn (gen_altivec_vmulouh (ve, operands[1], operands[2]));
      emit_insn (gen_altivec_vmuleuh (vo, operands[1], operands[2]));
-      emit_insn (gen_altivec_vmrghw (operands[0], vo, ve));
+      emit_insn (gen_altivec_vmrghw_internal (operands[0], vo, ve));
    }
  DONE;
}")
@@ -2355,13 +2638,13 @@
    {
      emit_insn (gen_altivec_vmuleuh (ve, operands[1], operands[2]));
      emit_insn (gen_altivec_vmulouh (vo, operands[1], operands[2]));
-      emit_insn (gen_altivec_vmrglw (operands[0], ve, vo));
+      emit_insn (gen_altivec_vmrglw_internal (operands[0], ve, vo));
    }
  else
    {
      emit_insn (gen_altivec_vmulouh (ve, operands[1], operands[2]));
      emit_insn (gen_altivec_vmuleuh (vo, operands[1], operands[2]));
-      emit_insn (gen_altivec_vmrglw (operands[0], vo, ve));
+      emit_insn (gen_altivec_vmrglw_internal (operands[0], vo, ve));
    }
  DONE;
}")
@@ -2381,13 +2664,13 @@
    {
      emit_insn (gen_altivec_vmulesh (ve, operands[1], operands[2]));
      emit_insn (gen_altivec_vmulosh (vo, operands[1], operands[2]));
-      emit_insn (gen_altivec_vmrghw (operands[0], ve, vo));
+      emit_insn (gen_altivec_vmrghw_internal (operands[0], ve, vo));
    }
  else
    {
      emit_insn (gen_altivec_vmulosh (ve, operands[1], operands[2]));
      emit_insn (gen_altivec_vmulesh (vo, operands[1], operands[2]));
-      emit_insn (gen_altivec_vmrghw (operands[0], vo, ve));
+      emit_insn (gen_altivec_vmrghw_internal (operands[0], vo, ve));
    }
  DONE;
}")
@@ -2407,13 +2690,13 @@
    {
      emit_insn (gen_altivec_vmulesh (ve, operands[1], operands[2]));
      emit_insn (gen_altivec_vmulosh (vo, operands[1], operands[2]));
-      emit_insn (gen_altivec_vmrglw (operands[0], ve, vo));
+      emit_insn (gen_altivec_vmrglw_internal (operands[0], ve, vo));
    }
  else
    {
      emit_insn (gen_altivec_vmulosh (ve, operands[1], operands[2]));
      emit_insn (gen_altivec_vmulesh (vo, operands[1], operands[2]));
-      emit_insn (gen_altivec_vmrglw (operands[0], vo, ve));
+      emit_insn (gen_altivec_vmrglw_internal (operands[0], vo, ve));
    }
  DONE;
}")