Hi,

This patch continues the series of changes to the Altivec builtins to
accommodate big-endian element order when targeting a little-endian
machine.  Here the focus is on the vector merge-high and merge-low
operations.
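As a quick refresher on the semantics (this sketch is mine, not part
of the patch): in big-endian element order, where element 0 is the
leftmost element, merge-high interleaves the first halves of the two
inputs and merge-low interleaves the second halves.  In plain C, for
four-element vectors:

#include <stdio.h>

#define N 4  /* four 32-bit elements per vector */

/* Merge-high: interleave the first (leftmost) halves of a and b.  */
static void merge_high (const int *a, const int *b, int *out)
{
  for (int i = 0; i < N / 2; i++)
    {
      out[2 * i] = a[i];
      out[2 * i + 1] = b[i];
    }
}

/* Merge-low: interleave the last (rightmost) halves of a and b.  */
static void merge_low (const int *a, const int *b, int *out)
{
  for (int i = 0; i < N / 2; i++)
    {
      out[2 * i] = a[N / 2 + i];
      out[2 * i + 1] = b[N / 2 + i];
    }
}

int main (void)
{
  int a[N] = { 0, 1, 2, 3 }, b[N] = { 4, 5, 6, 7 }, h[N], l[N];
  merge_high (a, b, h);  /* h = {0,4,1,5} */
  merge_low (a, b, l);   /* l = {2,6,3,7} */
  for (int i = 0; i < N; i++)
    printf ("%d %d\n", h[i], l[i]);
  return 0;
}

The expected results in the new merge.c test below follow this model.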
The primary change is in altivec.md.  As an example, look at the
pattern altivec_vmrghw.  Previously this was represented with a single
define_insn.  Now it's been split into a define_expand to create the
RTL, and a define_insn (*altivec_vmrghw_endian) to generate the
hardware instruction.  This is because we need a different selection
vector when using -maltivec=be and targeting LE.  (Normally LE and BE
can use the same selection vector, and GCC takes care of interpreting
the indices as left-to-right or right-to-left.)  The new define_insn
also substitutes vmrglw with swapped operands for vmrghw in
little-endian mode, since the hardware instruction has "big-endian
bias" in its interpretation of "high" and "low."

Because -maltivec=be applies only to programmer-specified builtins, we
need to adjust internal uses of altivec_vmrghw and friends.  Thus we
have a new define_insn altivec_vmrghw_internal that generates the
hardware instruction directly, with none of the above transformations.
New unspecs are needed for these internal forms.

The VSX flavors of merge-high and merge-low are a little simpler (see
vsx.md).  Here we already had a define_expand where the instructions
are generated by separate xxpermdi patterns, and there are no internal
uses to worry about.  So we only need to change the selection vector
in the generated RTL.

There are four new test cases that cover all of the supported data
types.  Tests are divided between those that require only VMX
instructions and those that require VSX instructions.  There are also
variants for -maltivec and -maltivec=be.

Bootstrapped and tested on powerpc64{,le}-unknown-linux-gnu with no
regressions.  Ok for trunk?

Thanks,
Bill


gcc:

2014-01-23  Bill Schmidt  <wschm...@linux.vnet.ibm.com>

	* config/rs6000/rs6000.c (altivec_expand_vec_perm_const): Use
	CODE_FOR_altivec_vmrg*_internal rather than CODE_FOR_altivec_vmrg*.
	* config/rs6000/vsx.md (vsx_mergel_<mode>): Adjust for
	-maltivec=be with LE targets.
	(vsx_mergeh_<mode>): Likewise.
	* config/rs6000/altivec.md (UNSPEC_VMRG[HL][BHW]_INTERNAL): New
	unspecs.
	(mulv8hi3): Use gen_altivec_vmrg[hl]w_internal.
	(altivec_vmrghb): Replace with define_expand and new
	*altivec_vmrghb_endian insn; adjust for -maltivec=be with LE
	targets.
	(altivec_vmrghb_internal): New define_insn.
	(altivec_vmrghh): Replace with define_expand and new
	*altivec_vmrghh_endian insn; adjust for -maltivec=be with LE
	targets.
	(altivec_vmrghh_internal): New define_insn.
	(altivec_vmrghw): Replace with define_expand and new
	*altivec_vmrghw_endian insn; adjust for -maltivec=be with LE
	targets.
	(altivec_vmrghw_internal): New define_insn.
	(*altivec_vmrghsf): Adjust for endianness.
	(altivec_vmrglb): Replace with define_expand and new
	*altivec_vmrglb_endian insn; adjust for -maltivec=be with LE
	targets.
	(altivec_vmrglb_internal): New define_insn.
	(altivec_vmrglh): Replace with define_expand and new
	*altivec_vmrglh_endian insn; adjust for -maltivec=be with LE
	targets.
	(altivec_vmrglh_internal): New define_insn.
	(altivec_vmrglw): Replace with define_expand and new
	*altivec_vmrglw_endian insn; adjust for -maltivec=be with LE
	targets.
	(altivec_vmrglw_internal): New define_insn.
	(*altivec_vmrglsf): Adjust for endianness.
	(vec_widen_umult_hi_v16qi): Use gen_altivec_vmrghh_internal.
	(vec_widen_umult_lo_v16qi): Use gen_altivec_vmrglh_internal.
	(vec_widen_smult_hi_v16qi): Use gen_altivec_vmrghh_internal.
	(vec_widen_smult_lo_v16qi): Use gen_altivec_vmrglh_internal.
	(vec_widen_umult_hi_v8hi): Use gen_altivec_vmrghw_internal.
	(vec_widen_umult_lo_v8hi): Use gen_altivec_vmrglw_internal.
	(vec_widen_smult_hi_v8hi): Use gen_altivec_vmrghw_internal.
	(vec_widen_smult_lo_v8hi): Use gen_altivec_vmrglw_internal.

gcc/testsuite:

2014-01-23  Bill Schmidt  <wschm...@linux.vnet.ibm.com>

	* gcc.dg/vmx/merge-be-order.c: New.
	* gcc.dg/vmx/merge.c: New.
	* gcc.dg/vmx/merge-vsx-be-order.c: New.
	* gcc.dg/vmx/merge-vsx.c: New.

Index: gcc/testsuite/gcc.dg/vmx/merge-be-order.c
===================================================================
--- gcc/testsuite/gcc.dg/vmx/merge-be-order.c	(revision 0)
+++ gcc/testsuite/gcc.dg/vmx/merge-be-order.c	(revision 0)
@@ -0,0 +1,96 @@
+/* { dg-options "-maltivec=be -mabi=altivec -std=gnu99 -mno-vsx" } */
+
+#include "harness.h"
+
+static void test()
+{
+  /* Input vectors.  */
+  vector unsigned char vuca = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+  vector unsigned char vucb
+    = {16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
+  vector signed char vsca
+    = {-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1};
+  vector signed char vscb = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+  vector unsigned short vusa = {0,1,2,3,4,5,6,7};
+  vector unsigned short vusb = {8,9,10,11,12,13,14,15};
+  vector signed short vssa = {-8,-7,-6,-5,-4,-3,-2,-1};
+  vector signed short vssb = {0,1,2,3,4,5,6,7};
+  vector unsigned int vuia = {0,1,2,3};
+  vector unsigned int vuib = {4,5,6,7};
+  vector signed int vsia = {-4,-3,-2,-1};
+  vector signed int vsib = {0,1,2,3};
+  vector float vfa = {-4.0,-3.0,-2.0,-1.0};
+  vector float vfb = {0.0,1.0,2.0,3.0};
+
+  /* Result vectors.  */
+  vector unsigned char vuch, vucl;
+  vector signed char vsch, vscl;
+  vector unsigned short vush, vusl;
+  vector signed short vssh, vssl;
+  vector unsigned int vuih, vuil;
+  vector signed int vsih, vsil;
+  vector float vfh, vfl;
+
+  /* Expected result vectors.  */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+  vector unsigned char vucrh = {24,8,25,9,26,10,27,11,28,12,29,13,30,14,31,15};
+  vector unsigned char vucrl = {16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7};
+  vector signed char vscrh = {8,-8,9,-7,10,-6,11,-5,12,-4,13,-3,14,-2,15,-1};
+  vector signed char vscrl = {0,-16,1,-15,2,-14,3,-13,4,-12,5,-11,6,-10,7,-9};
+  vector unsigned short vusrh = {12,4,13,5,14,6,15,7};
+  vector unsigned short vusrl = {8,0,9,1,10,2,11,3};
+  vector signed short vssrh = {4,-4,5,-3,6,-2,7,-1};
+  vector signed short vssrl = {0,-8,1,-7,2,-6,3,-5};
+  vector unsigned int vuirh = {6,2,7,3};
+  vector unsigned int vuirl = {4,0,5,1};
+  vector signed int vsirh = {2,-2,3,-1};
+  vector signed int vsirl = {0,-4,1,-3};
+  vector float vfrh = {2.0,-2.0,3.0,-1.0};
+  vector float vfrl = {0.0,-4.0,1.0,-3.0};
+#else
+  vector unsigned char vucrh = {0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23};
+  vector unsigned char vucrl = {8,24,9,25,10,26,11,27,12,28,13,29,14,30,15,31};
+  vector signed char vscrh = {-16,0,-15,1,-14,2,-13,3,-12,4,-11,5,-10,6,-9,7};
+  vector signed char vscrl = {-8,8,-7,9,-6,10,-5,11,-4,12,-3,13,-2,14,-1,15};
+  vector unsigned short vusrh = {0,8,1,9,2,10,3,11};
+  vector unsigned short vusrl = {4,12,5,13,6,14,7,15};
+  vector signed short vssrh = {-8,0,-7,1,-6,2,-5,3};
+  vector signed short vssrl = {-4,4,-3,5,-2,6,-1,7};
+  vector unsigned int vuirh = {0,4,1,5};
+  vector unsigned int vuirl = {2,6,3,7};
+  vector signed int vsirh = {-4,0,-3,1};
+  vector signed int vsirl = {-2,2,-1,3};
+  vector float vfrh = {-4.0,0.0,-3.0,1.0};
+  vector float vfrl = {-2.0,2.0,-1.0,3.0};
+#endif
+
+  vuch = vec_mergeh (vuca, vucb);
+  vucl = vec_mergel (vuca, vucb);
+  vsch = vec_mergeh (vsca, vscb);
+  vscl = vec_mergel (vsca, vscb);
+  vush = vec_mergeh (vusa, vusb);
+  vusl = vec_mergel (vusa, vusb);
+  vssh = vec_mergeh (vssa, vssb);
+  vssl = vec_mergel (vssa, vssb);
+  vuih = vec_mergeh (vuia, vuib);
+  vuil = vec_mergel (vuia, vuib);
+  vsih = vec_mergeh (vsia, vsib);
+  vsil = vec_mergel (vsia, vsib);
+  vfh = vec_mergeh (vfa, vfb);
+  vfl = vec_mergel (vfa, vfb);
+
+  check (vec_all_eq (vuch, vucrh), "vuch");
+  check (vec_all_eq (vucl, vucrl), "vucl");
+  check (vec_all_eq (vsch, vscrh), "vsch");
+  check (vec_all_eq (vscl, vscrl), "vscl");
+  check (vec_all_eq (vush, vusrh), "vush");
+  check (vec_all_eq (vusl, vusrl), "vusl");
+  check (vec_all_eq (vssh, vssrh), "vssh");
+  check (vec_all_eq (vssl, vssrl), "vssl");
+  check (vec_all_eq (vuih, vuirh), "vuih");
+  check (vec_all_eq (vuil, vuirl), "vuil");
+  check (vec_all_eq (vsih, vsirh), "vsih");
+  check (vec_all_eq (vsil, vsirl), "vsil");
+  check (vec_all_eq (vfh, vfrh), "vfh");
+  check (vec_all_eq (vfl, vfrl), "vfl");
+}
Index: gcc/testsuite/gcc.dg/vmx/merge.c
===================================================================
--- gcc/testsuite/gcc.dg/vmx/merge.c	(revision 0)
+++ gcc/testsuite/gcc.dg/vmx/merge.c	(revision 0)
@@ -0,0 +1,77 @@
+#include "harness.h"
+
+static void test()
+{
+  /* Input vectors.  */
+  vector unsigned char vuca = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+  vector unsigned char vucb
+    = {16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
+  vector signed char vsca
+    = {-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1};
+  vector signed char vscb = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+  vector unsigned short vusa = {0,1,2,3,4,5,6,7};
+  vector unsigned short vusb = {8,9,10,11,12,13,14,15};
+  vector signed short vssa = {-8,-7,-6,-5,-4,-3,-2,-1};
+  vector signed short vssb = {0,1,2,3,4,5,6,7};
+  vector unsigned int vuia = {0,1,2,3};
+  vector unsigned int vuib = {4,5,6,7};
+  vector signed int vsia = {-4,-3,-2,-1};
+  vector signed int vsib = {0,1,2,3};
+  vector float vfa = {-4.0,-3.0,-2.0,-1.0};
+  vector float vfb = {0.0,1.0,2.0,3.0};
+
+  /* Result vectors.  */
+  vector unsigned char vuch, vucl;
+  vector signed char vsch, vscl;
+  vector unsigned short vush, vusl;
+  vector signed short vssh, vssl;
+  vector unsigned int vuih, vuil;
+  vector signed int vsih, vsil;
+  vector float vfh, vfl;
+
+  /* Expected result vectors.  */
+  vector unsigned char vucrh = {0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23};
+  vector unsigned char vucrl = {8,24,9,25,10,26,11,27,12,28,13,29,14,30,15,31};
+  vector signed char vscrh = {-16,0,-15,1,-14,2,-13,3,-12,4,-11,5,-10,6,-9,7};
+  vector signed char vscrl = {-8,8,-7,9,-6,10,-5,11,-4,12,-3,13,-2,14,-1,15};
+  vector unsigned short vusrh = {0,8,1,9,2,10,3,11};
+  vector unsigned short vusrl = {4,12,5,13,6,14,7,15};
+  vector signed short vssrh = {-8,0,-7,1,-6,2,-5,3};
+  vector signed short vssrl = {-4,4,-3,5,-2,6,-1,7};
+  vector unsigned int vuirh = {0,4,1,5};
+  vector unsigned int vuirl = {2,6,3,7};
+  vector signed int vsirh = {-4,0,-3,1};
+  vector signed int vsirl = {-2,2,-1,3};
+  vector float vfrh = {-4.0,0.0,-3.0,1.0};
+  vector float vfrl = {-2.0,2.0,-1.0,3.0};
+
+  vuch = vec_mergeh (vuca, vucb);
+  vucl = vec_mergel (vuca, vucb);
+  vsch = vec_mergeh (vsca, vscb);
+  vscl = vec_mergel (vsca, vscb);
+  vush = vec_mergeh (vusa, vusb);
+  vusl = vec_mergel (vusa, vusb);
+  vssh = vec_mergeh (vssa, vssb);
+  vssl = vec_mergel (vssa, vssb);
+  vuih = vec_mergeh (vuia, vuib);
+  vuil = vec_mergel (vuia, vuib);
+  vsih = vec_mergeh (vsia, vsib);
+  vsil = vec_mergel (vsia, vsib);
+  vfh = vec_mergeh (vfa, vfb);
+  vfl = vec_mergel (vfa, vfb);
+
+  check (vec_all_eq (vuch, vucrh), "vuch");
+  check (vec_all_eq (vucl, vucrl), "vucl");
+  check (vec_all_eq (vsch, vscrh), "vsch");
+  check (vec_all_eq (vscl, vscrl), "vscl");
+  check (vec_all_eq (vush, vusrh), "vush");
+  check (vec_all_eq (vusl, vusrl), "vusl");
+  check (vec_all_eq (vssh, vssrh), "vssh");
+  check (vec_all_eq (vssl, vssrl), "vssl");
+  check (vec_all_eq (vuih, vuirh), "vuih");
+  check (vec_all_eq (vuil, vuirl), "vuil");
+  check (vec_all_eq (vsih, vsirh), "vsih");
+  check (vec_all_eq (vsil, vsirl), "vsil");
+  check (vec_all_eq (vfh, vfrh), "vfh");
+  check (vec_all_eq (vfl, vfrl), "vfl");
+}
Index: gcc/testsuite/gcc.dg/vmx/merge-vsx-be-order.c
===================================================================
--- gcc/testsuite/gcc.dg/vmx/merge-vsx-be-order.c	(revision 0)
+++ gcc/testsuite/gcc.dg/vmx/merge-vsx-be-order.c	(revision 0)
@@ -0,0 +1,46 @@
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-maltivec=be -mabi=altivec -std=gnu99 -mvsx" } */
+
+#include "harness.h"
+
+static int vec_long_eq (vector long x, vector long y)
+{
+  return (x[0] == y[0] && x[1] == y[1]);
+}
+
+static void test()
+{
+  /* Input vectors.  */
+  vector long vla = {-2,-1};
+  vector long vlb = {0,1};
+  vector double vda = {-2.0,-1.0};
+  vector double vdb = {0.0,1.0};
+
+  /* Result vectors.  */
+  vector long vlh, vll;
+  vector double vdh, vdl;
+
+  /* Expected result vectors.  */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+  vector long vlrh = {1,-1};
+  vector long vlrl = {0,-2};
+  vector double vdrh = {1.0,-1.0};
+  vector double vdrl = {0.0,-2.0};
+#else
+  vector long vlrh = {-2,0};
+  vector long vlrl = {-1,1};
+  vector double vdrh = {-2.0,0.0};
+  vector double vdrl = {-1.0,1.0};
+#endif
+
+  vlh = vec_mergeh (vla, vlb);
+  vll = vec_mergel (vla, vlb);
+  vdh = vec_mergeh (vda, vdb);
+  vdl = vec_mergel (vda, vdb);
+
+  check (vec_long_eq (vlh, vlrh), "vlh");
+  check (vec_long_eq (vll, vlrl), "vll");
+  check (vec_all_eq (vdh, vdrh), "vdh");
+  check (vec_all_eq (vdl, vdrl), "vdl");
+}
Index: gcc/testsuite/gcc.dg/vmx/merge-vsx.c
===================================================================
--- gcc/testsuite/gcc.dg/vmx/merge-vsx.c	(revision 0)
+++ gcc/testsuite/gcc.dg/vmx/merge-vsx.c	(revision 0)
@@ -0,0 +1,39 @@
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-maltivec -mabi=altivec -std=gnu99 -mvsx" } */
+
+#include "harness.h"
+
+static int vec_long_eq (vector long x, vector long y)
+{
+  return (x[0] == y[0] && x[1] == y[1]);
+}
+
+static void test()
+{
+  /* Input vectors.  */
+  vector long vla = {-2,-1};
+  vector long vlb = {0,1};
+  vector double vda = {-2.0,-1.0};
+  vector double vdb = {0.0,1.0};
+
+  /* Result vectors.  */
+  vector long vlh, vll;
+  vector double vdh, vdl;
+
+  /* Expected result vectors.  */
+  vector long vlrh = {-2,0};
+  vector long vlrl = {-1,1};
+  vector double vdrh = {-2.0,0.0};
+  vector double vdrl = {-1.0,1.0};
+
+  vlh = vec_mergeh (vla, vlb);
+  vll = vec_mergel (vla, vlb);
+  vdh = vec_mergeh (vda, vdb);
+  vdl = vec_mergel (vda, vdb);
+
+  check (vec_long_eq (vlh, vlrh), "vlh");
+  check (vec_long_eq (vll, vlrl), "vll");
+  check (vec_all_eq (vdh, vdrh), "vdh");
+  check (vec_all_eq (vdl, vdrl), "vdl");
+}
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c	(revision 206889)
+++ gcc/config/rs6000/rs6000.c	(working copy)
@@ -29870,22 +29870,28 @@ altivec_expand_vec_perm_const (rtx operands[4])
     { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum,
       { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
     { OPTION_MASK_ALTIVEC,
-      BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb : CODE_FOR_altivec_vmrglb,
+      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_internal
+       : CODE_FOR_altivec_vmrglb_internal),
      { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
     { OPTION_MASK_ALTIVEC,
-      BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh : CODE_FOR_altivec_vmrglh,
+      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_internal
+       : CODE_FOR_altivec_vmrglh_internal),
      { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
     { OPTION_MASK_ALTIVEC,
-      BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw : CODE_FOR_altivec_vmrglw,
+      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_internal
+       : CODE_FOR_altivec_vmrglw_internal),
      { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
     { OPTION_MASK_ALTIVEC,
-      BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb : CODE_FOR_altivec_vmrghb,
+      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_internal
+       : CODE_FOR_altivec_vmrghb_internal),
      { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
     { OPTION_MASK_ALTIVEC,
-      BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh : CODE_FOR_altivec_vmrghh,
+      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_internal
+       : CODE_FOR_altivec_vmrghh_internal),
      { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
     { OPTION_MASK_ALTIVEC,
-      BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw : CODE_FOR_altivec_vmrghw,
+      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_internal
+       : CODE_FOR_altivec_vmrghw_internal),
      { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
     { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew,
      { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
Index: gcc/config/rs6000/vsx.md
===================================================================
--- gcc/config/rs6000/vsx.md	(revision 206889)
+++ gcc/config/rs6000/vsx.md	(working copy)
@@ -1655,25 +1672,55 @@
 ;; Expanders for builtins
 (define_expand "vsx_mergel_<mode>"
-  [(set (match_operand:VSX_D 0 "vsx_register_operand" "")
-	(vec_select:VSX_D
-	  (vec_concat:<VS_double>
-	    (match_operand:VSX_D 1 "vsx_register_operand" "")
-	    (match_operand:VSX_D 2 "vsx_register_operand" ""))
-	  (parallel [(const_int 1) (const_int 3)])))]
+  [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
+   (use (match_operand:VSX_D 1 "vsx_register_operand" ""))
+   (use (match_operand:VSX_D 2 "vsx_register_operand" ""))]
   "VECTOR_MEM_VSX_P (<MODE>mode)"
-  "")
+{
+  rtvec v;
+  rtx x;
+  /* Special handling for LE with -maltivec=be.  */
+  if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
+    {
+      v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
+      x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
+    }
+  else
+    {
+      v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
+      x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
+    }
+
+  x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
+  emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
+})
+
 (define_expand "vsx_mergeh_<mode>"
-  [(set (match_operand:VSX_D 0 "vsx_register_operand" "")
-	(vec_select:VSX_D
-	  (vec_concat:<VS_double>
-	    (match_operand:VSX_D 1 "vsx_register_operand" "")
-	    (match_operand:VSX_D 2 "vsx_register_operand" ""))
-	  (parallel [(const_int 0) (const_int 2)])))]
+  [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
+   (use (match_operand:VSX_D 1 "vsx_register_operand" ""))
+   (use (match_operand:VSX_D 2 "vsx_register_operand" ""))]
   "VECTOR_MEM_VSX_P (<MODE>mode)"
-  "")
+{
+  rtvec v;
+  rtx x;
+  /* Special handling for LE with -maltivec=be.  */
+  if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
+    {
+      v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
+      x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
+    }
+  else
+    {
+      v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
+      x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
+    }
+
+  x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
+  emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
+})
+
 ;; V2DF/V2DI splat
 (define_insn "vsx_splat_<mode>"
   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,wd,wd,?wa,?wa,?wa")
Index: gcc/config/rs6000/altivec.md
===================================================================
--- gcc/config/rs6000/altivec.md	(revision 206889)
+++ gcc/config/rs6000/altivec.md	(working copy)
@@ -129,6 +129,12 @@
    UNSPEC_VUPKHU_V4SF
    UNSPEC_VUPKLU_V4SF
    UNSPEC_VGBBD
+   UNSPEC_VMRGHB_INTERNAL
+   UNSPEC_VMRGHH_INTERNAL
+   UNSPEC_VMRGHW_INTERNAL
+   UNSPEC_VMRGLB_INTERNAL
+   UNSPEC_VMRGLH_INTERNAL
+   UNSPEC_VMRGLW_INTERNAL
 ])
 
 (define_c_enum "unspecv"
@@ -677,8 +683,8 @@
     {
       emit_insn (gen_altivec_vmulesh (even, operands[1], operands[2]));
      emit_insn (gen_altivec_vmulosh (odd, operands[1], operands[2]));
-      emit_insn (gen_altivec_vmrghw (high, even, odd));
-      emit_insn (gen_altivec_vmrglw (low, even, odd));
+      emit_insn (gen_altivec_vmrghw_internal (high, even, odd));
+      emit_insn (gen_altivec_vmrglw_internal (low, even, odd));
      emit_insn (gen_altivec_vpkuwum (operands[0], high, low));
    }
  else
@@ -839,9 +845,40 @@
   "vmladduhm %0,%1,%2,%3"
   [(set_attr "type" "veccomplex")])
 
-(define_insn "altivec_vmrghb"
+(define_expand "altivec_vmrghb"
+  [(use (match_operand:V16QI 0 "register_operand" ""))
+   (use (match_operand:V16QI 1 "register_operand" ""))
+   (use (match_operand:V16QI 2 "register_operand" ""))]
+  "TARGET_ALTIVEC"
+{
+  rtvec v;
+  rtx x;
+
+  /* Special handling for LE with -maltivec=be.  */
+  if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
+    {
+      v = gen_rtvec (16, GEN_INT (8), GEN_INT (24), GEN_INT (9), GEN_INT (25),
+                     GEN_INT (10), GEN_INT (26), GEN_INT (11), GEN_INT (27),
+                     GEN_INT (12), GEN_INT (28), GEN_INT (13), GEN_INT (29),
+                     GEN_INT (14), GEN_INT (30), GEN_INT (15), GEN_INT (31));
+      x = gen_rtx_VEC_CONCAT (V32QImode, operands[2], operands[1]);
+    }
+  else
+    {
+      v = gen_rtvec (16, GEN_INT (0), GEN_INT (16), GEN_INT (1), GEN_INT (17),
+                     GEN_INT (2), GEN_INT (18), GEN_INT (3), GEN_INT (19),
+                     GEN_INT (4), GEN_INT (20), GEN_INT (5), GEN_INT (21),
+                     GEN_INT (6), GEN_INT (22), GEN_INT (7), GEN_INT (23));
+      x = gen_rtx_VEC_CONCAT (V32QImode, operands[1], operands[2]);
+    }
+
+  x = gen_rtx_VEC_SELECT (V16QImode, x, gen_rtx_PARALLEL (VOIDmode, v));
+  emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
+})
+
+(define_insn "*altivec_vmrghb_endian"
   [(set (match_operand:V16QI 0 "register_operand" "=v")
-        (vec_select:V16QI
+	(vec_select:V16QI
         (vec_concat:V32QI
           (match_operand:V16QI 1 "register_operand" "v")
           (match_operand:V16QI 2 "register_operand" "v"))
@@ -854,12 +891,53 @@
                      (const_int 6) (const_int 22)
                      (const_int 7) (const_int 23)])))]
   "TARGET_ALTIVEC"
+{
+  if (BYTES_BIG_ENDIAN)
+    return "vmrghb %0,%1,%2";
+  else
+    return "vmrglb %0,%2,%1";
+}
+  [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vmrghb_internal"
+  [(set (match_operand:V16QI 0 "register_operand" "=v")
+        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")
+                       (match_operand:V16QI 2 "register_operand" "v")]
+                      UNSPEC_VMRGHB_INTERNAL))]
+  "TARGET_ALTIVEC"
   "vmrghb %0,%1,%2"
   [(set_attr "type" "vecperm")])
 
-(define_insn "altivec_vmrghh"
+(define_expand "altivec_vmrghh"
+  [(use (match_operand:V8HI 0 "register_operand" ""))
+   (use (match_operand:V8HI 1 "register_operand" ""))
+   (use (match_operand:V8HI 2 "register_operand" ""))]
+  "TARGET_ALTIVEC"
+{
+  rtvec v;
+  rtx x;
+
+  /* Special handling for LE with -maltivec=be.  */
+  if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
+    {
+      v = gen_rtvec (8, GEN_INT (4), GEN_INT (12), GEN_INT (5), GEN_INT (13),
+                     GEN_INT (6), GEN_INT (14), GEN_INT (7), GEN_INT (15));
+      x = gen_rtx_VEC_CONCAT (V16HImode, operands[2], operands[1]);
+    }
+  else
+    {
+      v = gen_rtvec (8, GEN_INT (0), GEN_INT (8), GEN_INT (1), GEN_INT (9),
+                     GEN_INT (2), GEN_INT (10), GEN_INT (3), GEN_INT (11));
+      x = gen_rtx_VEC_CONCAT (V16HImode, operands[1], operands[2]);
+    }
+
+  x = gen_rtx_VEC_SELECT (V8HImode, x, gen_rtx_PARALLEL (VOIDmode, v));
+  emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
+})
+
+(define_insn "*altivec_vmrghh_endian"
   [(set (match_operand:V8HI 0 "register_operand" "=v")
-        (vec_select:V8HI
+	(vec_select:V8HI
         (vec_concat:V16HI
           (match_operand:V8HI 1 "register_operand" "v")
           (match_operand:V8HI 2 "register_operand" "v"))
@@ -868,10 +946,49 @@
                      (const_int 2) (const_int 10)
                      (const_int 3) (const_int 11)])))]
   "TARGET_ALTIVEC"
+{
+  if (BYTES_BIG_ENDIAN)
+    return "vmrghh %0,%1,%2";
+  else
+    return "vmrglh %0,%2,%1";
+}
+  [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vmrghh_internal"
+  [(set (match_operand:V8HI 0 "register_operand" "=v")
+        (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v")
+                      (match_operand:V8HI 2 "register_operand" "v")]
+                     UNSPEC_VMRGHH_INTERNAL))]
+  "TARGET_ALTIVEC"
   "vmrghh %0,%1,%2"
   [(set_attr "type" "vecperm")])
 
-(define_insn "altivec_vmrghw"
+(define_expand "altivec_vmrghw"
+  [(use (match_operand:V4SI 0 "register_operand" ""))
+   (use (match_operand:V4SI 1 "register_operand" ""))
+   (use (match_operand:V4SI 2 "register_operand" ""))]
+  "VECTOR_MEM_ALTIVEC_P (V4SImode)"
+{
+  rtvec v;
+  rtx x;
+
+  /* Special handling for LE with -maltivec=be.  */
+  if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
+    {
+      v = gen_rtvec (4, GEN_INT (2), GEN_INT (6), GEN_INT (3), GEN_INT (7));
+      x = gen_rtx_VEC_CONCAT (V8SImode, operands[2], operands[1]);
+    }
+  else
+    {
+      v = gen_rtvec (4, GEN_INT (0), GEN_INT (4), GEN_INT (1), GEN_INT (5));
+      x = gen_rtx_VEC_CONCAT (V8SImode, operands[1], operands[2]);
+    }
+
+  x = gen_rtx_VEC_SELECT (V4SImode, x, gen_rtx_PARALLEL (VOIDmode, v));
+  emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
+})
+
+(define_insn "*altivec_vmrghw_endian"
   [(set (match_operand:V4SI 0 "register_operand" "=v")
         (vec_select:V4SI
          (vec_concat:V8SI
@@ -880,6 +997,20 @@
           (parallel [(const_int 0) (const_int 4)
                      (const_int 1) (const_int 5)])))]
   "VECTOR_MEM_ALTIVEC_P (V4SImode)"
+{
+  if (BYTES_BIG_ENDIAN)
+    return "vmrghw %0,%1,%2";
+  else
+    return "vmrglw %0,%2,%1";
+}
+  [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vmrghw_internal"
+  [(set (match_operand:V4SI 0 "register_operand" "=v")
+        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
+                      (match_operand:V4SI 2 "register_operand" "v")]
+                     UNSPEC_VMRGHW_INTERNAL))]
+  "TARGET_ALTIVEC"
   "vmrghw %0,%1,%2"
   [(set_attr "type" "vecperm")])
 
@@ -892,10 +1023,46 @@
           (parallel [(const_int 0) (const_int 4)
                      (const_int 1) (const_int 5)])))]
   "VECTOR_MEM_ALTIVEC_P (V4SFmode)"
-  "vmrghw %0,%1,%2"
+{
+  if (BYTES_BIG_ENDIAN)
+    return "vmrghw %0,%1,%2";
+  else
+    return "vmrglw %0,%2,%1";
+}
   [(set_attr "type" "vecperm")])
 
-(define_insn "altivec_vmrglb"
+(define_expand "altivec_vmrglb"
+  [(use (match_operand:V16QI 0 "register_operand" ""))
+   (use (match_operand:V16QI 1 "register_operand" ""))
+   (use (match_operand:V16QI 2 "register_operand" ""))]
+  "TARGET_ALTIVEC"
+{
+  rtvec v;
+  rtx x;
+
+  /* Special handling for LE with -maltivec=be.  */
+  if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
+    {
+      v = gen_rtvec (16, GEN_INT (0), GEN_INT (16), GEN_INT (1), GEN_INT (17),
+                     GEN_INT (2), GEN_INT (18), GEN_INT (3), GEN_INT (19),
+                     GEN_INT (4), GEN_INT (20), GEN_INT (5), GEN_INT (21),
+                     GEN_INT (6), GEN_INT (22), GEN_INT (7), GEN_INT (23));
+      x = gen_rtx_VEC_CONCAT (V32QImode, operands[2], operands[1]);
+    }
+  else
+    {
+      v = gen_rtvec (16, GEN_INT (8), GEN_INT (24), GEN_INT (9), GEN_INT (25),
+                     GEN_INT (10), GEN_INT (26), GEN_INT (11), GEN_INT (27),
+                     GEN_INT (12), GEN_INT (28), GEN_INT (13), GEN_INT (29),
+                     GEN_INT (14), GEN_INT (30), GEN_INT (15), GEN_INT (31));
+      x = gen_rtx_VEC_CONCAT (V32QImode, operands[1], operands[2]);
+    }
+
+  x = gen_rtx_VEC_SELECT (V16QImode, x, gen_rtx_PARALLEL (VOIDmode, v));
+  emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
+})
+
+(define_insn "*altivec_vmrglb_endian"
   [(set (match_operand:V16QI 0 "register_operand" "=v")
         (vec_select:V16QI
          (vec_concat:V32QI
@@ -910,10 +1077,51 @@
                      (const_int 14) (const_int 30)
                      (const_int 15) (const_int 31)])))]
   "TARGET_ALTIVEC"
+{
+  if (BYTES_BIG_ENDIAN)
+    return "vmrglb %0,%1,%2";
+  else
+    return "vmrghb %0,%2,%1";
+}
+  [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vmrglb_internal"
+  [(set (match_operand:V16QI 0 "register_operand" "=v")
+        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")
+                       (match_operand:V16QI 2 "register_operand" "v")]
+                      UNSPEC_VMRGLB_INTERNAL))]
+  "TARGET_ALTIVEC"
   "vmrglb %0,%1,%2"
   [(set_attr "type" "vecperm")])
 
-(define_insn "altivec_vmrglh"
+(define_expand "altivec_vmrglh"
+  [(use (match_operand:V8HI 0 "register_operand" ""))
+   (use (match_operand:V8HI 1 "register_operand" ""))
+   (use (match_operand:V8HI 2 "register_operand" ""))]
+  "TARGET_ALTIVEC"
+{
+  rtvec v;
+  rtx x;
+
+  /* Special handling for LE with -maltivec=be.  */
+  if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
+    {
+      v = gen_rtvec (8, GEN_INT (0), GEN_INT (8), GEN_INT (1), GEN_INT (9),
+                     GEN_INT (2), GEN_INT (10), GEN_INT (3), GEN_INT (11));
+      x = gen_rtx_VEC_CONCAT (V16HImode, operands[2], operands[1]);
+    }
+  else
+    {
+      v = gen_rtvec (8, GEN_INT (4), GEN_INT (12), GEN_INT (5), GEN_INT (13),
+                     GEN_INT (6), GEN_INT (14), GEN_INT (7), GEN_INT (15));
+      x = gen_rtx_VEC_CONCAT (V16HImode, operands[1], operands[2]);
+    }
+
+  x = gen_rtx_VEC_SELECT (V8HImode, x, gen_rtx_PARALLEL (VOIDmode, v));
+  emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
+})
+
+(define_insn "*altivec_vmrglh_endian"
   [(set (match_operand:V8HI 0 "register_operand" "=v")
         (vec_select:V8HI
          (vec_concat:V16HI
@@ -924,10 +1132,49 @@
                      (const_int 6) (const_int 14)
                      (const_int 7) (const_int 15)])))]
   "TARGET_ALTIVEC"
+{
+  if (BYTES_BIG_ENDIAN)
+    return "vmrglh %0,%1,%2";
+  else
+    return "vmrghh %0,%2,%1";
+}
+  [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vmrglh_internal"
+  [(set (match_operand:V8HI 0 "register_operand" "=v")
+        (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v")
+                      (match_operand:V8HI 2 "register_operand" "v")]
+                     UNSPEC_VMRGLH_INTERNAL))]
+  "TARGET_ALTIVEC"
   "vmrglh %0,%1,%2"
   [(set_attr "type" "vecperm")])
 
-(define_insn "altivec_vmrglw"
+(define_expand "altivec_vmrglw"
+  [(use (match_operand:V4SI 0 "register_operand" ""))
+   (use (match_operand:V4SI 1 "register_operand" ""))
+   (use (match_operand:V4SI 2 "register_operand" ""))]
+  "VECTOR_MEM_ALTIVEC_P (V4SImode)"
+{
+  rtvec v;
+  rtx x;
+
+  /* Special handling for LE with -maltivec=be.  */
+  if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
+    {
+      v = gen_rtvec (4, GEN_INT (0), GEN_INT (4), GEN_INT (1), GEN_INT (5));
+      x = gen_rtx_VEC_CONCAT (V8SImode, operands[2], operands[1]);
+    }
+  else
+    {
+      v = gen_rtvec (4, GEN_INT (2), GEN_INT (6), GEN_INT (3), GEN_INT (7));
+      x = gen_rtx_VEC_CONCAT (V8SImode, operands[1], operands[2]);
+    }
+
+  x = gen_rtx_VEC_SELECT (V4SImode, x, gen_rtx_PARALLEL (VOIDmode, v));
+  emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
+})
+
+(define_insn "*altivec_vmrglw_endian"
   [(set (match_operand:V4SI 0 "register_operand" "=v")
         (vec_select:V4SI
          (vec_concat:V8SI
@@ -936,6 +1183,37 @@
           (parallel [(const_int 2) (const_int 6)
                      (const_int 3) (const_int 7)])))]
   "VECTOR_MEM_ALTIVEC_P (V4SImode)"
+{
+  if (BYTES_BIG_ENDIAN)
+    return "vmrglw %0,%1,%2";
+  else
+    return "vmrghw %0,%2,%1";
+}
+  [(set_attr "type" "vecperm")])
+
+;(define_insn "altivec_vmrglw"
+;  [(set (match_operand:V4SI 0 "register_operand" "=v")
+;        (vec_select:V4SI
+;         (vec_concat:V8SI
+;           (match_operand:V4SI 1 "register_operand" "v")
+;           (match_operand:V4SI 2 "register_operand" "v"))
+;         (parallel [(const_int 2) (const_int 6)
+;                    (const_int 3) (const_int 7)])))]
+;  "VECTOR_MEM_ALTIVEC_P (V4SImode)"
+;{
+;  if (VECTOR_ELT_ORDER_BIG)
+;    return "vmrglw %0,%1,%2";
+;  else
+;    return "vmrghw %0,%2,%1";
+;}
+;  [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vmrglw_internal"
+  [(set (match_operand:V4SI 0 "register_operand" "=v")
+        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
+                      (match_operand:V4SI 2 "register_operand" "v")]
+                     UNSPEC_VMRGLW_INTERNAL))]
+  "TARGET_ALTIVEC"
   "vmrglw %0,%1,%2"
   [(set_attr "type" "vecperm")])
 
@@ -948,7 +1226,12 @@
           (parallel [(const_int 2) (const_int 6)
                      (const_int 3) (const_int 7)])))]
   "VECTOR_MEM_ALTIVEC_P (V4SFmode)"
-  "vmrglw %0,%1,%2"
+{
+  if (BYTES_BIG_ENDIAN)
+    return "vmrglw %0,%1,%2";
+  else
+    return "vmrghw %0,%2,%1";
+}
   [(set_attr "type" "vecperm")])
 
 ;; Power8 vector merge even/odd
@@ -2225,13 +2508,13 @@
     {
      emit_insn (gen_altivec_vmuleub (ve, operands[1], operands[2]));
      emit_insn (gen_altivec_vmuloub (vo, operands[1], operands[2]));
-      emit_insn (gen_altivec_vmrghh (operands[0], ve, vo));
+      emit_insn (gen_altivec_vmrghh_internal (operands[0], ve, vo));
    }
  else
    {
      emit_insn (gen_altivec_vmuloub (ve, operands[1], operands[2]));
      emit_insn (gen_altivec_vmuleub (vo, operands[1], operands[2]));
-      emit_insn (gen_altivec_vmrghh (operands[0], vo, ve));
+      emit_insn (gen_altivec_vmrghh_internal (operands[0], vo, ve));
    }
  DONE;
}")
@@ -2251,13 +2534,13 @@
    {
      emit_insn (gen_altivec_vmuleub (ve, operands[1], operands[2]));
      emit_insn (gen_altivec_vmuloub (vo, operands[1], operands[2]));
-      emit_insn (gen_altivec_vmrglh (operands[0], ve, vo));
+      emit_insn (gen_altivec_vmrglh_internal (operands[0], ve, vo));
    }
  else
    {
      emit_insn (gen_altivec_vmuloub (ve, operands[1], operands[2]));
      emit_insn (gen_altivec_vmuleub (vo, operands[1], operands[2]));
-      emit_insn (gen_altivec_vmrglh (operands[0], vo, ve));
+      emit_insn (gen_altivec_vmrglh_internal (operands[0], vo, ve));
    }
  DONE;
}")
@@ -2277,13 +2560,13 @@
    {
      emit_insn (gen_altivec_vmulesb (ve, operands[1], operands[2]));
      emit_insn (gen_altivec_vmulosb (vo, operands[1], operands[2]));
-      emit_insn (gen_altivec_vmrghh (operands[0], ve, vo));
+      emit_insn (gen_altivec_vmrghh_internal (operands[0], ve, vo));
    }
  else
    {
      emit_insn (gen_altivec_vmulosb (ve, operands[1], operands[2]));
      emit_insn (gen_altivec_vmulesb (vo, operands[1], operands[2]));
-      emit_insn (gen_altivec_vmrghh (operands[0], vo, ve));
+      emit_insn (gen_altivec_vmrghh_internal (operands[0], vo, ve));
    }
  DONE;
}")
@@ -2303,13 +2586,13 @@
    {
      emit_insn (gen_altivec_vmulesb (ve, operands[1], operands[2]));
      emit_insn (gen_altivec_vmulosb (vo, operands[1], operands[2]));
-      emit_insn (gen_altivec_vmrglh (operands[0], ve, vo));
+      emit_insn (gen_altivec_vmrglh_internal (operands[0], ve, vo));
    }
  else
    {
      emit_insn (gen_altivec_vmulosb (ve, operands[1], operands[2]));
      emit_insn (gen_altivec_vmulesb (vo, operands[1], operands[2]));
-      emit_insn (gen_altivec_vmrglh (operands[0], vo, ve));
+      emit_insn (gen_altivec_vmrglh_internal (operands[0], vo, ve));
    }
  DONE;
}")
@@ -2329,13 +2612,13 @@
    {
      emit_insn (gen_altivec_vmuleuh (ve, operands[1], operands[2]));
      emit_insn (gen_altivec_vmulouh (vo, operands[1], operands[2]));
-      emit_insn (gen_altivec_vmrghw (operands[0], ve, vo));
+      emit_insn (gen_altivec_vmrghw_internal (operands[0], ve, vo));
    }
  else
    {
      emit_insn (gen_altivec_vmulouh (ve, operands[1], operands[2]));
      emit_insn (gen_altivec_vmuleuh (vo, operands[1], operands[2]));
-      emit_insn (gen_altivec_vmrghw (operands[0], vo, ve));
+      emit_insn (gen_altivec_vmrghw_internal (operands[0], vo, ve));
    }
  DONE;
}")
@@ -2355,13 +2638,13 @@
    {
      emit_insn (gen_altivec_vmuleuh (ve, operands[1], operands[2]));
      emit_insn (gen_altivec_vmulouh (vo, operands[1], operands[2]));
-      emit_insn (gen_altivec_vmrglw (operands[0], ve, vo));
+      emit_insn (gen_altivec_vmrglw_internal (operands[0], ve, vo));
    }
  else
    {
      emit_insn (gen_altivec_vmulouh (ve, operands[1], operands[2]));
      emit_insn (gen_altivec_vmuleuh (vo, operands[1], operands[2]));
-      emit_insn (gen_altivec_vmrglw (operands[0], vo, ve));
+      emit_insn (gen_altivec_vmrglw_internal (operands[0], vo, ve));
    }
  DONE;
}")
@@ -2381,13 +2664,13 @@
    {
      emit_insn (gen_altivec_vmulesh (ve, operands[1], operands[2]));
      emit_insn (gen_altivec_vmulosh (vo, operands[1], operands[2]));
-      emit_insn (gen_altivec_vmrghw (operands[0], ve, vo));
+      emit_insn (gen_altivec_vmrghw_internal (operands[0], ve, vo));
    }
  else
    {
      emit_insn (gen_altivec_vmulosh (ve, operands[1], operands[2]));
      emit_insn (gen_altivec_vmulesh (vo, operands[1], operands[2]));
-      emit_insn (gen_altivec_vmrghw (operands[0], vo, ve));
+      emit_insn (gen_altivec_vmrghw_internal (operands[0], vo, ve));
    }
  DONE;
}")
@@ -2407,13 +2690,13 @@
    {
      emit_insn (gen_altivec_vmulesh (ve, operands[1], operands[2]));
      emit_insn (gen_altivec_vmulosh (vo, operands[1], operands[2]));
-      emit_insn (gen_altivec_vmrglw (operands[0], ve, vo));
+      emit_insn (gen_altivec_vmrglw_internal (operands[0], ve, vo));
    }
  else
    {
      emit_insn (gen_altivec_vmulosh (ve, operands[1], operands[2]));
      emit_insn (gen_altivec_vmulesh (vo, operands[1], operands[2]));
-      emit_insn (gen_altivec_vmrglw (operands[0], vo, ve));
+      emit_insn (gen_altivec_vmrglw_internal (operands[0], vo, ve));
    }
  DONE;
}")