On Fri, Jul 28, 2017 at 04:08:50PM -0500, Segher Boessenkool wrote: > > "emit" is not a good name for this: that is generally used for something > that does emit_insn, i.e. put an insn in the instruction stream. This > function returns a string a define_insn can return. For the rl* insns > I called the similar functions rs6000_insn_for_*, maybe something like > that is better here?
... > I think calling this with the rtx elementN args makes this only more > complicated (the function comment doesn't say what they are or what > NULL means, btw). ... > In this and the other testcase, should you test no other insns at all > are generated? Here are the revised patches. I tested on a little endian power8 system and a big endian power7 system. Are these patches ok for the trunk? [gcc] 2017-08-02 Michael Meissner <meiss...@linux.vnet.ibm.com> PR target/81593 * config/rs6000/rs6000-protos.h (rs6000_output_xxpermdi): New declaration. * config/rs6000/rs6000.c (rs6000_output_xxpermdi): New function to emit XXPERMDI accessing either double word in either vector register inputs. * config/rs6000/vsx.md (vsx_concat_<mode>, VSX_D iterator): Rewrite VEC_CONCAT insn to call rs6000_output_xxpermdi. Simplify the constraints with the removal of the -mupper-regs-* switches. (vsx_concat_<mode>_1): New combiner insns to optimize CONCATs where either register might have come from VEC_SELECT. (vsx_concat_<mode>_2): Likewise. (vsx_concat_<mode>_3): Likewise. (vsx_set_<mode>, VSX_D iterator): Rewrite insn to generate a VEC_CONCAT rather than use an UNSPEC to specify the option. [gcc/testsuite] 2017-08-02 Michael Meissner <meiss...@linux.vnet.ibm.com> PR target/81593 * gcc.target/powerpc/vsx-extract-6.c: New test. * gcc.target/powerpc/vsx-extract-7.c: Likewise. -- Michael Meissner, IBM IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797
Index: gcc/config/rs6000/rs6000-protos.h =================================================================== --- gcc/config/rs6000/rs6000-protos.h (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/config/rs6000) (revision 250793) +++ gcc/config/rs6000/rs6000-protos.h (.../gcc/config/rs6000) (working copy) @@ -233,6 +233,7 @@ extern void rs6000_asm_output_dwarf_pcre const char *label); extern void rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label); +extern const char *rs6000_output_xxpermdi (rtx, rtx, rtx, rtx, rtx); /* Declare functions in rs6000-c.c */ Index: gcc/config/rs6000/rs6000.c =================================================================== --- gcc/config/rs6000/rs6000.c (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/config/rs6000) (revision 250793) +++ gcc/config/rs6000/rs6000.c (.../gcc/config/rs6000) (working copy) @@ -39007,6 +39007,60 @@ rs6000_optab_supported_p (int op, machin return true; } } + + +/* Output a xxpermdi instruction that sets a 128-bit vector DEST combining two + inputs SRC1 and SRC2. + + If ELEMENT1 is null, use the top 64-bit double word of SRC1. If it is + non-NULL, it is a 0 or 1 constant that gives the vector element number to + use for extracting the 64-bit double word from SRC1. + + If ELEMENT2 is null, use the top 64-bit double word of SRC2. If it is + non-NULL, it is a 0 or 1 constant that gives the vector element number to + use for extracting the 64-bit double word from SRC2. + + The element number is based on the user element ordering, set by the + endianness and by the -maltivec={le,be} options. */ + +const char * +rs6000_output_xxpermdi (rtx dest, + rtx src1, + rtx src2, + rtx element1, + rtx element2) +{ + int op1_dword = (!element1) ? 0 : INTVAL (element1); + int op2_dword = (!element2) ? 
0 : INTVAL (element2); + rtx xops[10]; + const char *insn_string; + + gcc_assert (IN_RANGE (op1_dword | op2_dword, 0, 1)); + xops[0] = dest; + xops[1] = src1; + xops[2] = src2; + + if (BYTES_BIG_ENDIAN) + { + xops[3] = GEN_INT (2*op1_dword + op2_dword); + insn_string = "xxpermdi %x0,%x1,%x2,%3"; + } + else + { + if (element1) + op1_dword = 1 - op1_dword; + + if (element2) + op2_dword = 1 - op2_dword; + + xops[3] = GEN_INT (op1_dword + 2*op2_dword); + insn_string = "xxpermdi %x0,%x2,%x1,%3"; + } + + output_asm_insn (insn_string, xops); + return ""; +} + struct gcc_target targetm = TARGET_INITIALIZER; Index: gcc/config/rs6000/vsx.md =================================================================== --- gcc/config/rs6000/vsx.md (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/config/rs6000) (revision 250793) +++ gcc/config/rs6000/vsx.md (.../gcc/config/rs6000) (working copy) @@ -2364,19 +2364,18 @@ (define_insn "*vsx_float_fix_v2df2" ;; Build a V2DF/V2DI vector from two scalars (define_insn "vsx_concat_<mode>" - [(set (match_operand:VSX_D 0 "gpc_reg_operand" "=<VSa>,we") + [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we") (vec_concat:VSX_D - (match_operand:<VS_scalar> 1 "gpc_reg_operand" "<VS_64reg>,b") - (match_operand:<VS_scalar> 2 "gpc_reg_operand" "<VS_64reg>,b")))] + (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b") + (match_operand:<VS_scalar> 2 "gpc_reg_operand" "wa,b")))] "VECTOR_MEM_VSX_P (<MODE>mode)" { if (which_alternative == 0) - return (BYTES_BIG_ENDIAN - ? "xxpermdi %x0,%x1,%x2,0" - : "xxpermdi %x0,%x2,%x1,0"); + return rs6000_output_xxpermdi (operands[0], operands[1], operands[2], + NULL_RTX, NULL_RTX); else if (which_alternative == 1) - return (BYTES_BIG_ENDIAN + return (VECTOR_ELT_ORDER_BIG ? 
"mtvsrdd %x0,%1,%2" : "mtvsrdd %x0,%2,%1"); @@ -2385,6 +2384,50 @@ (define_insn "vsx_concat_<mode>" } [(set_attr "type" "vecperm")]) +;; Combiner patterns to allow creating XXPERMDI's to access either double +;; word in a vector register. Note, rs6000_output_xxpermdi expects +;; operands[0..2] to be the vector registers. +(define_insn "*vsx_concat_<mode>_1" + [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") + (vec_concat:VSX_D + (vec_select:<VS_scalar> + (match_operand:VSX_D 1 "gpc_reg_operand" "wa") + (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")])) + (match_operand:<VS_scalar> 2 "gpc_reg_operand" "wa")))] + "VECTOR_MEM_VSX_P (<MODE>mode)" +{ + return rs6000_output_xxpermdi (operands[0], operands[1], operands[2], + operands[3], NULL_RTX); +}) + +(define_insn "*vsx_concat_<mode>_2" + [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") + (vec_concat:VSX_D + (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa") + (vec_select:<VS_scalar> + (match_operand:VSX_D 2 "gpc_reg_operand" "wa") + (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")]))))] + "VECTOR_MEM_VSX_P (<MODE>mode)" +{ + return rs6000_output_xxpermdi (operands[0], operands[1], operands[2], + NULL_RTX, operands[3]); +}) + +(define_insn "*vsx_concat_<mode>_3" + [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") + (vec_concat:VSX_D + (vec_select:<VS_scalar> + (match_operand:VSX_D 1 "gpc_reg_operand" "wa") + (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")])) + (vec_select:<VS_scalar> + (match_operand:VSX_D 2 "gpc_reg_operand" "wa") + (parallel [(match_operand:QI 4 "const_0_to_1_operand" "n")]))))] + "VECTOR_MEM_VSX_P (<MODE>mode)" +{ + return rs6000_output_xxpermdi (operands[0], operands[1], operands[2], + operands[3], operands[4]); +}) + ;; Special purpose concat using xxpermdi to glue two single precision values ;; together, relying on the fact that internally scalar floats are represented ;; as doubles. 
This is used to initialize a V4SF vector with 4 floats @@ -2585,25 +2628,35 @@ (define_expand "vsx_set_v1ti" DONE; }) -;; Set the element of a V2DI/VD2F mode -(define_insn "vsx_set_<mode>" - [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,?<VSa>") - (unspec:VSX_D - [(match_operand:VSX_D 1 "vsx_register_operand" "wd,<VSa>") - (match_operand:<VS_scalar> 2 "vsx_register_operand" "<VS_64reg>,<VSa>") - (match_operand:QI 3 "u5bit_cint_operand" "i,i")] - UNSPEC_VSX_SET))] +;; Rewrite V2DF/V2DI set in terms of VEC_CONCAT +(define_expand "vsx_set_<mode>" + [(use (match_operand:VSX_D 0 "vsx_register_operand")) + (use (match_operand:VSX_D 1 "vsx_register_operand")) + (use (match_operand:<VS_scalar> 2 "gpc_reg_operand")) + (use (match_operand:QI 3 "const_0_to_1_operand"))] "VECTOR_MEM_VSX_P (<MODE>mode)" { - int idx_first = BYTES_BIG_ENDIAN ? 0 : 1; - if (INTVAL (operands[3]) == idx_first) - return \"xxpermdi %x0,%x2,%x1,1\"; - else if (INTVAL (operands[3]) == 1 - idx_first) - return \"xxpermdi %x0,%x1,%x2,0\"; + rtx dest = operands[0]; + rtx vec_reg = operands[1]; + rtx value = operands[2]; + rtx ele = operands[3]; + rtx tmp = gen_reg_rtx (<VS_scalar>mode); + + if (ele == const0_rtx) + { + emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const1_rtx)); + emit_insn (gen_vsx_concat_<mode> (dest, value, tmp)); + DONE; + } + else if (ele == const1_rtx) + { + emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const0_rtx)); + emit_insn (gen_vsx_concat_<mode> (dest, tmp, value)); + DONE; + } else gcc_unreachable (); -} - [(set_attr "type" "vecperm")]) +}) ;; Extract a DF/DI element from V2DF/V2DI ;; Optimize cases were we can do a simple or direct move. 
Index: gcc/testsuite/gcc.target/powerpc/vsx-extract-6.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/vsx-extract-6.c (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/testsuite/gcc.target/powerpc) (revision 0) +++ gcc/testsuite/gcc.target/powerpc/vsx-extract-6.c (.../gcc/testsuite/gcc.target/powerpc) (revision 250804) @@ -0,0 +1,25 @@ +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-options "-O2 -mvsx" } */ + +vector unsigned long +test_vpasted (vector unsigned long high, vector unsigned long low) +{ + vector unsigned long res; + res[1] = high[1]; + res[0] = low[0]; + return res; +} + +/* { dg-final { scan-assembler-times {\mxxpermdi\M} 1 } } */ +/* { dg-final { scan-assembler-not {\mvspltisw\M} } } */ +/* { dg-final { scan-assembler-not {\mxxlor\M} } } */ +/* { dg-final { scan-assembler-not {\mxxlxor\M} } } */ +/* { dg-final { scan-assembler-not {\mxxspltib\M} } } */ +/* { dg-final { scan-assembler-not {\mlxvx?\M} } } */ +/* { dg-final { scan-assembler-not {\mlxv[dw][24]x\M} } } */ +/* { dg-final { scan-assembler-not {\mlvx\M} } } */ +/* { dg-final { scan-assembler-not {\mstxvx?\M} } } */ +/* { dg-final { scan-assembler-not {\mstxv[dw][24]x\M} } } */ +/* { dg-final { scan-assembler-not {\mstvx\M} } } */ Index: gcc/testsuite/gcc.target/powerpc/vsx-extract-7.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/vsx-extract-7.c (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/testsuite/gcc.target/powerpc) (revision 0) +++ gcc/testsuite/gcc.target/powerpc/vsx-extract-7.c (.../gcc/testsuite/gcc.target/powerpc) (revision 250804) @@ -0,0 +1,25 @@ +/* { dg-do compile { target { powerpc*-*-* } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-options "-O2 -mvsx" } 
*/ + +vector double +test_vpasted (vector double high, vector double low) +{ + vector double res; + res[1] = high[1]; + res[0] = low[0]; + return res; +} + +/* { dg-final { scan-assembler-times {\mxxpermdi\M} 1 } } */ +/* { dg-final { scan-assembler-not {\mvspltisw\M} } } */ +/* { dg-final { scan-assembler-not {\mxxlor\M} } } */ +/* { dg-final { scan-assembler-not {\mxxlxor\M} } } */ +/* { dg-final { scan-assembler-not {\mxxspltib\M} } } */ +/* { dg-final { scan-assembler-not {\mlxvx?\M} } } */ +/* { dg-final { scan-assembler-not {\mlxv[dw][24]x\M} } } */ +/* { dg-final { scan-assembler-not {\mlvx\M} } } */ +/* { dg-final { scan-assembler-not {\mstxvx?\M} } } */ +/* { dg-final { scan-assembler-not {\mstxv[dw][24]x\M} } } */ +/* { dg-final { scan-assembler-not {\mstvx\M} } } */