Hi Mike,
On Fri, Dec 09, 2016 at 03:48:10PM -0500, Michael Meissner wrote:
> @@ -7528,6 +7528,49 @@ rs6000_split_vec_extract_var (rtx dest,
> + if (TARGET_P9_VECTOR
> + && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
> + && INT_REGNO_P (dest_regno)
> + && ALTIVEC_REGNO_P (src_regno)
> + && INT_REGNO_P (element_regno))
> + {
> + rtx insn = NULL_RTX;
You don't need this initialisation (which just works around a warning) if
you do the emit_insn directly for all cases.
> + else if (mode == V8HImode)
> + {
> + emit_insn (gen_ashlsi3 (dest_si, element_si, const1_rtx));
> + insn = (VECTOR_ELT_ORDER_BIG
> + ? gen_vextuhlx (dest_si, dest_si, src)
> + : gen_vextuhrx (dest_si, dest_si, src));
> + }
If you use a separate temporary reg (instead of reusing dest_si) GCC
has a better chance at optimising this (also in the next case).
> @@ -2535,36 +2547,63 @@ (define_expand "vsx_extract_<mode>"
> })
>
> (define_insn "vsx_extract_<mode>_p9"
> - [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=<VSX_EX>")
> + [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,<VSX_EX>")
> (vec_select:<VS_scalar>
> - (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>")
> - (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))]
> + (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>")
> + (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")])))]
> "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB
> && TARGET_VSX_SMALL_INTEGER"
> {
> - HOST_WIDE_INT elt = INTVAL (operands[2]);
> - HOST_WIDE_INT elt_adj = (!VECTOR_ELT_ORDER_BIG
> - ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt
> - : elt);
> -
> - HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode);
> - HOST_WIDE_INT offset = unit_size * elt_adj;
> -
> - operands[2] = GEN_INT (offset);
> - if (unit_size == 4)
> - return "xxextractuw %x0,%x1,%2";
> + if (which_alternative == 0)
> + return "#";
> +
> else
Without else-after-return the code as well as the patch become more readable.
> - return "vextractu<wd> %0,%1,%2";
> + {
> + HOST_WIDE_INT elt = INTVAL (operands[2]);
> + HOST_WIDE_INT elt_adj = (!VECTOR_ELT_ORDER_BIG
> + ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt
> + : elt);
> +
> + HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode);
> + HOST_WIDE_INT offset = unit_size * elt_adj;
> +
> + operands[2] = GEN_INT (offset);
> + if (unit_size == 4)
> + return "xxextractuw %x0,%x1,%2";
> + else
> + return "vextractu<wd> %0,%1,%2";
> + }
> }
> [(set_attr "type" "vecsimple")])
> +(define_split
> + [(set (match_operand:<VS_scalar> 0 "int_reg_operand" "")
> + (vec_select:<VS_scalar>
> + (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "")
> + (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "")])))]
> + "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB
> + && TARGET_VSX_SMALL_INTEGER && reload_completed"
> + [(const_int 0)]
Please lose the default args ("").
Why only do the split when reload_completed?
> +(define_insn_and_split "*vsx_extract_<VSX_EXTRACT_I:mode>_<SDI:mode>_var"
> + [(set (match_operand:SDI 0 "gpc_reg_operand" "=r,r,r")
> + (zero_extend:SDI
> + (unspec:<VSX_EXTRACT_I:VS_scalar>
> + [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m")
> + (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
> + UNSPEC_VSX_EXTRACT)))
> + (clobber (match_scratch:DI 3 "=X,r,&b"))
> + (clobber (match_scratch:V2DI 4 "=X,&v,X"))]
> + "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
> + "#"
> + "&& reload_completed"
And here. And everything following.
> + [(const_int 0)]
> +{
> + machine_mode smode = <VSX_EXTRACT_I:MODE>mode;
> + rs6000_split_vec_extract_var (gen_rtx_REG (smode, REGNO (operands[0])),
> + operands[1], operands[2],
> + operands[3], operands[4]);
> + DONE;
> +})
Please write the gen_rtx_REG on its own line (but you can eliminate smode
then).
> --- gcc/testsuite/gcc.target/powerpc/vec-extract-v4si-df.c
> (.../svn+ssh://[email protected]/svn/gcc/trunk/gcc/testsuite/gcc.target/powerpc)
> (revision 0)
> +++ gcc/testsuite/gcc.target/powerpc/vec-extract-v4si-df.c
> (.../gcc/testsuite/gcc.target/powerpc) (revision 243492)
> @@ -0,0 +1,12 @@
> +/* { dg-do run { target { powerpc*-*-linux* } } } */
> +/* { dg-require-effective-target vsx_hw } */
> +/* { dg-options "-O2 -mvsx" } */
Why limit to linux? In most other testcases we explicitly disallow most
other targets (also not ideal at all, no, but consistency helps here).
Same for the other new files.
> +/* { dg-final { scan-assembler "vextub\[lr\]x" } } */
> +/* { dg-final { scan-assembler "vextuh\[lr\]x" } } */
> +/* { dg-final { scan-assembler "vextuw\[lr\]x" } } */
> +/* { dg-final { scan-assembler "extsb" } } */
> +/* { dg-final { scan-assembler "extsh" } } */
> +/* { dg-final { scan-assembler "extsw" } } */
> +/* { dg-final { scan-assembler-not "m\[ft\]vsr" } } */
> +/* { dg-final { scan-assembler-not "stxvd2x" } } */
> +/* { dg-final { scan-assembler-not "stxv" } } */
> +/* { dg-final { scan-assembler-not "lwa" } } */
> +/* { dg-final { scan-assembler-not "lwz" } } */
> +/* { dg-final { scan-assembler-not "lha" } } */
> +/* { dg-final { scan-assembler-not "lhz" } } */
The "stxvd2x" is already covered by the "stxv". Things like "lwa" are
an accident waiting to happen (it matches "always", for example).
A comment here saying what exactly you intend to test would help, too.
Segher