On Fri, 2015-09-18 at 16:39 +0100, Alan Lawrence wrote: > This is a respin of https://gcc.gnu.org/ml/gcc-patches/2014-12/msg01024.html > after discovering that patch was broken on power64le - thanks to Bill Schmidt > for pointing out that gcc112 is the opposite endianness to gcc110... > > This time I decided to avoid any funny business with making RTL match other > patterns in other .md files, and instead to directly call the relevant > expanders. This should thus preserve the codegen of the previous expansion > path. > Moreover, combining the uplus and splus expansion paths (as addition is the same > regardless of signedness) causes some additional examples to be reduced directly > via patterns.
Alan, thanks for the patch! David will have to approve it, but this endian-corrected version looks good to me. Regards, Bill > > Bootstrapped + check-g{cc,++,fortran} > on powerpc64-none-linux-gnu (--with-cpu=power7) > and powerpc64le-none-linux-gnu (--with-cpu=power8). > > gcc/ChangeLog: > > * config/rs6000/altivec.md (reduc_splus_<mode>): Rename to... > (reduc_plus_scal_<mode>): ...this, add rs6000_expand_vector_extract. > (reduc_uplus_v16qi): Remove. > > * config/rs6000/vector.md (VEC_reduc_name): Change "splus" to "plus". > (reduc_<VEC_reduc_name>_v2df): Remove. > (reduc_<VEC_reduc_name>_v4sf): Remove. > (reduc_<VEC_reduc:VEC_reduc_name>_scal_<VEC_F:name>): New. > > * config/rs6000/vsx.md (vsx_reduc_<VEC_reduc_name>_v2df): Declare > gen_ function by removing * prefix. > (vsx_reduc_<VEC_reduc_name>_v4sf): Likewise. > --- > gcc/config/rs6000/altivec.md | 25 ++++++----------------- > gcc/config/rs6000/vector.md | 47 > ++++++++++++++++++-------------------------- > gcc/config/rs6000/vsx.md | 4 ++-- > 3 files changed, 27 insertions(+), 49 deletions(-) > > diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md > index 4170f38..93ce1f0 100644 > --- a/gcc/config/rs6000/altivec.md > +++ b/gcc/config/rs6000/altivec.md > @@ -2648,35 +2648,22 @@ > operands[3] = gen_reg_rtx (GET_MODE (operands[0])); > }) > > -(define_expand "reduc_splus_<mode>" > - [(set (match_operand:VIshort 0 "register_operand" "=v") > +(define_expand "reduc_plus_scal_<mode>" > + [(set (match_operand:<VI_scalar> 0 "register_operand" "=v") > (unspec:VIshort [(match_operand:VIshort 1 "register_operand" "v")] > UNSPEC_REDUC_PLUS))] > "TARGET_ALTIVEC" > { > rtx vzero = gen_reg_rtx (V4SImode); > rtx vtmp1 = gen_reg_rtx (V4SImode); > - rtx dest = gen_lowpart (V4SImode, operands[0]); > + rtx vtmp2 = gen_reg_rtx (<MODE>mode); > + rtx dest = gen_lowpart (V4SImode, vtmp2); > + int elt = BYTES_BIG_ENDIAN ? 
GET_MODE_NUNITS (<MODE>mode) - 1 : 0; > > emit_insn (gen_altivec_vspltisw (vzero, const0_rtx)); > emit_insn (gen_altivec_vsum4s<VI_char>s (vtmp1, operands[1], vzero)); > emit_insn (gen_altivec_vsumsws_direct (dest, vtmp1, vzero)); > - DONE; > -}) > - > -(define_expand "reduc_uplus_v16qi" > - [(set (match_operand:V16QI 0 "register_operand" "=v") > - (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")] > - UNSPEC_REDUC_PLUS))] > - "TARGET_ALTIVEC" > -{ > - rtx vzero = gen_reg_rtx (V4SImode); > - rtx vtmp1 = gen_reg_rtx (V4SImode); > - rtx dest = gen_lowpart (V4SImode, operands[0]); > - > - emit_insn (gen_altivec_vspltisw (vzero, const0_rtx)); > - emit_insn (gen_altivec_vsum4ubs (vtmp1, operands[1], vzero)); > - emit_insn (gen_altivec_vsumsws_direct (dest, vtmp1, vzero)); > + rs6000_expand_vector_extract (operands[0], vtmp2, elt); > DONE; > }) > > diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md > index 8821dec..d8699c8 100644 > --- a/gcc/config/rs6000/vector.md > +++ b/gcc/config/rs6000/vector.md > @@ -78,7 +78,7 @@ > ;; Vector reduction code iterators > (define_code_iterator VEC_reduc [plus smin smax]) > > -(define_code_attr VEC_reduc_name [(plus "splus") > +(define_code_attr VEC_reduc_name [(plus "plus") > (smin "smin") > (smax "smax")]) > > @@ -1061,38 +1061,29 @@ > "") > > ;; Vector reduction expanders for VSX > - > -(define_expand "reduc_<VEC_reduc_name>_v2df" > - [(parallel [(set (match_operand:V2DF 0 "vfloat_operand" "") > - (VEC_reduc:V2DF > - (vec_concat:V2DF > - (vec_select:DF > - (match_operand:V2DF 1 "vfloat_operand" "") > - (parallel [(const_int 1)])) > - (vec_select:DF > - (match_dup 1) > - (parallel [(const_int 0)]))) > - (match_dup 1))) > - (clobber (match_scratch:V2DF 2 ""))])] > - "VECTOR_UNIT_VSX_P (V2DFmode)" > - "") > - > -; The (VEC_reduc:V4SF > +; The (VEC_reduc:... > ; (op1) > -; (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)) > +; (unspec:... 
[(const_int 0)] UNSPEC_REDUC)) > ; > ; is to allow us to use a code iterator, but not completely list all of the > ; vector rotates, etc. to prevent canonicalization > > -(define_expand "reduc_<VEC_reduc_name>_v4sf" > - [(parallel [(set (match_operand:V4SF 0 "vfloat_operand" "") > - (VEC_reduc:V4SF > - (unspec:V4SF [(const_int 0)] UNSPEC_REDUC) > - (match_operand:V4SF 1 "vfloat_operand" ""))) > - (clobber (match_scratch:V4SF 2 "")) > - (clobber (match_scratch:V4SF 3 ""))])] > - "VECTOR_UNIT_VSX_P (V4SFmode)" > - "") > + > +(define_expand "reduc_<VEC_reduc:VEC_reduc_name>_scal_<VEC_F:mode>" > + [(match_operand:<VEC_base> 0 "register_operand" "") > + (VEC_reduc:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "") > + (unspec:VEC_F [(const_int 0)] UNSPEC_REDUC))] > + "VECTOR_UNIT_VSX_P (<VEC_F:MODE>mode)" > + { > + rtx vec = gen_reg_rtx (<VEC_F:MODE>mode); > + rtx elt = BYTES_BIG_ENDIAN > + ? gen_int_mode (GET_MODE_NUNITS (<VEC_F:MODE>mode) - 1, QImode) > + : const0_rtx; > + emit_insn (gen_vsx_reduc_<VEC_reduc:VEC_reduc_name>_<VEC_F:mode> (vec, > + operand1)); > + emit_insn (gen_vsx_extract_<VEC_F:mode> (operand0, vec, elt)); > + DONE; > + }) > > > ;;; Expanders for vector insn patterns shared between the SPE and > TARGET_PAIRED systems. 
> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md > index 7f366b8..0e110ae 100644 > --- a/gcc/config/rs6000/vsx.md > +++ b/gcc/config/rs6000/vsx.md > @@ -2289,7 +2289,7 @@ > > ;; Vector reduction insns and splitters > > -(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df" > +(define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v2df" > [(set (match_operand:V2DF 0 "vfloat_operand" "=&wd,&?wa,wd,?wa") > (VEC_reduc:V2DF > (vec_concat:V2DF > @@ -2317,7 +2317,7 @@ > [(set_attr "length" "8") > (set_attr "type" "veccomplex")]) > > -(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf" > +(define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v4sf" > [(set (match_operand:V4SF 0 "vfloat_operand" "=wf,?wa") > (VEC_reduc:V4SF > (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)