On Fri, 2022-05-13 at 12:19 -0400, Michael Meissner wrote: > Generate vadduqm and vsubuqm for TImode add/subtract > > If the TImode variable is in an Altivec register instead of a GPR > register, then generate vadduqm and vsubuqm instead of having to move the > value to the GPR registers and doing the add and subtract with carry > instructions. To do this, we have to delay the splitting of the addition > and subtraction until after register allocation.
Ok. > > I have built this patch on little endian power10, little endian power9, and > big > endian power8 systems. There were no regressions. Can I install this patch > to > the GCC 13 master branch? > > 2022-05-13 Michael Meissner <meiss...@linux.ibm.com> > > gcc/ > * config/rs6000/rs6000.md (addti3): Generate vadduqm if we are > using the Altivec registers. > (subti3): Generate vsubuqm if we using the Altivec registers. > (negti3): New insn. > > gcc/testsuite/ > * gcc.target/powerpc/vadduqm-vsubuqm.c: New test. > --- > gcc/config/rs6000/rs6000.md | 82 ++++++++++++++----- > .../gcc.target/powerpc/vadduqm-vsubuqm.c | 22 +++++ > 2 files changed, 83 insertions(+), 21 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/powerpc/vadduqm-vsubuqm.c > > diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md > index 83eacec57ba..f120ca0b48d 100644 > --- a/gcc/config/rs6000/rs6000.md > +++ b/gcc/config/rs6000/rs6000.md > @@ -7139,15 +7139,22 @@ (define_expand "feraiseexceptsi" > ;; > ;; Addti3/subti3 are define_insn_and_splits instead of define_expand, to > allow > ;; for combine to make things like multiply and add with extend operations. > +;; > +;; Also add support in case the 128-bit integer happens to be an Altivec > +;; register. > > (define_insn_and_split "addti3" > - [(set (match_operand:TI 0 "gpc_reg_operand" "=&r,r,r") > - (plus:TI (match_operand:TI 1 "gpc_reg_operand" "r, 0,r") > - (match_operand:TI 2 "reg_or_short_operand" "rI,r,0"))) > + [(set (match_operand:TI 0 "gpc_reg_operand" "=&r, r,r,v") > + (plus:TI (match_operand:TI 1 "gpc_reg_operand" "r, 0,r,v") > + (match_operand:TI 2 "reg_or_short_operand" "rI,r,0,v"))) Nit: I still can't tell whether the "r, 0,r,v" should be comma-space delimited or just comma delimited. Remainder looks OK. 
thanks -Will > (clobber (reg:DI CA_REGNO))] > "TARGET_64BIT" > - "#" > - "&& 1" > + "@ > + # > + # > + # > + vadduqm %0,%1,%2" > + "&& reload_completed && int_reg_operand (operands[0], TImode)" > [(pc)] > { > rtx lo0 = gen_lowpart (DImode, operands[0]); > @@ -7157,27 +7164,27 @@ (define_insn_and_split "addti3" > rtx hi1 = gen_highpart (DImode, operands[1]); > rtx hi2 = gen_highpart_mode (DImode, TImode, operands[2]); > > - if (!reg_or_short_operand (lo2, DImode)) > - lo2 = force_reg (DImode, lo2); > - if (!adde_operand (hi2, DImode)) > - hi2 = force_reg (DImode, hi2); > - > emit_insn (gen_adddi3_carry (lo0, lo1, lo2)); > emit_insn (gen_adddi3_carry_in (hi0, hi1, hi2)); > DONE; > } > - [(set_attr "length" "8") > + [(set_attr "length" "8,8,8,*") > + (set_attr "isa" "*,*,*,p8v") > (set_attr "type" "add") > (set_attr "size" "128")]) > > (define_insn_and_split "subti3" > - [(set (match_operand:TI 0 "gpc_reg_operand" "=&r,r,r") > - (minus:TI (match_operand:TI 1 "reg_or_short_operand" "rI,0,r") > - (match_operand:TI 2 "gpc_reg_operand" "r, r,0"))) > + [(set (match_operand:TI 0 "gpc_reg_operand" "=&r, r,r,v") > + (minus:TI (match_operand:TI 1 "reg_or_short_operand" "rI,0,r,v") > + (match_operand:TI 2 "gpc_reg_operand" "r, r,0,v"))) > (clobber (reg:DI CA_REGNO))] > "TARGET_64BIT" > - "#" > - "&& 1" > + "@ > + # > + # > + # > + vsubuqm %0,%1,%2" > + "&& reload_completed && int_reg_operand (operands[0], TImode)" > [(pc)] > { > rtx lo0 = gen_lowpart (DImode, operands[0]); > @@ -7187,16 +7194,49 @@ (define_insn_and_split "subti3" > rtx hi1 = gen_highpart_mode (DImode, TImode, operands[1]); > rtx hi2 = gen_highpart (DImode, operands[2]); > > - if (!reg_or_short_operand (lo1, DImode)) > - lo1 = force_reg (DImode, lo1); > - if (!adde_operand (hi1, DImode)) > - hi1 = force_reg (DImode, hi1); > - > emit_insn (gen_subfdi3_carry (lo0, lo2, lo1)); > emit_insn (gen_subfdi3_carry_in (hi0, hi2, hi1)); > DONE; > +} > + [(set_attr "length" "8,8,8,*") > + (set_attr "isa" "*,*,*,p8v") > + 
(set_attr "type" "add") > + (set_attr "size" "128")]) > + > +;; 128-bit integer negation, normally use GPRs. If we are using Altivec > +;; registers, create a 0 and do a vsubuqm. > +(define_insn_and_split "negti3" > + [(set (match_operand:TI 0 "gpc_reg_operand" "=&r,&v") > + (neg:TI (match_operand:TI 1 "gpc_reg_operand" "r,v"))) > + (clobber (reg:DI CA_REGNO))] > + "TARGET_64BIT" > + "#" > + "&& reload_completed" > + [(pc)] > +{ > + rtx dest = operands[0]; > + rtx src = operands[1]; > + > + if (altivec_register_operand (dest, TImode)) > + { > + emit_move_insn (dest, const0_rtx); > + emit_insn (gen_subti3 (dest, dest, src)); > + DONE; > + } > + else > + { > + rtx dest_lo = gen_lowpart (DImode, dest); > + rtx dest_hi = gen_highpart (DImode, dest); > + rtx src_lo = gen_lowpart (DImode, src); > + rtx src_hi = gen_highpart (DImode, src); > + > + emit_insn (gen_subfdi3_carry (dest_lo, src_lo, const0_rtx)); > + emit_insn (gen_subfdi3_carry_in (dest_hi, src_hi, const0_rtx)); > + DONE; > + } > } > [(set_attr "length" "8") > + (set_attr "isa" "*,p8v") > (set_attr "type" "add") > (set_attr "size" "128")]) > > diff --git a/gcc/testsuite/gcc.target/powerpc/vadduqm-vsubuqm.c > b/gcc/testsuite/gcc.target/powerpc/vadduqm-vsubuqm.c > new file mode 100644 > index 00000000000..5cb2fe37e9c > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/vadduqm-vsubuqm.c > @@ -0,0 +1,22 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target int128 } */ > +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */ > +/* { dg-require-effective-target power10_ok } */ > + > +/* Test that we generate vadduqm and vsubuqm for 128-bit integer add and > + subtracts if the value is in an Altivec register. We use 128-bit divide > to > + force the register selection to be in an altivec register. */ > + > +void > +test (__int128_t *p, > + __int128_t *q, > + __int128_t *r, > + __int128_t *s, > + __int128_t *t) > +{ > + *p = (*q + *r) / (*s - *t); /* vadduqm, vsubuqm, vdivsq. 
*/ > +} > + > +/* { dg-final { scan-assembler {\mvadduqm\M} } } */ > +/* { dg-final { scan-assembler {\mvdivsq\M} } } */ > +/* { dg-final { scan-assembler {\mvsubuqm\M} } } */ > -- > 2.35.3 > >