On Fri, 2022-05-13 at 12:19 -0400, Michael Meissner wrote:
> Generate vadduqm and vsubuqm for TImode add/subtract
> 
> If the TImode variable is in an Altivec register instead of a GPR
> register, then generate vadduqm and vsubuqm instead of having to move the
> value to the GPR registers and doing the add and subtract with carry
> instructions.  To do this, we have to delay the splitting of the addition
> and subtraction until after register allocation.

Ok.


> 
> I have built this patch on little endian power10, little endian power9, and
> big endian power8 systems.  There were no regressions.  Can I install this
> patch to the GCC 13 master branch?
> 
> 2022-05-13   Michael Meissner  <meiss...@linux.ibm.com>
> 
> gcc/
>       * config/rs6000/rs6000.md (addti3): Generate vadduqm if we are
>       using the Altivec registers.
>       (subti3): Generate vsubuqm if we are using the Altivec registers.
>       (negti3): New insn.
> 
> gcc/testsuite/
>       * gcc.target/powerpc/vadduqm-vsubuqm.c: New test.
> ---
>  gcc/config/rs6000/rs6000.md                   | 82 ++++++++++++++-----
>  .../gcc.target/powerpc/vadduqm-vsubuqm.c      | 22 +++++
>  2 files changed, 83 insertions(+), 21 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/vadduqm-vsubuqm.c
> 
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index 83eacec57ba..f120ca0b48d 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -7139,15 +7139,22 @@ (define_expand "feraiseexceptsi"
>  ;;
>  ;; Addti3/subti3 are define_insn_and_splits instead of define_expand, to allow
>  ;; for combine to make things like multiply and add with extend operations.
> +;;
> +;; Also add support in case the 128-bit integer happens to be an Altivec
> +;; register.
> 
>  (define_insn_and_split "addti3"
> -  [(set (match_operand:TI 0 "gpc_reg_operand"               "=&r,r,r")
> -     (plus:TI (match_operand:TI 1 "gpc_reg_operand"       "r, 0,r")
> -              (match_operand:TI 2 "reg_or_short_operand"  "rI,r,0")))
> +  [(set (match_operand:TI 0 "gpc_reg_operand"              "=&r, r,r,v")
> +     (plus:TI (match_operand:TI 1 "gpc_reg_operand"       "r, 0,r,v")
> +              (match_operand:TI 2 "reg_or_short_operand"  "rI,r,0,v")))

Nit: I still can't tell whether the constraint strings such as "r, 0,r,v"
should be comma-space delimited or just comma delimited.

Remainder looks OK.  
thanks
-Will



>     (clobber (reg:DI CA_REGNO))]
>    "TARGET_64BIT"
> -  "#"
> -  "&& 1"
> +  "@
> +   #
> +   #
> +   #
> +   vadduqm %0,%1,%2"
> +  "&& reload_completed && int_reg_operand (operands[0], TImode)"
>    [(pc)]
>  {
>    rtx lo0 = gen_lowpart (DImode, operands[0]);
> @@ -7157,27 +7164,27 @@ (define_insn_and_split "addti3"
>    rtx hi1 = gen_highpart (DImode, operands[1]);
>    rtx hi2 = gen_highpart_mode (DImode, TImode, operands[2]);
> 
> -  if (!reg_or_short_operand (lo2, DImode))
> -    lo2 = force_reg (DImode, lo2);
> -  if (!adde_operand (hi2, DImode))
> -    hi2 = force_reg (DImode, hi2);
> -
>    emit_insn (gen_adddi3_carry (lo0, lo1, lo2));
>    emit_insn (gen_adddi3_carry_in (hi0, hi1, hi2));
>    DONE;
>  }
> -  [(set_attr "length" "8")
> +  [(set_attr "length" "8,8,8,*")
> +   (set_attr "isa"    "*,*,*,p8v")
>     (set_attr "type"   "add")
>     (set_attr "size"   "128")])
> 
>  (define_insn_and_split "subti3"
> -  [(set (match_operand:TI 0 "gpc_reg_operand"                "=&r,r,r")
> -     (minus:TI (match_operand:TI 1 "reg_or_short_operand" "rI,0,r")
> -               (match_operand:TI 2 "gpc_reg_operand"      "r, r,0")))
> +  [(set (match_operand:TI 0 "gpc_reg_operand"                "=&r, r,r,v")
> +     (minus:TI (match_operand:TI 1 "reg_or_short_operand"  "rI,0,r,v")
> +               (match_operand:TI 2 "gpc_reg_operand"       "r, r,0,v")))
>     (clobber (reg:DI CA_REGNO))]
>    "TARGET_64BIT"
> -  "#"
> -  "&& 1"
> +  "@
> +   #
> +   #
> +   #
> +   vsubuqm %0,%1,%2"
> +  "&& reload_completed && int_reg_operand (operands[0], TImode)"
>    [(pc)]
>  {
>    rtx lo0 = gen_lowpart (DImode, operands[0]);
> @@ -7187,16 +7194,49 @@ (define_insn_and_split "subti3"
>    rtx hi1 = gen_highpart_mode (DImode, TImode, operands[1]);
>    rtx hi2 = gen_highpart (DImode, operands[2]);
> 
> -  if (!reg_or_short_operand (lo1, DImode))
> -    lo1 = force_reg (DImode, lo1);
> -  if (!adde_operand (hi1, DImode))
> -    hi1 = force_reg (DImode, hi1);
> -
>    emit_insn (gen_subfdi3_carry (lo0, lo2, lo1));
>    emit_insn (gen_subfdi3_carry_in (hi0, hi2, hi1));
>    DONE;
> +}
> +  [(set_attr "length" "8,8,8,*")
> +   (set_attr "isa"    "*,*,*,p8v")
> +   (set_attr "type"   "add")
> +   (set_attr "size"   "128")])
> +
> +;; 128-bit integer negation, normally use GPRs.  If we are using Altivec
> +;; registers, create a 0 and do a vsubuqm.
> +(define_insn_and_split "negti3"
> +  [(set (match_operand:TI 0 "gpc_reg_operand"         "=&r,&v")
> +     (neg:TI (match_operand:TI 1 "gpc_reg_operand"   "r,v")))
> +   (clobber (reg:DI CA_REGNO))]
> +  "TARGET_64BIT"
> +  "#"
> +  "&& reload_completed"
> +  [(pc)]
> +{
> +  rtx dest = operands[0];
> +  rtx src = operands[1];
> +
> +  if (altivec_register_operand (dest, TImode))
> +    {
> +      emit_move_insn (dest, const0_rtx);
> +      emit_insn (gen_subti3 (dest, dest, src));
> +      DONE;
> +    }
> +  else
> +    {
> +      rtx dest_lo = gen_lowpart (DImode, dest);
> +      rtx dest_hi = gen_highpart (DImode, dest);
> +      rtx src_lo = gen_lowpart (DImode, src);
> +      rtx src_hi = gen_highpart (DImode, src);
> +
> +      emit_insn (gen_subfdi3_carry (dest_lo, src_lo, const0_rtx));
> +      emit_insn (gen_subfdi3_carry_in (dest_hi, src_hi, const0_rtx));
> +      DONE;
> +    }
>  }
>    [(set_attr "length" "8")
> +   (set_attr "isa"    "*,p8v")
>     (set_attr "type"   "add")
>     (set_attr "size"   "128")])
>  
> diff --git a/gcc/testsuite/gcc.target/powerpc/vadduqm-vsubuqm.c b/gcc/testsuite/gcc.target/powerpc/vadduqm-vsubuqm.c
> new file mode 100644
> index 00000000000..5cb2fe37e9c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/vadduqm-vsubuqm.c
> @@ -0,0 +1,22 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target int128 } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
> +/* { dg-require-effective-target power10_ok } */
> +
> +/* Test that we generate vadduqm and vsubuqm for 128-bit integer add and
> +   subtracts if the value is in an Altivec register.  We use 128-bit divide to
> +   force the register selection to be in an altivec register.  */
> +
> +void
> +test (__int128_t *p,
> +      __int128_t *q,
> +      __int128_t *r,
> +      __int128_t *s,
> +      __int128_t *t)
> +{
> +  *p = (*q + *r) / (*s - *t);        /* vadduqm, vsubuqm, vdivsq.  */
> +}
> +
> +/* { dg-final { scan-assembler {\mvadduqm\M} } } */
> +/* { dg-final { scan-assembler {\mvdivsq\M}  } } */
> +/* { dg-final { scan-assembler {\mvsubuqm\M} } } */
> -- 
> 2.35.3
> 
> 

Reply via email to