On 10/8/25 09:47, Shreya Munnangi wrote:
> In pr120811, we have cases where GCC is emitting an extra |addi| instruction
> instead of using the 12-bit signed-immediate of |ld|.
> |addi t1, t1, 1 ld t1, 0(t1) |
>
> This problem occurs when |fp -> sp+offset| elimination results in an
> out-of-range constant and we generate an address reload in LRA using
> |addsi|/|adddi| expanders.
>
> We've already adjusted the expanders to widen the set of valid operands to
> allow more constants for the 2nd input operand. These expanders, rather than
> constructing the constant into a register and using an |add| instruction, will
> generate two |addi| instructions (or |shNadd|) during initial RTL generation.
>
> We define a new pattern for cases where we need to access the current frame
> and the offsets are too large. This gets reasonable code out of LRA in a form
> |fold-mem-offsets| can handle, rather than having to wait for |sched2| to do
> the height reduction transformation and leaving in the unnecessary |add|
> instruction in the RTL stream.
>
> To avoid the two |addi| instructions being squashed back together in the
> post-reload combine, we remove the |adddi3_const_sum_of_two_s12| pattern.
>

RIP |adddi3_const_sum_of_two_s12, you served well :-)|

> We are seeing about 100 billion dynamic instructions saved which is about 5%
> on cactuBSSN and a 2% improvement in performance on the BPI.
>
>     PR target/120811
>
> gcc/
>
>     * config/riscv/riscv.cc (synthesize_add): Exchange constant terms when
>
>     generating addi pairs.
>
>     (synthesize_addsi): Similarly.
>
>     * config/riscv/riscv.md (addptr<mode>3): New define_expand.
>     (*add<mode>3_const_sum_of_two_s12): Remove pattern.
>
>
> gcc/testsuite/
>
>     * gcc.target/riscv/add-synthesis-1.c: Adjust const to fit in range.
>
>     * gcc.target/riscv/pr120811.c: Add new test case.
>     * gcc.target/riscv/sum-of-two-s12-const-1.c: Adjust const to fit in range.
>

> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
> index 1aaafff9480..3a096b6630f 100644
> --- a/gcc/config/riscv/riscv.cc
> +++ b/gcc/config/riscv/riscv.cc
> @@ -15408,9 +15408,9 @@ synthesize_add (rtx operands[3])
>  
>        ival -= saturated;
>  
> -      rtx x = gen_rtx_PLUS (word_mode, operands[1], GEN_INT (saturated));
> +      rtx x = gen_rtx_PLUS (word_mode, operands[1], GEN_INT (ival));
>        emit_insn (gen_rtx_SET (operands[0], x));
> -      rtx output = gen_rtx_PLUS (word_mode, operands[0], GEN_INT (ival));
> +      rtx output = gen_rtx_PLUS (word_mode, operands[0], GEN_INT 
> (saturated));

As Jeff mentioned this at Cauldron chat, the swapping of operands (smaller
first) helps with reload outcomes.
Maybe a comment or two to that effect will preserve that info for posterity.

FWIW I'm doing a local build to confirm the numbers as well.

Some minor comments below but consider

Reviewed:by: Vineet Gupta <[email protected]>

Thx,
-Vineet

>        emit_insn (gen_rtx_SET (operands[0], output));
>        return true;
>      }
> @@ -15539,13 +15539,13 @@ synthesize_addsi (rtx operands[3])
>        ival -= saturated;
>  
>        rtx temp = gen_reg_rtx (DImode);  
> -      emit_insn (gen_addsi3_extended (temp, operands[1], GEN_INT 
> (saturated)));
> +      emit_insn (gen_addsi3_extended (temp, operands[1], GEN_INT (ival)));
>        temp = gen_lowpart (SImode, temp);
>        SUBREG_PROMOTED_VAR_P (temp) = 1;
>        SUBREG_PROMOTED_SET (temp, SRP_SIGNED);
>        emit_insn (gen_rtx_SET (operands[0], temp));
>        rtx t = gen_reg_rtx (DImode);
> -      emit_insn (gen_addsi3_extended (t, operands[0], GEN_INT (ival)));
> +      emit_insn (gen_addsi3_extended (t, operands[0], GEN_INT (saturated)));
>        t = gen_lowpart (SImode, t);
>        SUBREG_PROMOTED_VAR_P (t) = 1;
>        SUBREG_PROMOTED_SET (t, SRP_SIGNED);
> diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
> index bf9cd589d0d..d4e57e93d34 100644
> --- a/gcc/config/riscv/riscv.md
> +++ b/gcc/config/riscv/riscv.md
> @@ -702,6 +702,22 @@ (define_insn "add<mode>3"
>    [(set_attr "type" "fadd")
>     (set_attr "mode" "<UNITMODE>")])
>  
> +(define_expand "addptr<mode>3"
> +  [(set (match_operand:X      0 "register_operand")
> +        (plus:X (match_operand:X 1 "register_operand")
> +                (match_operand      2 "const_int_operand")))]
> +  ""
> +{
> +  gcc_assert (CONST_INT_P (operands[2]));
> +  bool status = synthesize_add (operands);
> +
> +  if (!SMALL_OPERAND (INTVAL (operands[2])))
> +    {
> +      gcc_assert (status);
> +      DONE;
> +    }
> +})
> +
>  (define_insn "*addsi3"
>    [(set (match_operand:SI          0 "register_operand" "=r,r")
>      (plus:SI (match_operand:SI 1 "register_operand" " r,r")
> @@ -763,46 +779,6 @@ (define_insn "*adddi3"
>    [(set_attr "type" "arith")
>     (set_attr "mode" "DI")])
>  
> -;; Special case of adding a reg and constant if latter is sum of two S12
> -;; values (in range -2048 to 2047). Avoid materialized the const and fuse
> -;; into the add (with an additional add for 2nd value). Makes a 3 insn
> -;; sequence into 2 insn.
> -
> -(define_insn_and_split "*add<mode>3_const_sum_of_two_s12"
> -  [(set (match_operand:P     0 "register_operand" "=r,r")
> -    (plus:P (match_operand:P 1 "register_operand" " r,r")
> -        (match_operand:P 2 "const_two_s12"    " MiG,r")))]
> -  "!riscv_reg_frame_related (operands[0])"
> -{
> -  /* operand matching MiG constraint is always meant to be split.  */
> -  if (which_alternative == 0)
> -    return "#";
> -  else
> -    return "add %0,%1,%2";
> -}
> -  ""
> -  [(set (match_dup 0)
> -    (plus:P (match_dup 1) (match_dup 3)))
> -   (set (match_dup 0)
> -    (plus:P (match_dup 0) (match_dup 4)))]
> -{
> -  int val = INTVAL (operands[2]);
> -  if (SUM_OF_TWO_S12_P (val))
> -    {
> -       operands[3] = GEN_INT (2047);
> -       operands[4] = GEN_INT (val - 2047);
> -    }
> -  else if (SUM_OF_TWO_S12_N (val))
> -    {
> -       operands[3] = GEN_INT (-2048);
> -       operands[4] = GEN_INT (val + 2048);
> -    }
> -  else
> -      gcc_unreachable ();
> -}
> -  [(set_attr "type" "arith")
> -   (set_attr "mode" "<P:MODE>")])
> -
>  (define_expand "addv<mode>4"
>    [(set (match_operand:GPR           0 "register_operand" "=r,r")
>      (plus:GPR (match_operand:GPR 1 "register_operand" " r,r")
> diff --git a/gcc/testsuite/gcc.target/riscv/add-synthesis-1.c
b/gcc/testsuite/gcc.target/riscv/add-synthesis-1.c
> index 247096cec00..982b2fa7f2b 100644
> --- a/gcc/testsuite/gcc.target/riscv/add-synthesis-1.c
> +++ b/gcc/testsuite/gcc.target/riscv/add-synthesis-1.c
> @@ -25,7 +25,7 @@ T (4100)
>  T (8200)
>  
>  TM (2049)
> -TM (4096)
> +TM (4094)

So it seems -4096 is a special little snowflake.

Prev:
minus1:
    addi    a0,a0,-2048
    addi    a0,a0,-2048
    ret

W/o Patch:
minus1:
    li    a5,-4096
    add    a0,a0,a5
    ret

We are burning an extra register, perhaps something to improve for future ?

>  TM (4100)
>  TM (8200)
>  
> diff --git a/gcc/testsuite/gcc.target/riscv/pr120811.c
b/gcc/testsuite/gcc.target/riscv/pr120811.c

This causes a collision as the the file already sneaked into upstream from 
commit
afdf44154fd "(RISC-V: Only Save/Restore required registers for ILP32E/LP64E)"

> new file mode 100644
> index 00000000000..b8a7347d3c9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/pr120811.c
> @@ -0,0 +1,41 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -march=rv64gc -mabi=lp64d -fdump-rtl-reload
-fdump-rtl-late_combine2" { target { rv64 } } } */
> +/* { dg-options "-O2 -march=rv32gc -mabi=ilp32d -fdump-rtl-reload
-fdump-rtl-late_combine2" { target { rv32 } } } */
> +/* { dg-skip-if "" { *-*-* } "-fomit-frame-pointer" } */
> +
> +double *a, *b, *ab, *ac, *ad, *ae, *af, *c, *i, *k, *l;
> +int n(int *, int, int, int), d, ag, aj;
> +long ai, e;
> +double f, g, h, o, p;
> +int m;
> +void q() {
> +  long am = n(&d, 0, 1, 0);
> +  for (;;)
> +    for (int j = ag; j; ++j) {
> +      int ao = 0;
> +      for (; ao < aj; ao++) {
> +        long ap = ao + j;
> +        double ar = ad[ap] = ae[ap], az;
> +        switch (m)
> +        case 4: {
> +          o = (&l[ap])[2] - (&l[ap])[3] * ((char *)ap)[am] + (&l[ap])[e] +
> +              (&l[ap])[am];
> +          p = (&l[ap])[ai - 1] + (&ap)[ai] * (&l[ap])[am * ai] +
> +              (&l[ap])[ai * 3];
> +          az = (&b[ap])[1] +
> +               (&b[ap])[3] * i[ap] * (&ap)[203] * ((char *)&i[ap])[ai] -
> +               (&i[ap])[ai * 3];
> +          g = (&i[ap])[e] + (&i[ap])[ai];
> +          h = (&ab[ap])[am] * (&ab[ap])[2] - ((char *)ap)[ai] +
> +              (&ab[ap])[ai] * (&ap)[0] * (&af[ap])[am] * (&af[ap])[e] +
> +              (&ap)[2] + (&af[ap])[e * am] + (&af[ap])[am * 3];
> +        }
> +          ar = az * f * k[ap];
> +        c[ap] = ar;
> +        a[ap] = ac[ap];
> +      }
> +    }
> +}
> +/* { dg-final { scan-rtl-dump-not "const_sum_of_two_s12" "reload" } } */
> +/* { dg-final { scan-rtl-dump-not "const_sum_of_two_s12" "late_combine2" } } 
> */
> +/* { dg-final { scan-assembler
"addi.*\[ats\]\[0-9\]*,sp,\[0-9\]*\n\tld\t.*,2047\(.*\).*" } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/sum-of-two-s12-const-1.c
b/gcc/testsuite/gcc.target/riscv/sum-of-two-s12-const-1.c
> index 4d6d135de5f..ac17ec271be 100644
> --- a/gcc/testsuite/gcc.target/riscv/sum-of-two-s12-const-1.c
> +++ b/gcc/testsuite/gcc.target/riscv/sum-of-two-s12-const-1.c
> @@ -26,7 +26,7 @@ plus3(unsigned long i)
>  long
>  minus1(unsigned long i)
>  {
> -   return i - 4096;
> +   return i - 4094;
>  }
>  
>  long

Reply via email to