https://gcc.gnu.org/g:259f9f2c67458b594fec9eac9df0ddb8a5a27867

commit 259f9f2c67458b594fec9eac9df0ddb8a5a27867
Author: Vineet Gupta <vine...@rivosinc.com>
Date:   Mon May 13 11:46:03 2024 -0700

    RISC-V: avoid LUI based const mat in prologue/epilogue expansion [PR/105733]
    
    If the constant used for stack offset can be expressed as sum of two S12
    values, the constant need not be materialized (in a reg) and instead the
    two S12 bits can be added to instructions involved with frame pointer.
    This avoids burning a register and more importantly can often get down
    to be 2 insn vs. 3.
    
    The previous patches to generally avoid LUI based const materialization
    didn't fix this PR, so this directed fix in function prologue/epilogue
    expansion is needed.
    
    This fix doesn't move the needle for SPEC, at all, but it is still a
    win considering gcc generates one insn fewer than llvm for the test ;-)
    
       gcc-13.1 release   |      gcc 230823     |                   |
                          |    g6619b3d4c15c    |   This patch      |  clang/llvm
    ---------------------------------------------------------------------------------
    li      t0,-4096     | li    t0,-4096      | addi  sp,sp,-2048 | addi sp,sp,-2048
    addi    t0,t0,2016   | addi  t0,t0,2032    | add   sp,sp,-16   | addi sp,sp,-32
    li      a4,4096      | add   sp,sp,t0      | add   a5,sp,a0    | add  a1,sp,16
    add     sp,sp,t0     | addi  a5,sp,-2032   | sb    zero,0(a5)  | add  a0,a0,a1
    li      a5,-4096     | add   a0,a5,a0      | addi  sp,sp,2032  | sb   zero,0(a0)
    addi    a4,a4,-2032  | li    t0, 4096      | addi  sp,sp,32    | addi sp,sp,2032
    add     a4,a4,a5     | sb    zero,2032(a0) | ret               | addi sp,sp,48
    addi    a5,sp,16     | addi  t0,t0,-2032   |                   | ret
    add     a5,a4,a5     | add   sp,sp,t0      |
    add     a0,a5,a0     | ret                 |
    li      t0,4096      |
    sd      a5,8(sp)     |
    sb      zero,2032(a0)|
    addi    t0,t0,-2016  |
    add     sp,sp,t0     |
    ret                  |
    
    gcc/ChangeLog:
            PR target/105733
            * config/riscv/riscv.h: New macros with aligned offsets.
            * config/riscv/riscv.cc (riscv_split_sum_of_two_s12): New
            function to split a sum of two s12 values into constituents.
            (riscv_expand_prologue): Handle offset being sum of two S12.
            (riscv_expand_epilogue): Ditto.
            * config/riscv/riscv-protos.h (riscv_split_sum_of_two_s12): New.
    
    gcc/testsuite/ChangeLog:
            * gcc.target/riscv/pr105733.c: New Test.
            * gcc.target/riscv/rvv/autovec/vls/spill-1.c: Adjust to not
            expect LUI 4096.
            * gcc.target/riscv/rvv/autovec/vls/spill-2.c: Ditto.
            * gcc.target/riscv/rvv/autovec/vls/spill-3.c: Ditto.
            * gcc.target/riscv/rvv/autovec/vls/spill-4.c: Ditto.
            * gcc.target/riscv/rvv/autovec/vls/spill-5.c: Ditto.
            * gcc.target/riscv/rvv/autovec/vls/spill-6.c: Ditto.
            * gcc.target/riscv/rvv/autovec/vls/spill-7.c: Ditto.
    
    Tested-by: Edwin Lu <e...@rivosinc.com> # pre-commit-CI #1568
    Signed-off-by: Vineet Gupta <vine...@rivosinc.com>
    (cherry picked from commit f9cfc192ed0127edb7e79818917dd2859fce4d44)

Diff:
---
 gcc/config/riscv/riscv-protos.h                    |  2 +
 gcc/config/riscv/riscv.cc                          | 54 ++++++++++++++++++++--
 gcc/config/riscv/riscv.h                           |  7 +++
 gcc/testsuite/gcc.target/riscv/pr105733.c          | 15 ++++++
 .../gcc.target/riscv/rvv/autovec/vls/spill-1.c     |  4 +-
 .../gcc.target/riscv/rvv/autovec/vls/spill-2.c     |  4 +-
 .../gcc.target/riscv/rvv/autovec/vls/spill-3.c     |  4 +-
 .../gcc.target/riscv/rvv/autovec/vls/spill-4.c     |  4 +-
 .../gcc.target/riscv/rvv/autovec/vls/spill-5.c     |  4 +-
 .../gcc.target/riscv/rvv/autovec/vls/spill-6.c     |  4 +-
 .../gcc.target/riscv/rvv/autovec/vls/spill-7.c     |  4 +-
 11 files changed, 89 insertions(+), 17 deletions(-)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index c64aae18deb..0704968561b 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -167,6 +167,8 @@ extern void riscv_subword_address (rtx, rtx *, rtx *, rtx 
*, rtx *);
 extern void riscv_lshift_subword (machine_mode, rtx, rtx, rtx *);
 extern enum memmodel riscv_union_memmodels (enum memmodel, enum memmodel);
 extern bool riscv_reg_frame_related (rtx);
+extern void riscv_split_sum_of_two_s12 (HOST_WIDE_INT, HOST_WIDE_INT *,
+                                       HOST_WIDE_INT *);
 
 /* Routines implemented in riscv-c.cc.  */
 void riscv_cpu_cpp_builtins (cpp_reader *);
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index d0c22058b8c..2ecbcf1d0af 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -4075,6 +4075,32 @@ riscv_split_doubleword_move (rtx dest, rtx src)
        riscv_emit_move (riscv_subword (dest, true), riscv_subword (src, true));
      }
 }
+
+/* Constant VAL is known to be sum of two S12 constants.  Break it into
+   comprising BASE and OFF.
+   Numerically S12 is -2048 to 2047, however it uses the more conservative
+   range -2048 to 2032 as offsets pertain to stack related registers.  */
+
+void
+riscv_split_sum_of_two_s12 (HOST_WIDE_INT val, HOST_WIDE_INT *base,
+                           HOST_WIDE_INT *off)
+{
+  if (SUM_OF_TWO_S12_N (val))
+    {
+      *base = -2048;
+      *off = val - (-2048);
+    }
+  else if (SUM_OF_TWO_S12_P_ALGN (val))
+    {
+      *base = 2032;
+      *off = val - 2032;
+    }
+  else
+    {
+      gcc_unreachable ();
+    }
+}
+
 
 /* Return the appropriate instructions to move SRC into DEST.  Assume
    that SRC is operand 1 and DEST is operand 0.  */
@@ -7864,6 +7890,17 @@ riscv_expand_prologue (void)
                                GEN_INT (-constant_frame));
          RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
        }
+      else if (SUM_OF_TWO_S12_ALGN (-constant_frame))
+       {
+         HOST_WIDE_INT one, two;
+         riscv_split_sum_of_two_s12 (-constant_frame, &one, &two);
+         insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
+                               GEN_INT (one));
+         RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
+         insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
+                               GEN_INT (two));
+         RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
+       }
       else
        {
          riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode), GEN_INT 
(-constant_frame));
@@ -8160,10 +8197,21 @@ riscv_expand_epilogue (int style)
 
       /* Get an rtx for STEP1 that we can add to BASE.
         Skip if adjust equal to zero.  */
-      if (step1.to_constant () != 0)
+      HOST_WIDE_INT step1_value = step1.to_constant ();
+      if (step1_value != 0)
        {
-         rtx adjust = GEN_INT (step1.to_constant ());
-         if (!SMALL_OPERAND (step1.to_constant ()))
+         rtx adjust = GEN_INT (step1_value);
+         if (SUM_OF_TWO_S12_ALGN (step1_value))
+           {
+             HOST_WIDE_INT one, two;
+             riscv_split_sum_of_two_s12 (step1_value, &one, &two);
+             insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
+                                               stack_pointer_rtx,
+                                               GEN_INT (one)));
+             RTX_FRAME_RELATED_P (insn) = 1;
+             adjust = GEN_INT (two);
+           }
+         else if (!SMALL_OPERAND (step1_value))
            {
              riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode), adjust);
              adjust = RISCV_PROLOGUE_TEMP (Pmode);
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index 0d27c0d378d..d6b14c4d620 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -641,6 +641,13 @@ enum reg_class
 #define SUM_OF_TWO_S12(VALUE)                                          \
   (SUM_OF_TWO_S12_N (VALUE) || SUM_OF_TWO_S12_P (VALUE))
 
+/* Variant with first value 8 byte aligned if involving stack regs.  */
+#define SUM_OF_TWO_S12_P_ALGN(VALUE)                           \
+  (((VALUE) >= (2032 + 1)) && ((VALUE) <= (2032 * 2)))
+
+#define SUM_OF_TWO_S12_ALGN(VALUE)                             \
+  (SUM_OF_TWO_S12_N (VALUE) || SUM_OF_TWO_S12_P_ALGN (VALUE))
+
 /* If this is a single bit mask, then we can load it with bseti.  Special
    handling of SImode 0x80000000 on RV64 is done in riscv_build_integer_1. */
 #define SINGLE_BIT_MASK_OPERAND(VALUE)                                 \
diff --git a/gcc/testsuite/gcc.target/riscv/pr105733.c 
b/gcc/testsuite/gcc.target/riscv/pr105733.c
new file mode 100644
index 00000000000..6156c36dc7e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr105733.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options { -march=rv64gcv -mabi=lp64d } } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-Os" "-Oz" } } */
+
+#define BUF_SIZE 2064
+
+void
+foo(unsigned long i)
+{
+    volatile char buf[BUF_SIZE];
+
+    buf[i] = 0;
+}
+
+/* { dg-final { scan-assembler-not {li\t[a-x0-9]+,4096} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/spill-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/spill-1.c
index b64c73f34f1..6afcf1db593 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/spill-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/spill-1.c
@@ -129,5 +129,5 @@ spill_12 (int8_t *in, int8_t *out)
 /* { dg-final { scan-assembler-times {addi\tsp,sp,-256} 1 } } */
 /* { dg-final { scan-assembler-times {addi\tsp,sp,-512} 1 } } */
 /* { dg-final { scan-assembler-times {addi\tsp,sp,-1024} 1 } } */
-/* { dg-final { scan-assembler-times {addi\tsp,sp,-2048} 1 } } */
-/* { dg-final { scan-assembler-times 
{li\t[a-x0-9]+,-4096\s+add\tsp,sp,[a-x0-9]+} 1 } } */
+/* { dg-final { scan-assembler-times {addi\tsp,sp,-2048} 3 } } */
+/* { dg-final { scan-assembler-times {addi\tsp,sp,2032} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/spill-2.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/spill-2.c
index 8fcdca70538..544e8628a27 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/spill-2.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/spill-2.c
@@ -120,5 +120,5 @@ spill_11 (int16_t *in, int16_t *out)
 /* { dg-final { scan-assembler-times {addi\tsp,sp,-256} 1 } } */
 /* { dg-final { scan-assembler-times {addi\tsp,sp,-512} 1 } } */
 /* { dg-final { scan-assembler-times {addi\tsp,sp,-1024} 1 } } */
-/* { dg-final { scan-assembler-times {addi\tsp,sp,-2048} 1 } } */
-/* { dg-final { scan-assembler-times 
{li\t[a-x0-9]+,-4096\s+add\tsp,sp,[a-x0-9]+} 1 } } */
+/* { dg-final { scan-assembler-times {addi\tsp,sp,-2048} 3 } } */
+/* { dg-final { scan-assembler-times {addi\tsp,sp,2032} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/spill-3.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/spill-3.c
index ca296ce02d6..4bfeb07e9ac 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/spill-3.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/spill-3.c
@@ -111,5 +111,5 @@ spill_10 (int32_t *in, int32_t *out)
 /* { dg-final { scan-assembler-times {addi\tsp,sp,-256} 1 } } */
 /* { dg-final { scan-assembler-times {addi\tsp,sp,-512} 1 } } */
 /* { dg-final { scan-assembler-times {addi\tsp,sp,-1024} 1 } } */
-/* { dg-final { scan-assembler-times {addi\tsp,sp,-2048} 1 } } */
-/* { dg-final { scan-assembler-times 
{li\t[a-x0-9]+,-4096\s+add\tsp,sp,[a-x0-9]+} 1 } } */
+/* { dg-final { scan-assembler-times {addi\tsp,sp,-2048} 3 } } */
+/* { dg-final { scan-assembler-times {addi\tsp,sp,2032} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/spill-4.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/spill-4.c
index ef61d9a2c0c..1faf31ffd8e 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/spill-4.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/spill-4.c
@@ -102,5 +102,5 @@ spill_9 (int64_t *in, int64_t *out)
 /* { dg-final { scan-assembler-times {addi\tsp,sp,-256} 1 } } */
 /* { dg-final { scan-assembler-times {addi\tsp,sp,-512} 1 } } */
 /* { dg-final { scan-assembler-times {addi\tsp,sp,-1024} 1 } } */
-/* { dg-final { scan-assembler-times {addi\tsp,sp,-2048} 1 } } */
-/* { dg-final { scan-assembler-times 
{li\t[a-x0-9]+,-4096\s+add\tsp,sp,[a-x0-9]+} 1 } } */
+/* { dg-final { scan-assembler-times {addi\tsp,sp,-2048} 3 } } */
+/* { dg-final { scan-assembler-times {addi\tsp,sp,2032} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/spill-5.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/spill-5.c
index 150135a9110..0c8dccc518e 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/spill-5.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/spill-5.c
@@ -120,5 +120,5 @@ spill_11 (_Float16 *in, _Float16 *out)
 /* { dg-final { scan-assembler-times {addi\tsp,sp,-256} 1 } } */
 /* { dg-final { scan-assembler-times {addi\tsp,sp,-512} 1 } } */
 /* { dg-final { scan-assembler-times {addi\tsp,sp,-1024} 1 } } */
-/* { dg-final { scan-assembler-times {addi\tsp,sp,-2048} 1 } } */
-/* { dg-final { scan-assembler-times 
{li\t[a-x0-9]+,-4096\s+add\tsp,sp,[a-x0-9]+} 1 } } */
+/* { dg-final { scan-assembler-times {addi\tsp,sp,-2048} 3 } } */
+/* { dg-final { scan-assembler-times {addi\tsp,sp,2032} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/spill-6.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/spill-6.c
index c5d2d019434..8bf53b84d1c 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/spill-6.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/spill-6.c
@@ -111,5 +111,5 @@ spill_10 (float *in, float *out)
 /* { dg-final { scan-assembler-times {addi\tsp,sp,-256} 1 } } */
 /* { dg-final { scan-assembler-times {addi\tsp,sp,-512} 1 } } */
 /* { dg-final { scan-assembler-times {addi\tsp,sp,-1024} 1 } } */
-/* { dg-final { scan-assembler-times {addi\tsp,sp,-2048} 1 } } */
-/* { dg-final { scan-assembler-times 
{li\t[a-x0-9]+,-4096\s+add\tsp,sp,[a-x0-9]+} 1 } } */
+/* { dg-final { scan-assembler-times {addi\tsp,sp,-2048} 3 } } */
+/* { dg-final { scan-assembler-times {addi\tsp,sp,2032} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/spill-7.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/spill-7.c
index 70ca683908d..e3980a29540 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/spill-7.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/spill-7.c
@@ -102,5 +102,5 @@ spill_9 (int64_t *in, int64_t *out)
 /* { dg-final { scan-assembler-times {addi\tsp,sp,-256} 1 } } */
 /* { dg-final { scan-assembler-times {addi\tsp,sp,-512} 1 } } */
 /* { dg-final { scan-assembler-times {addi\tsp,sp,-1024} 1 } } */
-/* { dg-final { scan-assembler-times {addi\tsp,sp,-2048} 1 } } */
-/* { dg-final { scan-assembler-times 
{li\t[a-x0-9]+,-4096\s+add\tsp,sp,[a-x0-9]+} 1 } } */
+/* { dg-final { scan-assembler-times {addi\tsp,sp,-2048} 3 } } */
+/* { dg-final { scan-assembler-times {addi\tsp,sp,2032} 1 } } */

Reply via email to