https://gcc.gnu.org/g:6ecda1972a1e19d23e6dd238c7509c25acf5c914

commit r16-709-g6ecda1972a1e19d23e6dd238c7509c25acf5c914
Author: Jeff Law <j...@ventanamicro.com>
Date:   Sat May 17 07:16:50 2025 -0600

    [RISC-V] Avoid setting output object more than once in IOR/XOR synthesis
    
    While evaluating Shreya's logical AND synthesis work on spec2017 I ran
    into a code quality regression where combine was failing to eliminate a
    redundant sign extension.
    
    I had a hunch the problem would be with the multiple sets of the same
    pseudo register in the AND synthesis path.  I was right that the problem
    was multiple sets of the same pseudo, but it was actually some of the
    splitters in the RISC-V backend that were the culprit.  Those multiple
    sets forced the sign bit tracking code to make conservative assumptions,
    which resulted in the failure to eliminate the unnecessary sign
    extension.
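    
    Roughly, the problematic shape is the one the pre-patch synthesis loop
    had (a simplified sketch of the old emission pattern, not the splitters
    themselves; ival, code and operands are as in synthesize_ior_xor):
    
        /* Old shape: every step writes operands[0], so the destination
           pseudo ends up with multiple sets.  */
        rtx input = operands[1];
        while (ival)
          {
            HOST_WIDE_INT tmpval = HOST_WIDE_INT_UC (1) << ctz_hwi (ival);
            rtx x = gen_rtx_fmt_ee (code, word_mode, input, GEN_INT (tmpval));
            emit_insn (gen_rtx_SET (operands[0], x));  /* Repeated set.  */
            input = operands[0];
            ival &= ~tmpval;
          }
    
    With operands[0] set more than once, the sign bit tracking code has to
    make conservative assumptions about its value.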
    
    So before moving on to the logical AND patch we're going to do some
    cleanups.
    
    There are multiple moving parts in play.  For example, we have splitters
    which do multiple sets of the output register.  Fixing some of those
    independently would result in a code quality regression.  Instead they
    need some adjustments to, or removal of, mvconst_internal.  Of course
    getting rid of mvconst_internal will trigger all kinds of code quality
    regressions right now, which ultimately lead back to the need to revamp
    the logical AND expander.  The point being, we've got some circular
    dependencies, and breaking them may result in short-term code quality
    regressions.  I'll obviously try to avoid those as much as possible.
    
    So to start the process, this patch adjusts the recently added XOR/IOR
    synthesis to avoid re-using the destination register.  While the reuse
    was clearly safe from a semantic standpoint, various parts of the
    compiler can do a better job for pseudos that are only set once.
    
    Given that this synthesis path should only be active during initial RTL
    generation, we can create new pseudos at will, so we create a new one
    for each insn.  At the end of the sequence we copy from the last set
    into the final destination.
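    
    Concretely, each emission step now looks something like the sketch
    below (mirroring the hunks in the diff; ival, code and operands are as
    in synthesize_ior_xor):
    
        /* New shape: a fresh pseudo per emitted insn; operands[0] is
           written exactly once, by the final move.  */
        rtx input = operands[1];
        rtx output = NULL_RTX;
        while (ival)
          {
            HOST_WIDE_INT tmpval = HOST_WIDE_INT_UC (1) << ctz_hwi (ival);
            rtx x = gen_rtx_fmt_ee (code, word_mode, input, GEN_INT (tmpval));
            output = gen_reg_rtx (word_mode);
            emit_insn (gen_rtx_SET (output, x));
            input = output;
            ival &= ~tmpval;
          }
        emit_move_insn (operands[0], output);
    
    Creating the extra pseudos is fine here because, as noted above, this
    path only runs during initial RTL generation.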
    
    This has various trivial impacts on the code generation, but the
    resulting code looks no better or worse to me across spec2017.
    
    This has been tested in my tester and is currently bootstrapping on my BPI.
    Waiting on data from the pre-commit tester before moving forward...
    
    gcc/
            * config/riscv/riscv.cc (synthesize_ior_xor): Avoid writing
            operands[0] more than once, use new pseudos instead.

Diff:
---
 gcc/config/riscv/riscv.cc | 52 ++++++++++++++++++++++++++++++++---------------
 1 file changed, 36 insertions(+), 16 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index b2794252291e..8d84bee46882 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -14267,17 +14267,21 @@ synthesize_ior_xor (rtx_code code, rtx operands[3])
     {
       /* Pre-flipping bits we want to preserve.  */
       rtx input = operands[1];
+      rtx output = NULL_RTX;
       ival = ~INTVAL (operands[2]);
       while (ival)
        {
          HOST_WIDE_INT tmpval = HOST_WIDE_INT_UC (1) << ctz_hwi (ival);
          rtx x = GEN_INT (tmpval);
          x = gen_rtx_XOR (word_mode, input, x);
-         emit_insn (gen_rtx_SET (operands[0], x));
-         input = operands[0];
+         output = gen_reg_rtx (word_mode);
+         emit_insn (gen_rtx_SET (output, x));
+         input = output;
          ival &= ~tmpval;
        }
 
+      gcc_assert (output);
+
       /* Now flip all the bits, which restores the bits we were
         preserving.  */
       rtx x = gen_rtx_NOT (word_mode, input);
@@ -14300,23 +14304,29 @@ synthesize_ior_xor (rtx_code code, rtx operands[3])
       int msb = BITS_PER_WORD - 1 - clz_hwi (ival);
       if (msb - lsb + 1 <= 11)
        {
+         rtx output = gen_reg_rtx (word_mode);
+         rtx input = operands[1];
+
          /* Rotate the source right by LSB bits.  */
          rtx x = GEN_INT (lsb);
-         x = gen_rtx_ROTATERT (word_mode, operands[1], x);
-         emit_insn (gen_rtx_SET (operands[0], x));
+         x = gen_rtx_ROTATERT (word_mode, input, x);
+         emit_insn (gen_rtx_SET (output, x));
+         input = output;
 
          /* Shift the constant right by LSB bits.  */
          x = GEN_INT (ival >> lsb);
 
          /* Perform the IOR/XOR operation.  */
-         x = gen_rtx_fmt_ee (code, word_mode, operands[0], x);
-         emit_insn (gen_rtx_SET (operands[0], x));
+         x = gen_rtx_fmt_ee (code, word_mode, input, x);
+         output = gen_reg_rtx (word_mode);
+         emit_insn (gen_rtx_SET (output, x));
+         input = output;
 
          /* And rotate left to put everything back in place, we don't
             have rotate left by a constant, so use rotate right by
             an adjusted constant.  */
          x = GEN_INT (BITS_PER_WORD - lsb);
-         x = gen_rtx_ROTATERT (word_mode, operands[1], x);
+         x = gen_rtx_ROTATERT (word_mode, input, x);
          emit_insn (gen_rtx_SET (operands[0], x));
          return true;
        }
@@ -14337,22 +14347,28 @@ synthesize_ior_xor (rtx_code code, rtx operands[3])
       if ((INTVAL (operands[2]) & HOST_WIDE_INT_UC (0x7ff)) != 0
          && msb - lsb + 1 <= 11)
        {
+         rtx output = gen_reg_rtx (word_mode);
+         rtx input = operands[1];
+
          /* Rotate the source left by ROTCOUNT bits, we don't have
             rotate left by a constant, so use rotate right by an
             adjusted constant.  */
          rtx x = GEN_INT (BITS_PER_WORD - rotcount);
-         x = gen_rtx_ROTATERT (word_mode, operands[1], x);
-         emit_insn (gen_rtx_SET (operands[0], x));
+         x = gen_rtx_ROTATERT (word_mode, input, x);
+         emit_insn (gen_rtx_SET (output, x));
+         input = output;
 
          /* We've already rotated the constant.  So perform the IOR/XOR
             operation.  */
          x = GEN_INT (ival);
-         x = gen_rtx_fmt_ee (code, word_mode, operands[0], x);
-         emit_insn (gen_rtx_SET (operands[0], x));
+         x = gen_rtx_fmt_ee (code, word_mode, input, x);
+         output = gen_reg_rtx (word_mode);
+         emit_insn (gen_rtx_SET (output, x));
+         input = output;
 
          /* And rotate right to put everything into its proper place.  */
          x = GEN_INT (rotcount);
-         x = gen_rtx_ROTATERT (word_mode, operands[0], x);
+         x = gen_rtx_ROTATERT (word_mode, input, x);
          emit_insn (gen_rtx_SET (operands[0], x));
          return true;
        }
@@ -14374,6 +14390,7 @@ synthesize_ior_xor (rtx_code code, rtx operands[3])
   /* Synthesis is better than loading the constant.  */
   ival = INTVAL (operands[2]);
   rtx input = operands[1];
+  rtx output;
 
   /* Emit the [x]ori insn that sets the low 11 bits into
      the proper state.  */
@@ -14381,8 +14398,9 @@ synthesize_ior_xor (rtx_code code, rtx operands[3])
     {
       rtx x = GEN_INT (ival & HOST_WIDE_INT_UC (0x7ff));
       x = gen_rtx_fmt_ee (code, word_mode, input, x);
-      emit_insn (gen_rtx_SET (operands[0], x));
-      input = operands[0];
+      output = gen_reg_rtx (word_mode);
+      emit_insn (gen_rtx_SET (output, x));
+      input = output;
       ival &= ~HOST_WIDE_INT_UC (0x7ff);
     }
 
@@ -14396,10 +14414,12 @@ synthesize_ior_xor (rtx_code code, rtx operands[3])
       HOST_WIDE_INT tmpval = HOST_WIDE_INT_UC (1) << ctz_hwi (ival);
       rtx x = GEN_INT (tmpval);
       x = gen_rtx_fmt_ee (code, word_mode, input, x);
-      emit_insn (gen_rtx_SET (operands[0], x));
-      input = operands[0];
+      output = gen_reg_rtx (word_mode);
+      emit_insn (gen_rtx_SET (output, x));
+      input = output;
       ival &= ~tmpval;
     }
+  emit_move_insn (operands[0], output);
   return true;
 }
