https://gcc.gnu.org/g:8010f9802a62912ee149ff44b07bd4cf80ec44fc
commit 8010f9802a62912ee149ff44b07bd4cf80ec44fc Author: Jeff Law <j...@ventanamicro.com> Date: Sat May 17 07:16:50 2025 -0600 [RISC-V] Avoid setting output object more than once in IOR/XOR synthesis While evaluating Shreya's logical AND synthesis work on spec2017 I ran into a code quality regression where combine was failing to eliminate a redundant sign extension. I had a hunch the problem would be with the multiple sets of the same pseudo register in the AND synthesis path. I was right that the problem was multiple sets of the same pseudo, but it was actually some of the splitters in the RISC-V backend that were the culprit. Those multiple sets caused the sign bit tracking code to need to make conservative assumptions thus resulting in failure to eliminate the unnecessary sign extension. So before we start moving on the logical AND patch we're going to do some cleanups. There's multiple moving parts in play. For example, we have splitters which do multiple sets of the output register. Fixing some of those independently would result in a code quality regression. Instead they need some adjustments to or removal of mvconst_internal. Of course getting rid of mvconst_internal will trigger all kinds of code quality regressions right now which ultimately lead back to the need to revamp the logical AND expander. Point being we've got some circular dependencies and breaking them may result in short term code quality regressions. I'll obviously try to avoid those as much as possible. So to start the process this patch adjusts the recently added XOR/IOR synthesis to avoid re-using the destination register. While the reuse was clearly safe from a semantic standpoint, various parts of the compiler can do a better job for pseudos that are only set once. Given this synthesis path should only be active during initial RTL generation, we can create new pseudos at will, so we create a new one for each insn. At the end of the sequence we copy from the last set into the final destination. This has various trivial impacts on the code generation, but the resulting code looks no better or worse to me across spec2017. This has been tested in my tester and is currently bootstrapping on my BPI. Waiting on data from the pre-commit tester before moving forward... gcc/ * config/riscv/riscv.cc (synthesize_ior_xor): Avoid writing operands[0] more than once, use new pseudos instead. (cherry picked from commit 6ecda1972a1e19d23e6dd238c7509c25acf5c914) Diff: --- gcc/config/riscv/riscv.cc | 52 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 16 deletions(-) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index b2794252291e..8d84bee46882 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -14267,17 +14267,21 @@ synthesize_ior_xor (rtx_code code, rtx operands[3]) { /* Pre-flipping bits we want to preserve. */ rtx input = operands[1]; + rtx output = NULL_RTX; ival = ~INTVAL (operands[2]); while (ival) { HOST_WIDE_INT tmpval = HOST_WIDE_INT_UC (1) << ctz_hwi (ival); rtx x = GEN_INT (tmpval); x = gen_rtx_XOR (word_mode, input, x); - emit_insn (gen_rtx_SET (operands[0], x)); - input = operands[0]; + output = gen_reg_rtx (word_mode); + emit_insn (gen_rtx_SET (output, x)); + input = output; ival &= ~tmpval; } + gcc_assert (output); + /* Now flip all the bits, which restores the bits we were preserving. */ rtx x = gen_rtx_NOT (word_mode, input); @@ -14300,23 +14304,29 @@ synthesize_ior_xor (rtx_code code, rtx operands[3]) int msb = BITS_PER_WORD - 1 - clz_hwi (ival); if (msb - lsb + 1 <= 11) { + rtx output = gen_reg_rtx (word_mode); + rtx input = operands[1]; + /* Rotate the source right by LSB bits. */ rtx x = GEN_INT (lsb); - x = gen_rtx_ROTATERT (word_mode, operands[1], x); - emit_insn (gen_rtx_SET (operands[0], x)); + x = gen_rtx_ROTATERT (word_mode, input, x); + emit_insn (gen_rtx_SET (output, x)); + input = output; /* Shift the constant right by LSB bits. */ x = GEN_INT (ival >> lsb); /* Perform the IOR/XOR operation. */ - x = gen_rtx_fmt_ee (code, word_mode, operands[0], x); - emit_insn (gen_rtx_SET (operands[0], x)); + x = gen_rtx_fmt_ee (code, word_mode, input, x); + output = gen_reg_rtx (word_mode); + emit_insn (gen_rtx_SET (output, x)); + input = output; /* And rotate left to put everything back in place, we don't have rotate left by a constant, so use rotate right by an adjusted constant. */ x = GEN_INT (BITS_PER_WORD - lsb); - x = gen_rtx_ROTATERT (word_mode, operands[1], x); + x = gen_rtx_ROTATERT (word_mode, input, x); emit_insn (gen_rtx_SET (operands[0], x)); return true; } @@ -14337,22 +14347,28 @@ synthesize_ior_xor (rtx_code code, rtx operands[3]) if ((INTVAL (operands[2]) & HOST_WIDE_INT_UC (0x7ff)) != 0 && msb - lsb + 1 <= 11) { + rtx output = gen_reg_rtx (word_mode); + rtx input = operands[1]; + /* Rotate the source left by ROTCOUNT bits, we don't have rotate left by a constant, so use rotate right by an adjusted constant. */ rtx x = GEN_INT (BITS_PER_WORD - rotcount); - x = gen_rtx_ROTATERT (word_mode, operands[1], x); - emit_insn (gen_rtx_SET (operands[0], x)); + x = gen_rtx_ROTATERT (word_mode, input, x); + emit_insn (gen_rtx_SET (output, x)); + input = output; /* We've already rotated the constant. So perform the IOR/XOR operation. */ x = GEN_INT (ival); - x = gen_rtx_fmt_ee (code, word_mode, operands[0], x); - emit_insn (gen_rtx_SET (operands[0], x)); + x = gen_rtx_fmt_ee (code, word_mode, input, x); + output = gen_reg_rtx (word_mode); + emit_insn (gen_rtx_SET (output, x)); + input = output; /* And rotate right to put everything into its proper place. */ x = GEN_INT (rotcount); - x = gen_rtx_ROTATERT (word_mode, operands[0], x); + x = gen_rtx_ROTATERT (word_mode, input, x); emit_insn (gen_rtx_SET (operands[0], x)); return true; } @@ -14374,6 +14390,7 @@ synthesize_ior_xor (rtx_code code, rtx operands[3]) /* Synthesis is better than loading the constant. */ ival = INTVAL (operands[2]); rtx input = operands[1]; + rtx output; /* Emit the [x]ori insn that sets the low 11 bits into the proper state. */ @@ -14381,8 +14398,9 @@ synthesize_ior_xor (rtx_code code, rtx operands[3]) { rtx x = GEN_INT (ival & HOST_WIDE_INT_UC (0x7ff)); x = gen_rtx_fmt_ee (code, word_mode, input, x); - emit_insn (gen_rtx_SET (operands[0], x)); - input = operands[0]; + output = gen_reg_rtx (word_mode); + emit_insn (gen_rtx_SET (output, x)); + input = output; ival &= ~HOST_WIDE_INT_UC (0x7ff); } @@ -14396,10 +14414,12 @@ synthesize_ior_xor (rtx_code code, rtx operands[3]) HOST_WIDE_INT tmpval = HOST_WIDE_INT_UC (1) << ctz_hwi (ival); rtx x = GEN_INT (tmpval); x = gen_rtx_fmt_ee (code, word_mode, input, x); - emit_insn (gen_rtx_SET (operands[0], x)); - input = operands[0]; + output = gen_reg_rtx (word_mode); + emit_insn (gen_rtx_SET (output, x)); + input = output; ival &= ~tmpval; } + emit_move_insn (operands[0], output); return true; }