https://gcc.gnu.org/g:6bece8413ef4fa261d45447d262562f57260b0c5
commit 6bece8413ef4fa261d45447d262562f57260b0c5 Author: Shreya Munnangi <smunnan...@ventanamicro.com> Date: Tue May 27 06:43:29 2025 -0600 [RISC-V] Add andi+bclr synthesis So this patch from Shreya adds the ability to use andi + a series of bclr insns to synthesize a logical AND, much like we're doing for IOR/XOR using ori+bset or their xor equivalents. This would regress from a code quality standpoint if we didn't make some adjustments to a handful of define_insn_and_split patterns in the riscv backend which support the same kind of idioms. Essentially we turn those define_insn_and_split patterns into the simple define_splits they always should have been. That's been the plan since we started down this path -- now is the time to make that change for a subset of patterns. It may be the case that when we're finished we may not even need those patterns. That's still TBD. I'm aware of one minor regression in xalan. As seen elsewhere, combine reconstructs the mask value, uses mvconst_internal to load it into a reg then an and instruction. That looks better than the operation synthesis, but only because of the mvconst_internal little white lie. This patch does help in a variety of places. It's fairly common in gimple.c from 502.gcc to see cases where we'd use bclr to clear a bit, then set the exact same bit a few instructions later. That was an artifact of using a define_insn_and_split -- it wasn't obvious to combine that we had two instructions manipulating the same bit. Now that is obvious to combine and the redundant operation gets removed. This has spun in my tester with no regressions on riscv32-elf and riscv64-elf. Hopefully the baseline for the tester has stepped forward 🙂 gcc/ * config/riscv/bitmanip.md (andi+bclr splits): Simplified from prior define_insn_and_splits. * config/riscv/riscv.cc (synthesize_and): Add support for andi+bclr sequences. 
Co-authored-by: Jeff Law <j...@ventanamicro.com> (cherry picked from commit c86125a62d153965a7d7eb17a2bd0d0507326fde) Diff: --- gcc/config/riscv/bitmanip.md | 45 +++++++++++++++---------------------- gcc/config/riscv/riscv.cc | 53 +++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 70 insertions(+), 28 deletions(-) diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md index 85ace285ff0a..21426f496798 100644 --- a/gcc/config/riscv/bitmanip.md +++ b/gcc/config/riscv/bitmanip.md @@ -846,14 +846,12 @@ [(set_attr "type" "bitmanip")]) ;; In case we have "val & ~IMM" where ~IMM has 2 bits set. -(define_insn_and_split "*bclri<mode>_nottwobits" - [(set (match_operand:X 0 "register_operand" "=r") - (and:X (match_operand:X 1 "register_operand" "r") - (match_operand:X 2 "const_nottwobits_not_arith_operand" "i"))) - (clobber (match_scratch:X 3 "=&r"))] +(define_split + [(set (match_operand:X 0 "register_operand") + (and:X (match_operand:X 1 "register_operand") + (match_operand:X 2 "const_nottwobits_not_arith_operand"))) + (clobber (match_operand:X 3 "register_operand"))] "TARGET_ZBS && !paradoxical_subreg_p (operands[1])" - "#" - "&& reload_completed" [(set (match_dup 3) (and:X (match_dup 1) (match_dup 4))) (set (match_dup 0) (and:X (match_dup 3) (match_dup 5)))] { @@ -862,20 +860,17 @@ operands[4] = GEN_INT (~bits | topbit); operands[5] = GEN_INT (~topbit); -} -[(set_attr "type" "bitmanip")]) +}) ;; In case of a paradoxical subreg, the sign bit and the high bits are ;; not allowed to be changed -(define_insn_and_split "*bclridisi_nottwobits" - [(set (match_operand:DI 0 "register_operand" "=r") - (and:DI (match_operand:DI 1 "register_operand" "r") - (match_operand:DI 2 "const_nottwobits_not_arith_operand" "i"))) - (clobber (match_scratch:DI 3 "=&r"))] +(define_split + [(set (match_operand:DI 0 "register_operand") + (and:DI (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "const_nottwobits_not_arith_operand"))) + (clobber 
(match_operand:DI 3 "register_operand"))] "TARGET_64BIT && TARGET_ZBS && clz_hwi (~UINTVAL (operands[2])) > 33" - "#" - "&& reload_completed" [(set (match_dup 3) (and:DI (match_dup 1) (match_dup 4))) (set (match_dup 0) (and:DI (match_dup 3) (match_dup 5)))] { @@ -884,8 +879,7 @@ operands[4] = GEN_INT (~bits | topbit); operands[5] = GEN_INT (~topbit); -} -[(set_attr "type" "bitmanip")]) +}) ;; An outer AND with a constant where bits 31..63 are 0 can be seen as ;; a virtual zero extension from 31 to 64 bits. @@ -1061,14 +1055,12 @@ [(set_attr "type" "bitmanip")]) ;; Same to use blcri + andi and blcri + bclri -(define_insn_and_split "*andi<mode>_extrabit" - [(set (match_operand:X 0 "register_operand" "=r") - (and:X (match_operand:X 1 "register_operand" "r") - (match_operand:X 2 "not_uimm_extra_bit_or_nottwobits" "i"))) - (clobber (match_scratch:X 3 "=&r"))] +(define_split + [(set (match_operand:X 0 "register_operand") + (and:X (match_operand:X 1 "register_operand") + (match_operand:X 2 "not_uimm_extra_bit_or_nottwobits"))) + (clobber (match_operand:X 3 "register_operand"))] "TARGET_ZBS && !not_single_bit_mask_operand (operands[2], VOIDmode)" - "#" - "&& reload_completed" [(set (match_dup 3) (and:X (match_dup 1) (match_dup 4))) (set (match_dup 0) (and:X (match_dup 3) (match_dup 5)))] { @@ -1077,8 +1069,7 @@ operands[4] = GEN_INT (bits | topbit); operands[5] = GEN_INT (~topbit); -} -[(set_attr "type" "bitmanip")]) +}) ;; If we have the ZBA extension, then we can clear the upper half of a 64 ;; bit object with a zext.w. So if we have AND where the constant would diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 87f20528aebe..2cc69b4458a9 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -14622,19 +14622,70 @@ synthesize_and (rtx operands[3]) return true; } + /* The special cases didn't apply. 
It's entirely possible we may + want to combine some of the ideas above with bclr, but for now + those are deferred until we see them popping up in practice. */ + + unsigned HOST_WIDE_INT ival = ~INTVAL (operands[2]); + + /* Clear as many bits using andi as we can. */ + if ((ival & HOST_WIDE_INT_UC (0x7ff)) != 0x0) + { + ival &= ~HOST_WIDE_INT_UC (0x7ff); + budget--; + } + + /* And handle remaining bits via bclr. */ + while (TARGET_ZBS && ival) + { + unsigned HOST_WIDE_INT tmpval = HOST_WIDE_INT_UC (1) << ctz_hwi (ival); + ival &= ~tmpval; + budget--; + } + /* If the remaining budget has gone to less than zero, it forces the value into a register and performs the AND operation. It returns TRUE to the caller so the caller knows code generation is complete. FIXME: This is hacked to always be enabled until the last patch in the series is enabled. */ - if (1) + if (ival || budget < 0) { rtx x = force_reg (word_mode, operands[2]); x = gen_rtx_AND (word_mode, operands[1], x); emit_insn (gen_rtx_SET (operands[0], x)); return true; } + + /* Synthesis is better than loading the constant. */ + ival = ~INTVAL (operands[2]); + input = operands[1]; + + /* Clear any of the lower 11 bits we need. */ + if ((ival & HOST_WIDE_INT_UC (0x7ff)) != 0) + { + rtx x = GEN_INT (~(ival & HOST_WIDE_INT_UC (0x7ff))); + x = gen_rtx_AND (word_mode, input, x); + output = gen_reg_rtx (word_mode); + emit_insn (gen_rtx_SET (output, x)); + input = output; + ival &= ~HOST_WIDE_INT_UC (0x7ff); + } + + /* Clear the rest with bclr. */ + while (ival) + { + unsigned HOST_WIDE_INT tmpval = HOST_WIDE_INT_UC (1) << ctz_hwi (ival); + rtx x = GEN_INT (~tmpval); + x = gen_rtx_AND (word_mode, input, x); + output = gen_reg_rtx (word_mode); + emit_insn (gen_rtx_SET (output, x)); + input = output; + ival &= ~tmpval; + } + + emit_move_insn (operands[0], input); + return true; }