https://gcc.gnu.org/g:6bece8413ef4fa261d45447d262562f57260b0c5

commit 6bece8413ef4fa261d45447d262562f57260b0c5
Author: Shreya Munnangi <smunnan...@ventanamicro.com>
Date:   Tue May 27 06:43:29 2025 -0600

    [RISC-V] Add andi+bclr synthesis
    
    So this patch from Shreya adds the ability to use andi + a series of bclr
    insns to synthesize a logical AND, much like we're doing for IOR/XOR using
    ori+bset or their xor equivalents.
    
    This would regress from a code quality standpoint if we didn't make some
    adjustments to a handful of define_insn_and_split patterns in the riscv
    backend which support the same kind of idioms.
    
    Essentially we turn those define_insn_and_split patterns into the simple
    define_splits they always should have been.  That's been the plan since we
    started down this path -- now is the time to make that change for a subset
    of patterns.  It may be the case that when we're finished we may not even
    need those patterns.  That's still TBD.
    
    I'm aware of one minor regression in xalan.  As seen elsewhere, combine
    reconstructs the mask value, uses mvconst_internal to load it into a reg,
    then emits an and instruction.  That looks better than the operation
    synthesis, but only because of the mvconst_internal little white lie.
    
    This patch does help in a variety of places.  It's fairly common in gimple.c
    from 502.gcc to see cases where we'd use bclr to clear a bit, then set the
    exact same bit a few instructions later.  That was an artifact of using a
    define_insn_and_split -- it wasn't obvious to combine that we had two
    instructions manipulating the same bit.  Now that is obvious to combine and
    the redundant operation gets removed.
    
    This has spun in my tester with no regressions on riscv32-elf and
    riscv64-elf.  Hopefully the baseline for the tester has stepped forward 🙂
    
    gcc/
            * config/riscv/bitmanip.md (andi+bclr splits): Simplified from
            prior define_insn_and_splits.
            * config/riscv/riscv.cc (synthesize_and): Add support for andi+bclr
            sequences.
    
                Co-authored-by: Jeff Law  <j...@ventanamicro.com>
    
    (cherry picked from commit c86125a62d153965a7d7eb17a2bd0d0507326fde)

Diff:
---
 gcc/config/riscv/bitmanip.md | 45 +++++++++++++++----------------------
 gcc/config/riscv/riscv.cc    | 53 +++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 70 insertions(+), 28 deletions(-)

diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index 85ace285ff0a..21426f496798 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -846,14 +846,12 @@
   [(set_attr "type" "bitmanip")])
 
 ;; In case we have "val & ~IMM" where ~IMM has 2 bits set.
-(define_insn_and_split "*bclri<mode>_nottwobits"
-  [(set (match_operand:X 0 "register_operand" "=r")
-       (and:X (match_operand:X 1 "register_operand" "r")
-              (match_operand:X 2 "const_nottwobits_not_arith_operand" "i")))
-   (clobber (match_scratch:X 3 "=&r"))]
+(define_split
+  [(set (match_operand:X 0 "register_operand")
+       (and:X (match_operand:X 1 "register_operand")
+              (match_operand:X 2 "const_nottwobits_not_arith_operand")))
+   (clobber (match_operand:X 3 "register_operand"))]
   "TARGET_ZBS && !paradoxical_subreg_p (operands[1])"
-  "#"
-  "&& reload_completed"
   [(set (match_dup 3) (and:X (match_dup 1) (match_dup 4)))
    (set (match_dup 0) (and:X (match_dup 3) (match_dup 5)))]
 {
@@ -862,20 +860,17 @@
 
   operands[4] = GEN_INT (~bits | topbit);
   operands[5] = GEN_INT (~topbit);
-}
-[(set_attr "type" "bitmanip")])
+})
 
 ;; In case of a paradoxical subreg, the sign bit and the high bits are
 ;; not allowed to be changed
-(define_insn_and_split "*bclridisi_nottwobits"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-       (and:DI (match_operand:DI 1 "register_operand" "r")
-               (match_operand:DI 2 "const_nottwobits_not_arith_operand" "i")))
-   (clobber (match_scratch:DI 3 "=&r"))]
+(define_split
+  [(set (match_operand:DI 0 "register_operand")
+       (and:DI (match_operand:DI 1 "register_operand")
+               (match_operand:DI 2 "const_nottwobits_not_arith_operand")))
+   (clobber (match_operand:DI 3 "register_operand"))]
   "TARGET_64BIT && TARGET_ZBS
    && clz_hwi (~UINTVAL (operands[2])) > 33"
-  "#"
-  "&& reload_completed"
   [(set (match_dup 3) (and:DI (match_dup 1) (match_dup 4)))
    (set (match_dup 0) (and:DI (match_dup 3) (match_dup 5)))]
 {
@@ -884,8 +879,7 @@
 
   operands[4] = GEN_INT (~bits | topbit);
   operands[5] = GEN_INT (~topbit);
-}
-[(set_attr "type" "bitmanip")])
+})
 
 ;; An outer AND with a constant where bits 31..63 are 0 can be seen as
 ;; a virtual zero extension from 31 to 64 bits.
@@ -1061,14 +1055,12 @@
 [(set_attr "type" "bitmanip")])
 
 ;; Same to use blcri + andi and blcri + bclri
-(define_insn_and_split "*andi<mode>_extrabit"
-  [(set (match_operand:X 0 "register_operand" "=r")
-       (and:X (match_operand:X 1 "register_operand" "r")
-              (match_operand:X 2 "not_uimm_extra_bit_or_nottwobits" "i")))
-   (clobber (match_scratch:X 3 "=&r"))]
+(define_split
+  [(set (match_operand:X 0 "register_operand")
+       (and:X (match_operand:X 1 "register_operand")
+              (match_operand:X 2 "not_uimm_extra_bit_or_nottwobits")))
+   (clobber (match_operand:X 3 "register_operand"))]
   "TARGET_ZBS && !not_single_bit_mask_operand (operands[2], VOIDmode)"
-  "#"
-  "&& reload_completed"
   [(set (match_dup 3) (and:X (match_dup 1) (match_dup 4)))
    (set (match_dup 0) (and:X (match_dup 3) (match_dup 5)))]
 {
@@ -1077,8 +1069,7 @@
 
   operands[4] = GEN_INT (bits | topbit);
   operands[5] = GEN_INT (~topbit);
-}
-[(set_attr "type" "bitmanip")])
+})
 
 ;; If we have the ZBA extension, then we can clear the upper half of a 64
 ;; bit object with a zext.w.  So if we have AND where the constant would
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 87f20528aebe..2cc69b4458a9 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -14622,19 +14622,70 @@ synthesize_and (rtx operands[3])
       return true;
     }
 
+  /* The special cases didn't apply.  It's entirely possible we may
+     want to combine some of the ideas above with bclr, but for now
+     those are deferred until we see them popping up in practice.  */
+
+  unsigned HOST_WIDE_INT ival = ~INTVAL (operands[2]);
+
+  /* Clear as many bits using andi as we can.  */
+  if ((ival & HOST_WIDE_INT_UC (0x7ff)) != 0x0)
+    {
+      ival &= ~HOST_WIDE_INT_UC (0x7ff);
+      budget--;
+    }
+
+  /* And handle remaining bits via bclr.  */
+  while (TARGET_ZBS && ival)
+    {
+      unsigned HOST_WIDE_INT tmpval = HOST_WIDE_INT_UC (1) << ctz_hwi (ival);
+      ival &= ~tmpval;
+      budget--;
+    }
+
   /* If the remaining budget has gone to less than zero, it
      forces the value into a register and performs the AND
      operation.  It returns TRUE to the caller so the caller
      knows code generation is complete.
      FIXME: This is hacked to always be enabled until the last
      patch in the series is enabled.  */
-  if (1)
+  if (ival || budget < 0)
     {
       rtx x = force_reg (word_mode, operands[2]);
       x = gen_rtx_AND (word_mode, operands[1], x);
       emit_insn (gen_rtx_SET (operands[0], x));
       return true;
     }
+
+  /* Synthesis is better than loading the constant.  */
+  ival = ~INTVAL (operands[2]);
+  input = operands[1];
+
+  /* Clear any of the lower 11 bits we need.  */
+  if ((ival & HOST_WIDE_INT_UC (0x7ff)) != 0)
+    {
+      rtx x = GEN_INT (~(ival & HOST_WIDE_INT_UC (0x7ff)));
+      x = gen_rtx_AND (word_mode, input, x);
+      output = gen_reg_rtx (word_mode);
+      emit_insn (gen_rtx_SET (output, x));
+      input = output;
+      ival &= ~HOST_WIDE_INT_UC (0x7ff);
+    }
+
+  /* Clear the rest with bclr.  */
+  while (ival)
+    {
+      unsigned HOST_WIDE_INT tmpval = HOST_WIDE_INT_UC (1) << ctz_hwi (ival);
+      rtx x = GEN_INT (~tmpval);
+      x = gen_rtx_AND (word_mode, input, x);
+      output = gen_reg_rtx (word_mode);
+      emit_insn (gen_rtx_SET (output, x));
+      input = output;
+      ival &= ~tmpval;
+    }
+
+  emit_move_insn (operands[0], input);
+  return true;
 }

Reply via email to