So this patch from Shreya adds the ability to use andi + a series of bclr insns to synthesize a logical AND, much like we're doing for IOR/XOR using ori+bset or their xor equivalents.

This would regress from a code quality standpoint if we didn't make some adjustments to a handful of define_insn_and_split patterns in the riscv backend which support the same kind of idioms.

Essentially we turn those define_insn_and_split patterns into the simple define_splits they always should have been. That's been the plan since we started down this path -- now is the time to make that change for a subset of patterns. It may turn out that when we're finished we won't even need those patterns. That's still TBD.

I'm aware of one minor regression in xalan. As seen elsewhere, combine reconstructs the mask value, uses mvconst_internal to load it into a register, and then applies an and instruction. That looks better than the operation synthesis, but only because of the mvconst_internal little white lie.

This patch does help in a variety of places. It's fairly common in gimple.c from 502.gcc to see cases where we'd use bclr to clear a bit, then set the exact same bit a few instructions later. That was an artifact of using a define_insn_and_split -- it wasn't obvious to combine that we had two instructions manipulating the same bit. Now that is obvious to combine and the redundant operation gets removed.

This has spun in my tester with no regressions on riscv32-elf and riscv64-elf. Hopefully the baseline for the tester has stepped forward :-)


jeff
gcc/
        * config/riscv/bitmanip.md (andi+bclr splits): Simplified from
        prior define_insn_and_splits.
        * config/riscv/riscv.cc (synthesize_and): Add support for andi+bclr
        sequences.  

diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index 85ace285ff0..21426f49679 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -846,14 +846,12 @@ (define_insn "*bclri<mode>"
   [(set_attr "type" "bitmanip")])
 
 ;; In case we have "val & ~IMM" where ~IMM has 2 bits set.
-(define_insn_and_split "*bclri<mode>_nottwobits"
-  [(set (match_operand:X 0 "register_operand" "=r")
-       (and:X (match_operand:X 1 "register_operand" "r")
-              (match_operand:X 2 "const_nottwobits_not_arith_operand" "i")))
-   (clobber (match_scratch:X 3 "=&r"))]
+(define_split
+  [(set (match_operand:X 0 "register_operand")
+       (and:X (match_operand:X 1 "register_operand")
+              (match_operand:X 2 "const_nottwobits_not_arith_operand")))
+   (clobber (match_operand:X 3 "register_operand"))]
   "TARGET_ZBS && !paradoxical_subreg_p (operands[1])"
-  "#"
-  "&& reload_completed"
   [(set (match_dup 3) (and:X (match_dup 1) (match_dup 4)))
    (set (match_dup 0) (and:X (match_dup 3) (match_dup 5)))]
 {
@@ -862,20 +860,17 @@ (define_insn_and_split "*bclri<mode>_nottwobits"
 
   operands[4] = GEN_INT (~bits | topbit);
   operands[5] = GEN_INT (~topbit);
-}
-[(set_attr "type" "bitmanip")])
+})
 
 ;; In case of a paradoxical subreg, the sign bit and the high bits are
 ;; not allowed to be changed
-(define_insn_and_split "*bclridisi_nottwobits"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-       (and:DI (match_operand:DI 1 "register_operand" "r")
-               (match_operand:DI 2 "const_nottwobits_not_arith_operand" "i")))
-   (clobber (match_scratch:DI 3 "=&r"))]
+(define_split
+  [(set (match_operand:DI 0 "register_operand")
+       (and:DI (match_operand:DI 1 "register_operand")
+               (match_operand:DI 2 "const_nottwobits_not_arith_operand")))
+   (clobber (match_operand:DI 3 "register_operand"))]
   "TARGET_64BIT && TARGET_ZBS
    && clz_hwi (~UINTVAL (operands[2])) > 33"
-  "#"
-  "&& reload_completed"
   [(set (match_dup 3) (and:DI (match_dup 1) (match_dup 4)))
    (set (match_dup 0) (and:DI (match_dup 3) (match_dup 5)))]
 {
@@ -884,8 +879,7 @@ (define_insn_and_split "*bclridisi_nottwobits"
 
   operands[4] = GEN_INT (~bits | topbit);
   operands[5] = GEN_INT (~topbit);
-}
-[(set_attr "type" "bitmanip")])
+})
 
 ;; An outer AND with a constant where bits 31..63 are 0 can be seen as
 ;; a virtual zero extension from 31 to 64 bits.
@@ -1061,14 +1055,12 @@ (define_insn_and_split "*<or_optab>i<mode>_extrabit"
 [(set_attr "type" "bitmanip")])
 
 ;; Same to use blcri + andi and blcri + bclri
-(define_insn_and_split "*andi<mode>_extrabit"
-  [(set (match_operand:X 0 "register_operand" "=r")
-       (and:X (match_operand:X 1 "register_operand" "r")
-              (match_operand:X 2 "not_uimm_extra_bit_or_nottwobits" "i")))
-   (clobber (match_scratch:X 3 "=&r"))]
+(define_split
+  [(set (match_operand:X 0 "register_operand")
+       (and:X (match_operand:X 1 "register_operand")
+              (match_operand:X 2 "not_uimm_extra_bit_or_nottwobits")))
+   (clobber (match_operand:X 3 "register_operand"))]
   "TARGET_ZBS && !not_single_bit_mask_operand (operands[2], VOIDmode)"
-  "#"
-  "&& reload_completed"
   [(set (match_dup 3) (and:X (match_dup 1) (match_dup 4)))
    (set (match_dup 0) (and:X (match_dup 3) (match_dup 5)))]
 {
@@ -1077,8 +1069,7 @@ (define_insn_and_split "*andi<mode>_extrabit"
 
   operands[4] = GEN_INT (bits | topbit);
   operands[5] = GEN_INT (~topbit);
-}
-[(set_attr "type" "bitmanip")])
+})
 
 ;; If we have the ZBA extension, then we can clear the upper half of a 64
 ;; bit object with a zext.w.  So if we have AND where the constant would
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 24c7acab744..8a770631e1a 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -14621,19 +14621,70 @@ synthesize_and (rtx operands[3])
       return true;
     }
 
+  /* The special cases didn't apply.  It's entirely possible we may
+     want to combine some of the ideas above with bclr, but for now
+     those are deferred until we see them popping up in practice.  */
+
+  unsigned HOST_WIDE_INT ival = ~INTVAL (operands[2]);
+
+  /* Clear as many bits using andi as we can.   */
+  if ((ival & HOST_WIDE_INT_UC (0x7ff)) != 0x0)
+    {
+      ival &= ~HOST_WIDE_INT_UC (0x7ff);
+      budget--;
+    }
+
+  /* And handle remaining bits via bclr.  */
+  while (TARGET_ZBS && ival)
+    {
+      unsigned HOST_WIDE_INT tmpval = HOST_WIDE_INT_UC (1) << ctz_hwi (ival);
+      ival &= ~tmpval;
+      budget--;
+    }
+
   /* If the remaining budget has gone to less than zero, it
      forces the value into a register and performs the AND
      operation.  It returns TRUE to the caller so the caller
      knows code generation is complete.
      FIXME: This is hacked to always be enabled until the last
      patch in the series is enabled.  */
-  if (1)
+  if (ival || budget < 0)
     {
       rtx x = force_reg (word_mode, operands[2]);
       x = gen_rtx_AND (word_mode, operands[1], x);
       emit_insn (gen_rtx_SET (operands[0], x));
       return true;
     }
+
+  /* Synthesis is better than loading the constant.  */
+  ival = ~INTVAL (operands[2]);
+  input = operands[1];
+
+  /* Clear any of the lower 11 bits we need.  */
+  if ((ival & HOST_WIDE_INT_UC (0x7ff)) != 0)
+    {
+      rtx x = GEN_INT (~(ival & HOST_WIDE_INT_UC (0x7ff)));
+      x = gen_rtx_AND (word_mode, input, x);
+      output = gen_reg_rtx (word_mode);
+      emit_insn (gen_rtx_SET (output, x));
+      input = output;
+      ival &= ~HOST_WIDE_INT_UC (0x7ff);
+    }
+
+  /* Clear the rest with bclr.  */
+  while (ival)
+    {
+      unsigned HOST_WIDE_INT tmpval = HOST_WIDE_INT_UC (1) << ctz_hwi (ival);
+      rtx x = GEN_INT (~tmpval);
+      x = gen_rtx_AND (word_mode, input, x);
+      output = gen_reg_rtx (word_mode);
+      emit_insn (gen_rtx_SET (output, x));
+      input = output;
+      ival &= ~tmpval;
+    }
+
+  emit_move_insn (operands[0], input);
+  return true;
 }
 
 

Reply via email to