When a variable is ANDed with a large immediate whose bit representation
contains two runs of consecutive 0 bits, one of which starts at the MSB,
the run starting at the MSB can be cleared with the bstrpick.d instruction
and the other run with the bstrins.d instruction.

For example, in the case 'var & 0x3fffffffefffffff':

Before:
    lu12i.w $r12,-65537
    ori     $r12,$r12,4095
    lu52i.d $r12,$r12,0x3ff
    and     $r4,$r4,$r12

After:
    bstrpick.d  $r4,$r4,61,0
    bstrins.d   $r4,$r0,28,28

gcc/ChangeLog:

        * config/loongarch/loongarch-protos.h
        (loongarch_use_bstrins_bstrpick_for_and): New proto.
        * config/loongarch/loongarch.cc
        (loongarch_use_bstrins_bstrpick_for_and): Decide whether
        to optimize.
        (loongarch_rtx_costs): Adjust the cost of AND operation.
        * config/loongarch/loongarch.md
        (bstrins_bstrpick_for_and_imm<mode>): New insn_and_split.

gcc/testsuite/ChangeLog:

        * gcc.target/loongarch/and-large-immediate-opt.c: New test.
---
 gcc/config/loongarch/loongarch-protos.h       |  1 +
 gcc/config/loongarch/loongarch.cc             | 52 ++++++++++++++++---
 gcc/config/loongarch/loongarch.md             | 18 +++++++
 .../loongarch/and-large-immediate-opt.c       | 14 +++++
 4 files changed, 78 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/and-large-immediate-opt.c

diff --git a/gcc/config/loongarch/loongarch-protos.h 
b/gcc/config/loongarch/loongarch-protos.h
index bec436845aa..e00dd898943 100644
--- a/gcc/config/loongarch/loongarch-protos.h
+++ b/gcc/config/loongarch/loongarch-protos.h
@@ -159,6 +159,7 @@ extern bool loongarch_use_ins_ext_p (rtx, HOST_WIDE_INT, 
HOST_WIDE_INT);
 extern bool loongarch_check_zero_div_p (void);
 extern bool loongarch_pre_reload_split (void);
 extern int loongarch_use_bstrins_for_ior_with_mask (machine_mode, rtx *);
+extern bool loongarch_use_bstrins_bstrpick_for_and (rtx, machine_mode);
 extern rtx loongarch_rewrite_mem_for_simple_ldst (rtx);
 
 union loongarch_gen_fn_ptrs
diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index f7ce3aa2999..65a16a7ce6f 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -3803,6 +3803,34 @@ loongarch_use_bstrins_for_ior_with_mask_1 (machine_mode 
mode,
   return 0;
 }
 
+/* Return true if an AND of a register with the constant OP is better done
+   as bstrpick.d + bstrins.d: OP needs more than one instruction to load
+   and is not already handled by a single AND pattern.  */
+
+bool
+loongarch_use_bstrins_bstrpick_for_and (rtx op, machine_mode mode)
+{
+  if (!TARGET_64BIT)
+    return false;
+
+  /* Avoid aggressive optimization of combine before reload.  */
+  if (!reload_completed)
+    return false;
+
+  /* It's meaningless if the OP is not splittable
+     and skip the cases already supported in AND operation.  */
+  if (!splittable_const_int_operand (op, mode) || and_operand (op, mode))
+    return false;
+
+  /* bstrpick.d needs leading zeros; shifting by 64 below would be UB.  */
+  int leading_zero_bit = UINTVAL (op) ? __builtin_clzll (UINTVAL (op)) : 0;
+  if (leading_zero_bit == 0)
+    return false;
+
+  unsigned HOST_WIDE_INT mask = (~0ULL) << (64 - leading_zero_bit);
+  return ins_zero_bitmask_operand (GEN_INT (UINTVAL (op) | mask), mode);
+}
+
 /* Return the cost of moving between two registers of mode MODE.  */
 
 static int
@@ -3922,14 +3950,24 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int 
outer_code,
       return false;
 
     case AND:
-      /* Check for a *clear_upper32 pattern and treat it like a zero
-        extension.  See the pattern's comment for details.  */
-      if (TARGET_64BIT && mode == DImode && CONST_INT_P (XEXP (x, 1))
-         && UINTVAL (XEXP (x, 1)) == 0xffffffff)
+      if (TARGET_64BIT && mode == DImode && CONST_INT_P (XEXP (x, 1)))
        {
-         *total = (loongarch_zero_extend_cost (XEXP (x, 0))
-                   + set_src_cost (XEXP (x, 0), mode, speed));
-         return true;
+         /* Check for a *clear_upper32 pattern and treat it like a zero
+            extension.  See the pattern's comment for details.  */
+         if (UINTVAL (XEXP (x, 1)) == 0xffffffff)
+           {
+             *total = (loongarch_zero_extend_cost (XEXP (x, 0))
+                       + set_src_cost (XEXP (x, 0), mode, speed));
+             return true;
+           }
+         /* Check if it can be done by bstrpick.d and bstrins.d.  */
+         else if (loongarch_use_bstrins_bstrpick_for_and (XEXP (x, 1), mode))
+           {
+             /* The pattern will be split into 2 insns.  */
+             *total = (COSTS_N_INSNS (2)
+                       + set_src_cost (XEXP (x, 0), mode, speed));
+             return true;
+           }
        }
       /* (AND (NOT op0) (NOT op1) is a nor operation that can be done in
         a single instruction.  */
diff --git a/gcc/config/loongarch/loongarch.md 
b/gcc/config/loongarch/loongarch.md
index 625f30ca265..9371134a69d 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -1670,6 +1670,24 @@ (define_peephole2
     DONE;
   })
 
+(define_insn_and_split "bstrins_bstrpick_for_and_imm<mode>"
+  [(set (match_operand:X 0 "register_operand" "=r")
+       (and:X (match_operand:X 1 "register_operand" "r")
+              (match_operand:X 2 "const_int_operand" "i")))]
+  "loongarch_use_bstrins_bstrpick_for_and (operands[2], <MODE>mode)"
+  "#"
+  "&& true"
+  [(const_int 0)]
+{
+  int pick_width = 64 - __builtin_clzll (UINTVAL (operands[2]));
+  rtx ins_mask = GEN_INT (UINTVAL (operands[2]) | ((~0ULL) << pick_width));
+  emit_insn (gen_extzv<mode> (operands[0], operands[1],
+                             GEN_INT (pick_width), const0_rtx));
+  emit_insn (gen_and<mode>3 (operands[0], operands[0], ins_mask));
+  DONE; /* Do not also emit the (const_int 0) template.  */
+}
+  [(set_attr "length" "8")])
+
 (define_insn "*iorhi3"
   [(set (match_operand:HI 0 "register_operand" "=r,r")
        (ior:HI (match_operand:HI 1 "register_operand" "%r,r")
diff --git a/gcc/testsuite/gcc.target/loongarch/and-large-immediate-opt.c 
b/gcc/testsuite/gcc.target/loongarch/and-large-immediate-opt.c
new file mode 100644
index 00000000000..921bef67fc2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/and-large-immediate-opt.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target { loongarch64*-*-* } } } */
+/* { dg-options "-O3" } */
+/* { dg-final { scan-assembler-not "\tlu12i.w" } } */
+/* { dg-final { scan-assembler-not "\tori" } } */
+/* { dg-final { scan-assembler-not "\tlu52i.d" } } */
+/* { dg-final { scan-assembler-not "\tand" } } */
+/* { dg-final { scan-assembler "\tbstrpick.d" } } */
+/* { dg-final { scan-assembler "\tbstrins.d" } } */
+/* The mask clears bits 63:62 (bstrpick.d) and bit 28 (bstrins.d).  */
+long
+test (long a)
+{
+  return a & 0x3fffffffefffffff;
+}
-- 
2.50.0

Reply via email to