When a variable is ANDed with a large immediate whose bit
representation contains two runs of consecutive zero bits, one of
which starts at the MSB, the run starting at the MSB can be cleared
with the bstrpick.d instruction and the other one with bstrins.d.
For example, in the case 'var & 0x3fffffffefffffff':
Before:
lu12i.w $r12,-65537
ori $r12,$r12,4095
lu52i.d $r12,$r12,0x3ff
and $r4,$r4,$r12
After:
bstrpick.d $r4,$r4,61,0
bstrins.d $r4,$r0,28,28
gcc/ChangeLog:
* config/loongarch/loongarch-protos.h
(loongarch_use_bstrins_bstrpick_for_and): New proto.
* config/loongarch/loongarch.cc
(loongarch_use_bstrins_bstrpick_for_and): New function.  Decide
whether an AND with an immediate can use bstrpick.d + bstrins.d.
(loongarch_rtx_costs): Adjust the cost of AND operation.
* config/loongarch/loongarch.md
(bstrins_bstrpick_for_and_imm<mode>): New insn_and_split.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/and-large-immediate-opt.c: New test.
---
gcc/config/loongarch/loongarch-protos.h | 1 +
gcc/config/loongarch/loongarch.cc | 52 ++++++++++++++++---
gcc/config/loongarch/loongarch.md | 18 +++++++
.../loongarch/and-large-immediate-opt.c | 14 +++++
4 files changed, 78 insertions(+), 7 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/loongarch/and-large-immediate-opt.c
diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h
index bec436845aa..e00dd898943 100644
--- a/gcc/config/loongarch/loongarch-protos.h
+++ b/gcc/config/loongarch/loongarch-protos.h
@@ -159,6 +159,7 @@ extern bool loongarch_use_ins_ext_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
extern bool loongarch_check_zero_div_p (void);
extern bool loongarch_pre_reload_split (void);
extern int loongarch_use_bstrins_for_ior_with_mask (machine_mode, rtx *);
+extern bool loongarch_use_bstrins_bstrpick_for_and (rtx, machine_mode);
extern rtx loongarch_rewrite_mem_for_simple_ldst (rtx);
union loongarch_gen_fn_ptrs
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index f7ce3aa2999..65a16a7ce6f 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -3803,6 +3803,34 @@ loongarch_use_bstrins_for_ior_with_mask_1 (machine_mode mode,
return 0;
}
+/* Return true if an AND of a register with the immediate OP in MODE
+   should be split into bstrpick.d + bstrins.d.  This requires that:
+   a. the immediate is loaded by more than 1 instruction;
+   b. once its leading zeros are set to one, the value that is left
+      satisfies ins_zero_bitmask_operand.  */
+
+bool
+loongarch_use_bstrins_bstrpick_for_and (rtx op, machine_mode mode)
+{
+  if (!TARGET_64BIT)
+    return false;
+
+  /* Avoid aggressive optimization of combine before reload.  */
+  if (!reload_completed)
+    return false;
+
+  /* It's meaningless if the OP is not splittable
+     and skip the cases already supported in AND operation.  */
+  if (!splittable_const_int_operand (op, mode) || and_operand (op, mode))
+    return false;
+
+  /* bstrpick.d handles the zeros starting from the MSB, so there must
+     be at least one.  Bail out on zero too: __builtin_clzll (0) and a
+     shift by 64 bits (no leading zero) are both undefined.  */
+  unsigned HOST_WIDE_INT val = UINTVAL (op);
+  if (val == 0 || (val >> 63) != 0)
+    return false;
+
+  int leading_zero_bit = __builtin_clzll (val);
+  unsigned HOST_WIDE_INT mask = (~0ULL) << (64 - leading_zero_bit);
+
+  /* With the leading zeros filled in, the remaining value decides
+     whether the second instruction can be a bstrins.d.  */
+  return ins_zero_bitmask_operand (GEN_INT (val | mask), mode);
+}
+
/* Return the cost of moving between two registers of mode MODE. */
static int
@@ -3922,14 +3950,24 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code,
return false;
case AND:
- /* Check for a *clear_upper32 pattern and treat it like a zero
- extension. See the pattern's comment for details. */
- if (TARGET_64BIT && mode == DImode && CONST_INT_P (XEXP (x, 1))
- && UINTVAL (XEXP (x, 1)) == 0xffffffff)
+ if (TARGET_64BIT && mode == DImode && CONST_INT_P (XEXP (x, 1)))
{
- *total = (loongarch_zero_extend_cost (XEXP (x, 0))
- + set_src_cost (XEXP (x, 0), mode, speed));
- return true;
+ /* Check for a *clear_upper32 pattern and treat it like a zero
+ extension. See the pattern's comment for details. */
+ if (UINTVAL (XEXP (x, 1)) == 0xffffffff)
+ {
+ *total = (loongarch_zero_extend_cost (XEXP (x, 0))
+ + set_src_cost (XEXP (x, 0), mode, speed));
+ return true;
+ }
+ /* Check if it can be done by bstrpick.d and bstrins.d. */
+ else if (loongarch_use_bstrins_bstrpick_for_and (XEXP (x, 1), mode))
+ {
+ /* The pattern will be split into 2 insns. */
+ *total = (COSTS_N_INSNS (2)
+ + set_src_cost (XEXP (x, 0), mode, speed));
+ return true;
+ }
}
/* (AND (NOT op0) (NOT op1) is a nor operation that can be done in
a single instruction. */
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index 625f30ca265..9371134a69d 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -1670,6 +1670,24 @@ (define_peephole2
DONE;
})
+;; Split an AND with a large immediate into an extzv followed by a
+;; second AND when loongarch_use_bstrins_bstrpick_for_and accepts the
+;; immediate.
+(define_insn_and_split "bstrins_bstrpick_for_and_imm<mode>"
+  [(set (match_operand:X 0 "register_operand" "=r")
+        (and:X (match_operand:X 1 "register_operand" "r")
+               (match_operand:X 2 "const_int_operand" "i")))]
+  "loongarch_use_bstrins_bstrpick_for_and (operands[2], <MODE>mode)"
+  "#"
+  "&& true"
+  [(const_int 0)]
+{
+  unsigned HOST_WIDE_INT op2 = INTVAL (operands[2]);
+  int leading_zero_bit = __builtin_clzll (op2);
+  unsigned HOST_WIDE_INT mask = (~0ULL) << (64 - leading_zero_bit);
+
+  /* Zero-extract the low (64 - leading_zero_bit) bits, which clears
+     the bits covered by the leading zeros of the immediate.  */
+  emit_insn (gen_extzv<mode> (operands[0], operands[1],
+                              GEN_INT (64 - leading_zero_bit), const0_rtx));
+
+  /* The leading zeros are dealt with, so AND with them set to one.  */
+  emit_insn (gen_and<mode>3 (operands[0], operands[0], GEN_INT (op2 | mask)));
+
+  /* Both insns were emitted by hand; without DONE the (const_int 0)
+     replacement template would be emitted after them as well.  */
+  DONE;
+}
+  [(set_attr "length" "8")])
+
(define_insn "*iorhi3"
[(set (match_operand:HI 0 "register_operand" "=r,r")
(ior:HI (match_operand:HI 1 "register_operand" "%r,r")
diff --git a/gcc/testsuite/gcc.target/loongarch/and-large-immediate-opt.c b/gcc/testsuite/gcc.target/loongarch/and-large-immediate-opt.c
new file mode 100644
index 00000000000..921bef67fc2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/and-large-immediate-opt.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target { loongarch64*-*-* } } } */
+/* { dg-options "-O3" } */
+/* { dg-final { scan-assembler-not "\tlu12i.w" } } */
+/* { dg-final { scan-assembler-not "\tori" } } */
+/* { dg-final { scan-assembler-not "\tlu52i.d" } } */
+/* { dg-final { scan-assembler-not "\tand" } } */
+/* { dg-final { scan-assembler "\tbstrpick.d" } } */
+/* { dg-final { scan-assembler "\tbstrins.d" } } */
+
+long
+test (long a)
+{ /* 0x3fffffffefffffff has zero bits only at 63:62 and at 28: a run of
+     zeros starting at the MSB plus one more run.  The scans above expect
+     bstrpick.d + bstrins.d instead of a materialized mask and an and.  */
+ return a & 0x3fffffffefffffff;
+}