https://gcc.gnu.org/g:2ae8cb4201c2e1f7bd0d03a5ffe4fcc371ddc882
commit r16-4951-g2ae8cb4201c2e1f7bd0d03a5ffe4fcc371ddc882
Author: Guo Jie <[email protected]>
Date:   Sun Nov 2 11:30:57 2025 +0800

    LoongArch: Optimize AND large immediate operation

    When a variable is ANDed with a large immediate whose bit
    representation contains two runs of consecutive zero bits, one of
    them starting at the MSB, the run starting at the MSB can be cleared
    with the bstrpick.d instruction and the other run with the bstrins.d
    instruction.

    For example, in the case 'var & 0x3fffffffefffffff':

    Before:
        lu12i.w     $r12,-65537
        ori         $r12,$r12,4095
        lu52i.d     $r12,$r12,0x3ff
        and         $r4,$r4,$r12

    After:
        bstrpick.d  $r4,$r4,61,0
        bstrins.d   $r4,$r0,28,28

    gcc/ChangeLog:

            * config/loongarch/loongarch-protos.h
            (loongarch_use_bstrins_bstrpick_for_and): New proto.
            * config/loongarch/loongarch.cc
            (loongarch_use_bstrins_bstrpick_for_and): Decide whether to optimize.
            (loongarch_rtx_costs): Adjust the cost of AND operation.
            * config/loongarch/loongarch.md
            (bstrins_bstrpick_for_and_imm<mode>): New insn_and_split.

    gcc/testsuite/ChangeLog:

            * gcc.target/loongarch/and-large-immediate-opt.c: New test.
Diff:
---
 gcc/config/loongarch/loongarch-protos.h            |  1 +
 gcc/config/loongarch/loongarch.cc                  | 57 +++++++++++++++++++---
 gcc/config/loongarch/loongarch.md                  | 25 +++++++++
 .../gcc.target/loongarch/and-large-immediate-opt.c | 14 +++++
 4 files changed, 90 insertions(+), 7 deletions(-)

diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h
index bec436845aa1..e00dd898943a 100644
--- a/gcc/config/loongarch/loongarch-protos.h
+++ b/gcc/config/loongarch/loongarch-protos.h
@@ -159,6 +159,7 @@ extern bool loongarch_use_ins_ext_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
 extern bool loongarch_check_zero_div_p (void);
 extern bool loongarch_pre_reload_split (void);
 extern int loongarch_use_bstrins_for_ior_with_mask (machine_mode, rtx *);
+extern bool loongarch_use_bstrins_bstrpick_for_and (rtx, machine_mode);
 extern rtx loongarch_rewrite_mem_for_simple_ldst (rtx);
 
 union loongarch_gen_fn_ptrs
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index ecc7e21f49c0..6dc2006003e7 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -3798,6 +3798,39 @@ loongarch_use_bstrins_for_ior_with_mask_1 (machine_mode mode,
   return 0;
 }
 
+/* Check if it is possible to optimize AND operation with an immediate:
+   a. immediate is loaded by more than 1 instruction
+   b. can use bstrpick.d + bstrins.d.  */
+
+bool
+loongarch_use_bstrins_bstrpick_for_and (rtx op, machine_mode mode)
+{
+  if (!TARGET_64BIT)
+    return false;
+
+  /* Avoid aggressive optimization of combine before reload.  */
+  if (!reload_completed)
+    return false;
+
+  /* It's meaningless if the OP is not splittable
+     and skip the cases already supported in AND operation.  */
+  if (!splittable_const_int_operand (op, mode) || and_operand (op, mode))
+    return false;
+
+  /* bstrpick.d handles the run of zero bits starting from the MSB, so OP
+     must have at least one leading zero bit.  Rejecting the other cases
+     here also keeps __builtin_clzll away from zero and keeps the shift
+     count below under 64; both would be undefined behavior.  */
+  unsigned HOST_WIDE_INT val = UINTVAL (op);
+  if (val == 0 || (val >> 63) != 0)
+    return false;
+
+  int leading_zero_bit = __builtin_clzll (val);
+  unsigned HOST_WIDE_INT mask = (~0ULL) << (64 - leading_zero_bit);
+
+  return ins_zero_bitmask_operand (GEN_INT (val | mask), mode);
+}
+
 /* Return the cost of moving between two registers of mode MODE.  */
 
 static int
@@ -3917,14 +3950,24 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code,
       return false;
 
     case AND:
-      /* Check for a *clear_upper32 pattern and treat it like a zero
-	 extension.  See the pattern's comment for details.  */
-      if (TARGET_64BIT && mode == DImode && CONST_INT_P (XEXP (x, 1))
-	  && UINTVAL (XEXP (x, 1)) == 0xffffffff)
+      if (TARGET_64BIT && mode == DImode && CONST_INT_P (XEXP (x, 1)))
 	{
-	  *total = (loongarch_zero_extend_cost (XEXP (x, 0))
-		    + set_src_cost (XEXP (x, 0), mode, speed));
-	  return true;
+	  /* Check for a *clear_upper32 pattern and treat it like a zero
+	     extension.  See the pattern's comment for details.  */
+	  if (UINTVAL (XEXP (x, 1)) == 0xffffffff)
+	    {
+	      *total = (loongarch_zero_extend_cost (XEXP (x, 0))
+			+ set_src_cost (XEXP (x, 0), mode, speed));
+	      return true;
+	    }
+	  /* Check if it can be done by bstrpick.d and bstrins.d.  */
+	  else if (loongarch_use_bstrins_bstrpick_for_and (XEXP (x, 1), mode))
+	    {
+	      /* The pattern will be split into 2 insns.  */
+	      *total = (COSTS_N_INSNS (2)
+			+ set_src_cost (XEXP (x, 0), mode, speed));
+	      return true;
+	    }
 	}
       /* (AND (NOT op0) (NOT op1) is a nor operation that can be done in
 	 a single instruction.  */
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index 625f30ca2658..9371134a69db 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -1670,6 +1670,31 @@
   DONE;
 })
 
+;; AND with a large immediate that has a run of zero bits starting from
+;; the MSB plus one more run of zero bits: split it into bstrpick.d for
+;; the former and bstrins.d for the latter.  The insn condition only
+;; accepts operand 2 with at least one leading zero bit, which keeps the
+;; shift count below under 64.  The split emits both insns itself, hence
+;; the DONE.
+(define_insn_and_split "bstrins_bstrpick_for_and_imm<mode>"
+  [(set (match_operand:X 0 "register_operand" "=r")
+	(and:X (match_operand:X 1 "register_operand" "r")
+	       (match_operand:X 2 "const_int_operand" "i")))]
+  "loongarch_use_bstrins_bstrpick_for_and (operands[2], <MODE>mode)"
+  "#"
+  "&& true"
+  [(const_int 0)]
+{
+  unsigned HOST_WIDE_INT op2 = INTVAL (operands[2]);
+  int leading_zero_bit = __builtin_clzll (op2);
+  unsigned HOST_WIDE_INT mask = (~0ULL) << (64 - leading_zero_bit);
+  emit_insn (gen_extzv<mode> (operands[0], operands[1],
+			      GEN_INT (64 - leading_zero_bit), const0_rtx));
+  emit_insn (gen_and<mode>3 (operands[0], operands[0], GEN_INT (op2 | mask)));
+  DONE;
+}
+  [(set_attr "length" "8")])
+
 (define_insn "*iorhi3"
   [(set (match_operand:HI 0 "register_operand" "=r,r")
 	(ior:HI (match_operand:HI 1 "register_operand" "%r,r")
diff --git a/gcc/testsuite/gcc.target/loongarch/and-large-immediate-opt.c b/gcc/testsuite/gcc.target/loongarch/and-large-immediate-opt.c
new file mode 100644
index 000000000000..921bef67fc26
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/and-large-immediate-opt.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target { loongarch64*-*-* } } } */
+/* { dg-options "-O3" } */
+/* { dg-final { scan-assembler-not "\tlu12i.w" } } */
+/* { dg-final { scan-assembler-not "\tori" } } */
+/* { dg-final { scan-assembler-not "\tlu52i.d" } } */
+/* { dg-final { scan-assembler-not "\tand" } } */
+/* { dg-final { scan-assembler "\tbstrpick.d" } } */
+/* { dg-final { scan-assembler "\tbstrins.d" } } */
+
+long
+test (long a)
+{
+  return a & 0x3fffffffefffffff;
+}
