https://gcc.gnu.org/g:d203f4cb968a1dc45c7fcd1bf5d7cd485ca6f510
commit r15-5892-gd203f4cb968a1dc45c7fcd1bf5d7cd485ca6f510 Author: Georg-Johann Lay <a...@gjlay.de> Date: Mon Dec 2 12:31:18 2024 +0100 AVR: Tweak uint8_t << 6 and uint8_t >> 6 shifts. Logical 8-bit shifts with an offset of 6 can be improved by supporting them as 3-operand operations. PR target/117726 gcc/ * config/avr/avr-passes.cc (avr_emit_shift): All 8-bit shifts with an offset of 6 have 3-operand alternatives. * config/avr/avr.cc (ashlqi3_out, lshrqi3_out) [case 6]: Implement as 3-operand insn. (avr_rtx_costs_1) [QImode, ASHIFT + LSHIFTRT]: Adjust costs for offset of 6. * config/avr/avr.md (*ashlqi3_split, *ashlqi3) (*lshrqi3_split, *lshrqi3): Add "r,r,C06" alternative. Diff: --- gcc/config/avr/avr-passes.cc | 3 ++ gcc/config/avr/avr.cc | 70 ++++++++++++++++++++++++++++++++++++-------- gcc/config/avr/avr.md | 26 ++++++++-------- 3 files changed, 73 insertions(+), 26 deletions(-) diff --git a/gcc/config/avr/avr-passes.cc b/gcc/config/avr/avr-passes.cc index 7be5ec25fbce..dc98780ef277 100644 --- a/gcc/config/avr/avr-passes.cc +++ b/gcc/config/avr/avr-passes.cc @@ -4899,6 +4899,8 @@ avr_emit_shift (rtx_code code, rtx dest, rtx src, int off, rtx scratch) // Work out which alternatives can handle 3 operands independent // of options. + const bool b8_is_3op = off == 6; + const bool b16_is_3op = select<bool>() : code == ASHIFT ? satisfies_constraint_C7c (xoff) // 7...12 : code == LSHIFTRT ? 
satisfies_constraint_C7c (xoff) @@ -4914,6 +4916,7 @@ avr_emit_shift (rtx_code code, rtx dest, rtx src, int off, rtx scratch) const bool is_3op = (off % 8 == 0 || off == n_bits - 1 || (code == ASHIFTRT && off == n_bits - 2) + || (n_bits == 8 && b8_is_3op) || (n_bits == 16 && b16_is_3op) || (n_bits == 24 && b24_is_3op)); rtx shift; diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc index 32028df30a53..ccf9b05bb3e5 100644 --- a/gcc/config/avr/avr.cc +++ b/gcc/config/avr/avr.cc @@ -6780,6 +6780,8 @@ ashlqi3_out (rtx_insn *insn, rtx operands[], int *plen) { if (CONST_INT_P (operands[2])) { + int reg0 = REGNO (operands[0]); + int reg1 = REGNO (operands[1]); bool ldreg_p = test_hard_reg_class (LD_REGS, operands[0]); int offs = INTVAL (operands[2]); @@ -6787,7 +6789,7 @@ ashlqi3_out (rtx_insn *insn, rtx operands[], int *plen) *plen = 0; if (offs <= 3 - || (offs <= 6 && ! ldreg_p)) + || (offs <= 5 && ! ldreg_p)) { for (int i = 0; i < offs; ++i) avr_asm_len ("lsl %0", operands, plen, 1); @@ -6814,10 +6816,28 @@ ashlqi3_out (rtx_insn *insn, rtx operands[], int *plen) "lsl %0" CR_TAB "andi %0,0xe0", operands, plen, 3); case 6: - return avr_asm_len ("swap %0" CR_TAB - "lsl %0" CR_TAB - "lsl %0" CR_TAB - "andi %0,0xc0", operands, plen, 4); + if (ldreg_p && reg0 == reg1) + return avr_asm_len ("swap %0" CR_TAB + "lsl %0" CR_TAB + "lsl %0" CR_TAB + "andi %0,0xc0", operands, plen, 4); + if (ldreg_p && reg0 != reg1 && AVR_HAVE_MUL) + return avr_asm_len ("ldi %0,1<<6" CR_TAB + "mul %0,%1" CR_TAB + "mov %0,r0" CR_TAB + "clr __zero_reg__", operands, plen, 4); + return reg0 != reg1 + ? 
avr_asm_len ("clr %0" CR_TAB + "bst %1,0" CR_TAB + "bld %0,6" CR_TAB + "bst %1,1" CR_TAB + "bld %0,7", operands, plen, 5) + : avr_asm_len ("lsl %0" CR_TAB + "lsl %0" CR_TAB + "lsl %0" CR_TAB + "lsl %0" CR_TAB + "lsl %0" CR_TAB + "lsl %0", operands, plen, 6); case 7: return avr_asm_len ("bst %1,0" CR_TAB "clr %0" CR_TAB @@ -7663,6 +7683,8 @@ lshrqi3_out (rtx_insn *insn, rtx operands[], int *plen) { if (CONST_INT_P (operands[2])) { + int reg0 = REGNO (operands[0]); + int reg1 = REGNO (operands[1]); bool ldreg_p = test_hard_reg_class (LD_REGS, operands[0]); int offs = INTVAL (operands[2]); @@ -7670,7 +7692,7 @@ lshrqi3_out (rtx_insn *insn, rtx operands[], int *plen) *plen = 0; if (offs <= 3 - || (offs <= 6 && ! ldreg_p)) + || (offs <= 5 && ! ldreg_p)) { for (int i = 0; i < offs; ++i) avr_asm_len ("lsr %0", operands, plen, 1); @@ -7697,10 +7719,28 @@ lshrqi3_out (rtx_insn *insn, rtx operands[], int *plen) "lsr %0" CR_TAB "andi %0,0x7", operands, plen, 3); case 6: - return avr_asm_len ("swap %0" CR_TAB - "lsr %0" CR_TAB - "lsr %0" CR_TAB - "andi %0,0x3", operands, plen, 4); + if (ldreg_p && reg0 == reg1) + return avr_asm_len ("swap %0" CR_TAB + "lsr %0" CR_TAB + "lsr %0" CR_TAB + "andi %0,0x3", operands, plen, 4); + if (ldreg_p && reg0 != reg1 && AVR_HAVE_MUL) + return avr_asm_len ("ldi %0,1<<2" CR_TAB + "mul %0,%1" CR_TAB + "mov %0,r1" CR_TAB + "clr __zero_reg__", operands, plen, 4); + return reg0 != reg1 + ? 
avr_asm_len ("clr %0" CR_TAB + "bst %1,6" CR_TAB + "bld %0,0" CR_TAB + "bst %1,7" CR_TAB + "bld %0,1", operands, plen, 5) + : avr_asm_len ("lsr %0" CR_TAB + "lsr %0" CR_TAB + "lsr %0" CR_TAB + "lsr %0" CR_TAB + "lsr %0" CR_TAB + "lsr %0", operands, plen, 6); case 7: return avr_asm_len ("bst %1,7" CR_TAB "clr %0" CR_TAB @@ -12528,7 +12568,9 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code, { if (val1 == 7) *total = COSTS_N_INSNS (3); - else if (val1 >= 0 && val1 <= 7) + else if (val1 == 6) + *total = COSTS_N_INSNS (5 - AVR_HAVE_MUL); + else if (val1 >= 0 && val1 <= 5) *total = COSTS_N_INSNS (val1); else *total = COSTS_N_INSNS (1); @@ -12688,7 +12730,7 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code, *total = COSTS_N_INSNS (4); else if (val1 == 7) *total = COSTS_N_INSNS (2); - else if (val1 >= 0 && val1 <= 7) + else if (val1 >= 0 && val1 <= 5) *total = COSTS_N_INSNS (val1); else *total = COSTS_N_INSNS (1); @@ -12851,7 +12893,9 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code, { if (val1 == 7) *total = COSTS_N_INSNS (3); - else if (val1 >= 0 && val1 <= 7) + else if (val1 == 6) + *total = COSTS_N_INSNS (5 - AVR_HAVE_MUL); + else if (val1 >= 0 && val1 <= 5) *total = COSTS_N_INSNS (val1); else *total = COSTS_N_INSNS (1); diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md index e343fb23d079..0c98318c03dc 100644 --- a/gcc/config/avr/avr.md +++ b/gcc/config/avr/avr.md @@ -5122,9 +5122,9 @@ ;; "*ashlqi3" ;; "*ashlqq3" "*ashluqq3" (define_insn_and_split "*ashl<mode>3_split" - [(set (match_operand:ALL1 0 "register_operand" "=r,r,r,r,r ,!d,r,r") - (ashift:ALL1 (match_operand:ALL1 1 "register_operand" "0,0,0,0,r ,0 ,0,0") - (match_operand:QI 2 "nop_general_operand" "r,L,P,K,C07,n ,n,Qm")))] + [(set (match_operand:ALL1 0 "register_operand" "=r,r ,r ,r,r") + (ashift:ALL1 (match_operand:ALL1 1 "register_operand" "0,0 ,r ,0,0") + (match_operand:QI 2 "nop_general_operand" "r,LPK,C07 C06,n,Qm")))] "" "#" "&& reload_completed" @@ -5134,15 
+5134,15 @@ (clobber (reg:CC REG_CC))])]) (define_insn "*ashl<mode>3" - [(set (match_operand:ALL1 0 "register_operand" "=r,r,r,r,r ,!d,r,r") - (ashift:ALL1 (match_operand:ALL1 1 "register_operand" "0,0,0,0,r ,0 ,0,0") - (match_operand:QI 2 "nop_general_operand" "r,L,P,K,C07,n ,n,Qm"))) + [(set (match_operand:ALL1 0 "register_operand" "=r,r ,r ,r,r") + (ashift:ALL1 (match_operand:ALL1 1 "register_operand" "0,0 ,r ,0,0") + (match_operand:QI 2 "nop_general_operand" "r,LPK,C07 C06,n,Qm"))) (clobber (reg:CC REG_CC))] "reload_completed" { return ashlqi3_out (insn, operands, NULL); } - [(set_attr "length" "5,0,1,2,3,4,6,9") + [(set_attr "length" "9") (set_attr "adjust_len" "ashlqi")]) ;; "ashlhi3" @@ -5747,9 +5747,9 @@ ;; "*lshrqq3" ;; "*lshruqq3" (define_insn_and_split "*lshr<mode>3_split" - [(set (match_operand:ALL1 0 "register_operand" "=r,r,r,r,r ,!d,r,r") - (lshiftrt:ALL1 (match_operand:ALL1 1 "register_operand" "0,0,0,0,r ,0 ,0,0") - (match_operand:QI 2 "nop_general_operand" "r,L,P,K,C07,n ,n,Qm")))] + [(set (match_operand:ALL1 0 "register_operand" "=r,r ,r ,r,r") + (lshiftrt:ALL1 (match_operand:ALL1 1 "register_operand" "0,0 ,r ,0,0") + (match_operand:QI 2 "nop_general_operand" "r,LPK,C07 C06,n,Qm")))] "" "#" "&& reload_completed" @@ -5759,9 +5759,9 @@ (clobber (reg:CC REG_CC))])]) (define_insn "*lshr<mode>3" - [(set (match_operand:ALL1 0 "register_operand" "=r,r,r,r,r ,!d,r,r") - (lshiftrt:ALL1 (match_operand:ALL1 1 "register_operand" "0,0,0,0,r ,0 ,0,0") - (match_operand:QI 2 "nop_general_operand" "r,L,P,K,C07,n ,n,Qm"))) + [(set (match_operand:ALL1 0 "register_operand" "=r,r ,r ,r,r") + (lshiftrt:ALL1 (match_operand:ALL1 1 "register_operand" "0,0 ,r ,0,0") + (match_operand:QI 2 "nop_general_operand" "r,LPK,C07 C06,n,Qm"))) (clobber (reg:CC REG_CC))] "reload_completed" {