https://gcc.gnu.org/g:55d792542d21311e415daee333e2786ac5f150a7

commit r15-7109-g55d792542d21311e415daee333e2786ac5f150a7
Author: Georg-Johann Lay <a...@gjlay.de>
Date:   Tue Jan 21 12:47:40 2025 +0100

    AVR: Tweak some 16-bit shifts by using MUL.

    u16 << 5 and u16 << 6 can be tweaked by using MUL instructions.
    Benefit is a better speed ratio with -Os and smaller size with -O2.

    gcc/
            * config/avr/avr-passes.cc (avr_emit_shift) [ASHIFT,HImode]:
            Allow offsets 5 and 6 as 3op provided have MUL and a scratch.
            * config/avr/avr.cc (avr_optimize_size_max_p): New function.
            (avr_out_ashlhi3_mul): New function.
            (ashlhi3_out) [case 4, 5, 6]: Better speed for -Os.
            * config/avr/avr.md (isa) <mul, no_mul>: New attr values.
            (*ashlhi3_const): Add alternative for offsets 5 and 6.

Diff:
---
 gcc/config/avr/avr-passes.cc |  4 ++-
 gcc/config/avr/avr.cc        | 72 ++++++++++++++++++++++++++++++++++++++++++--
 gcc/config/avr/avr.md        | 19 ++++++++----
 3 files changed, 85 insertions(+), 10 deletions(-)

diff --git a/gcc/config/avr/avr-passes.cc b/gcc/config/avr/avr-passes.cc
index 8bf125f12aaf..e32c46738d81 100644
--- a/gcc/config/avr/avr-passes.cc
+++ b/gcc/config/avr/avr-passes.cc
@@ -4951,7 +4951,9 @@ avr_emit_shift (rtx_code code, rtx dest, rtx src, int off, rtx scratch)
   const bool b8_is_3op = off == 6;
 
   const bool b16_is_3op = select<bool>()
-    : code == ASHIFT ? satisfies_constraint_C7c (xoff) // 7...12
+    : code == ASHIFT ? (satisfies_constraint_C7c (xoff) // 7...12
+                        // The "C05 C06" alternative of *ashlhi3_const.
+                        || (AVR_HAVE_MUL && scratch && (off == 5 || off == 6)))
     : code == LSHIFTRT ? satisfies_constraint_C7c (xoff)
     : code == ASHIFTRT ? off == 7
     : bad_case<bool> ();
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index ce1a375ce92c..e5a5aa34ec04 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -563,7 +563,8 @@ avr_option_override (void)
 }
 
 
-int avr_optimize_size_level ()
+int
+avr_optimize_size_level ()
 {
   return cfun && cfun->decl
     ? opt_for_fn (cfun->decl, optimize_size)
@@ -571,6 +572,13 @@ int avr_optimize_size_level ()
 }
 
 
+static bool
+avr_optimize_size_max_p ()
+{
+  return avr_optimize_size_level () == OPTIMIZE_SIZE_MAX;
+}
+
+
 /* Implement `INIT_EXPANDERS'. */
 /* The function works like a singleton. */
 
@@ -7048,6 +7056,26 @@ ashlqi3_out (rtx_insn *insn, rtx operands[], int *plen)
 }
 
 
+/* Output a 16-bit left shift XOP[0] = XOP[1] << XOP[2] using MUL.
+   XOP[3] is an upper 8-bit scratch register. This function is currently
+   only used for offsets 5 and 6 but works for offsets 1...7 as well. */
+
+static const char*
+avr_out_ashlhi3_mul (rtx *xop, bool scratch_p, int *plen)
+{
+  gcc_assert (scratch_p && AVR_HAVE_MUL);
+
+  // Takes 7 words and 9 cycles.
+  return avr_asm_len ("ldi %3,1<<%2" CR_TAB
+                      "mul %B1,%3" CR_TAB
+                      "mov %B0,r0" CR_TAB
+                      "mul %A1,%3" CR_TAB
+                      "mov %A0,r0" CR_TAB
+                      "or %B0,r1" CR_TAB
+                      "clr __zero_reg__", xop, plen, -7);
+}
+
+
 /* 16bit shift left ((short)x << i) */
 
 const char *
@@ -7060,6 +7088,10 @@ ashlhi3_out (rtx_insn *insn, rtx operands[], int *plen)
                      && REG_P (operands[3]));
       bool ldi_ok = test_hard_reg_class (LD_REGS, operands[0]);
       bool reg1_unused_after = reg_unused_after (insn, operands[1]);
+      int size;
+      int reg0 = REGNO (operands[0]);
+      int reg1 = REGNO (operands[1]);
+      bool use_mul_p = reg1 != reg0 || (scratch && AVR_HAVE_MUL);
 
       if (plen)
         *plen = 0;
@@ -7073,7 +7105,7 @@ ashlhi3_out (rtx_insn *insn, rtx operands[], int *plen)
           return avr_asm_len ("clr %B0" CR_TAB
                               "clr %A0", operands, plen, 2);
         case 4:
-          if (optimize_size && scratch)
+          if (avr_optimize_size_max_p () && scratch)
             break; /* 5 */
           if (ldi_ok)
             return avr_asm_len ("swap %A0" CR_TAB
@@ -7093,6 +7125,23 @@ ashlhi3_out (rtx_insn *insn, rtx operands[], int *plen)
           break; /* optimize_size ? 6 : 8 */
 
         case 5:
+          size = (scratch ? 5 : 6) + (reg1 != reg0) * (2 - AVR_HAVE_MOVW);
+          if (avr_optimize_size_max_p () && (size < 7 || !use_mul_p))
+            {
+              if (reg0 != reg1)
+                {
+                  if (AVR_HAVE_MOVW)
+                    avr_asm_len ("movw %0,%1", operands, plen, 1);
+                  else
+                    avr_asm_len ("mov %A0,%A1" CR_TAB
+                                 "mov %B0,%B1", operands, plen, 2);
+                }
+              break; // scratch ? 5 : 6
+            }
+
+          if (use_mul_p)
+            return avr_out_ashlhi3_mul (operands, scratch, plen); // 7
+
           if (optimize_size)
             break; /* scratch ? 5 : 6 */
           if (ldi_ok)
@@ -7117,6 +7166,23 @@ ashlhi3_out (rtx_insn *insn, rtx operands[], int *plen)
           break; /* 10 */
 
         case 6:
+          size = (scratch ? 5 : 6) + (reg1 != reg0) * (2 - AVR_HAVE_MOVW);
+          if (avr_optimize_size_max_p () && (size < 7 || !use_mul_p))
+            {
+              if (reg0 != reg1)
+                {
+                  if (AVR_HAVE_MOVW)
+                    avr_asm_len ("movw %0,%1", operands, plen, 1);
+                  else
+                    avr_asm_len ("mov %A0,%A1" CR_TAB
+                                 "mov %B0,%B1", operands, plen, 2);
+                }
+              break; // scratch ? 5 : 6
+            }
+
+          if (use_mul_p)
+            return avr_out_ashlhi3_mul (operands, scratch, plen); // 7
+
           if (optimize_size)
             break; /* scratch ? 5 : 6 */
           return avr_asm_len ("clr __tmp_reg__" CR_TAB
@@ -7252,7 +7318,7 @@ ashlhi3_out (rtx_insn *insn, rtx operands[], int *plen)
     }
 
   out_shift_with_cnt ("lsl %A0" CR_TAB
-                      "rol %B0", insn, operands, plen, 2);
+                      "rol %B0", insn, operands, plen, 2);
   return "";
 }
 
diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index 3a7b029e9835..594940c67819 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -184,6 +184,7 @@
 ;; elpm : ISA has ELPM but no ELPMX      elpmx : ISA has ELPMX
 ;; no_xmega: non-XMEGA core              xmega : XMEGA core
 ;; no_adiw: ISA has no ADIW, SBIW        adiw : ISA has ADIW, SBIW
+;; no_mul: ISA has no MUL                mul : ISA has [F]MUL[S[U]]
 
 ;; The following ISA attributes are actually not architecture specific,
 ;; but depend on (optimization) options. This is because the "enabled"
@@ -195,7 +196,7 @@
 
 (define_attr "isa"
   "mov,movw, rjmp,jmp, ijmp,eijmp, lpm,lpmx, elpm,elpmx, no_xmega,xmega,
-   no_adiw,adiw,
+   no_adiw,adiw, no_mul,mul,
    3op,
    standard"
   (const_string "standard"))
@@ -246,6 +247,12 @@
          (and (eq_attr "isa" "no_adiw")
               (match_test "!AVR_HAVE_ADIW"))
 
+         (and (eq_attr "isa" "mul")
+              (match_test "AVR_HAVE_MUL"))
+
+         (and (eq_attr "isa" "no_mul")
+              (match_test "!AVR_HAVE_MUL"))
+
          (and (eq_attr "isa" "3op")
               (match_test "avr_shift_is_3op ()"))
          )
@@ -5459,16 +5466,16 @@
 ;; "*ashlhq3_const" "*ashluhq3_const"
 ;; "*ashlha3_const" "*ashluha3_const"
 (define_insn "*ashl<mode>3_const"
-  [(set (match_operand:ALL2 0 "register_operand" "=r ,r ,r ,r")
-        (ashift:ALL2 (match_operand:ALL2 1 "register_operand" "0 ,r ,r ,0")
-                     (match_operand:QI 2 "const_int_operand" "LPK,O C7c C15,C2l,n")))
-   (clobber (match_scratch:QI 3 "=X ,X ,&d ,&d"))
+  [(set (match_operand:ALL2 0 "register_operand" "=r ,r ,r ,r ,r")
+        (ashift:ALL2 (match_operand:ALL2 1 "register_operand" "0 ,r ,r ,r ,0")
+                     (match_operand:QI 2 "const_int_operand" "LPK,O C7c C15,C05 C06,C2l,n")))
+   (clobber (match_scratch:QI 3 "=X ,X ,&d ,&d ,&d"))
    (clobber (reg:CC REG_CC))]
   "reload_completed"
   {
     return ashlhi3_out (insn, operands, NULL);
   }
-  [(set_attr "isa" "*,*,3op,*")
+  [(set_attr "isa" "*,*,mul,3op,*")
    (set_attr "length" "10")
    (set_attr "adjust_len" "ashlhi")])
 