u16 << 5 and u16 << 6 can be tweaked by using MUL instructions.
The benefit is a better speed ratio with -Os and a smaller code size with -O2.
No new regressions.
Ok for trunk?
Johann
..
AVR: Tweak some 16-bit shifts by using MUL.
u16 << 5 and u16 << 6 can be tweaked by using MUL instructions.
The benefit is a better speed ratio with -Os and a smaller code size with -O2.
gcc/
* config/avr/avr-passes.cc (avr_emit_shift) [ASHIFT,HImode]:
Allow offsets 5 and 6 as 3op provided MUL and a scratch are available.
* config/avr/avr.cc (avr_optimize_size_max_p): New function.
(avr_out_ashlhi3_mul): New function.
(ashlhi3_out) [case 4, 5, 6]: Better speed for -Os.
* config/avr/avr.md (isa) <mul, no_mul>: New attr values.
(*ashlhi3_const): Add alternative for offsets 5 and 6.
diff --git a/gcc/config/avr/avr-passes.cc b/gcc/config/avr/avr-passes.cc
index 8bf125f12aa..e32c46738d8 100644
--- a/gcc/config/avr/avr-passes.cc
+++ b/gcc/config/avr/avr-passes.cc
@@ -4951,7 +4951,9 @@ avr_emit_shift (rtx_code code, rtx dest, rtx src, int off, rtx scratch)
const bool b8_is_3op = off == 6;
const bool b16_is_3op = select<bool>()
- : code == ASHIFT ? satisfies_constraint_C7c (xoff) // 7...12
+ : code == ASHIFT ? (satisfies_constraint_C7c (xoff) // 7...12
+ // The "C05 C06" alternative of *ashlhi3_const.
+ || (AVR_HAVE_MUL && scratch && (off == 5 || off == 6)))
: code == LSHIFTRT ? satisfies_constraint_C7c (xoff)
: code == ASHIFTRT ? off == 7
: bad_case<bool> ();
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index ce1a375ce92..e5a5aa34ec0 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -563,7 +563,8 @@ avr_option_override (void)
}
-int avr_optimize_size_level ()
+int
+avr_optimize_size_level ()
{
return cfun && cfun->decl
? opt_for_fn (cfun->decl, optimize_size)
@@ -571,6 +572,13 @@ int avr_optimize_size_level ()
}
+static bool
+avr_optimize_size_max_p ()
+{
+ return avr_optimize_size_level () == OPTIMIZE_SIZE_MAX;
+}
+
+
/* Implement `INIT_EXPANDERS'. */
/* The function works like a singleton. */
@@ -7048,6 +7056,26 @@ ashlqi3_out (rtx_insn *insn, rtx operands[], int *plen)
}
+/* Output a 16-bit left shift XOP[0] = XOP[1] << XOP[2] using MUL.  XOP[3] is
+ an upper 8-bit scratch register; SCRATCH_P and AVR_HAVE_MUL must both hold.
+ Currently only used for offsets 5 and 6, but works for offsets 1...7 too. */
+
+static const char*
+avr_out_ashlhi3_mul (rtx *xop, bool scratch_p, int *plen)
+{
+ gcc_assert (scratch_p && AVR_HAVE_MUL);
+
+ // Multiply both bytes by 1 << XOP[2]; takes 7 words and 9 cycles.
+ return avr_asm_len ("ldi %3,1<<%2" CR_TAB
+ "mul %B1,%3" CR_TAB
+ "mov %B0,r0" CR_TAB
+ "mul %A1,%3" CR_TAB
+ "mov %A0,r0" CR_TAB
+ "or %B0,r1" CR_TAB
+ "clr __zero_reg__", xop, plen, -7);
+}
+
+
/* 16bit shift left ((short)x << i) */
const char *
@@ -7060,6 +7088,10 @@ ashlhi3_out (rtx_insn *insn, rtx operands[], int *plen)
&& REG_P (operands[3]));
bool ldi_ok = test_hard_reg_class (LD_REGS, operands[0]);
bool reg1_unused_after = reg_unused_after (insn, operands[1]);
+ int size;
+ int reg0 = REGNO (operands[0]);
+ int reg1 = REGNO (operands[1]);
+ bool use_mul_p = reg1 != reg0 || (scratch && AVR_HAVE_MUL);
if (plen)
*plen = 0;
@@ -7073,7 +7105,7 @@ ashlhi3_out (rtx_insn *insn, rtx operands[], int *plen)
return avr_asm_len ("clr %B0" CR_TAB
"clr %A0", operands, plen, 2);
case 4:
- if (optimize_size && scratch)
+ if (avr_optimize_size_max_p () && scratch)
break; /* 5 */
if (ldi_ok)
return avr_asm_len ("swap %A0" CR_TAB
@@ -7093,6 +7125,23 @@ ashlhi3_out (rtx_insn *insn, rtx operands[], int *plen)
break; /* optimize_size ? 6 : 8 */
case 5:
+ size = (scratch ? 5 : 6) + (reg1 != reg0) * (2 - AVR_HAVE_MOVW);
+ if (avr_optimize_size_max_p () && (size < 7 || !use_mul_p))
+ {
+ if (reg0 != reg1)
+ {
+ if (AVR_HAVE_MOVW)
+ avr_asm_len ("movw %0,%1", operands, plen, 1);
+ else
+ avr_asm_len ("mov %A0,%A1" CR_TAB
+ "mov %B0,%B1", operands, plen, 2);
+ }
+ break; // scratch ? 5 : 6
+ }
+
+ if (use_mul_p)
+ return avr_out_ashlhi3_mul (operands, scratch, plen); // 7
+
if (optimize_size)
break; /* scratch ? 5 : 6 */
if (ldi_ok)
@@ -7117,6 +7166,23 @@ ashlhi3_out (rtx_insn *insn, rtx operands[], int *plen)
break; /* 10 */
case 6:
+ size = (scratch ? 5 : 6) + (reg1 != reg0) * (2 - AVR_HAVE_MOVW);
+ if (avr_optimize_size_max_p () && (size < 7 || !use_mul_p))
+ {
+ if (reg0 != reg1)
+ {
+ if (AVR_HAVE_MOVW)
+ avr_asm_len ("movw %0,%1", operands, plen, 1);
+ else
+ avr_asm_len ("mov %A0,%A1" CR_TAB
+ "mov %B0,%B1", operands, plen, 2);
+ }
+ break; // scratch ? 5 : 6
+ }
+
+ if (use_mul_p)
+ return avr_out_ashlhi3_mul (operands, scratch, plen); // 7
+
if (optimize_size)
break; /* scratch ? 5 : 6 */
return avr_asm_len ("clr __tmp_reg__" CR_TAB
@@ -7252,7 +7318,7 @@ ashlhi3_out (rtx_insn *insn, rtx operands[], int *plen)
}
out_shift_with_cnt ("lsl %A0" CR_TAB
- "rol %B0", insn, operands, plen, 2);
+ "rol %B0", insn, operands, plen, 2);
return "";
}
diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index 3a7b029e983..594940c6781 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -184,6 +184,7 @@ (define_attr "adjust_len"
;; elpm : ISA has ELPM but no ELPMX elpmx : ISA has ELPMX
;; no_xmega: non-XMEGA core xmega : XMEGA core
;; no_adiw: ISA has no ADIW, SBIW adiw : ISA has ADIW, SBIW
+;; no_mul: ISA has no MUL mul : ISA has [F]MUL[S[U]]
;; The following ISA attributes are actually not architecture specific,
;; but depend on (optimization) options. This is because the "enabled"
@@ -195,7 +196,7 @@ (define_attr "adjust_len"
(define_attr "isa"
"mov,movw, rjmp,jmp, ijmp,eijmp, lpm,lpmx, elpm,elpmx, no_xmega,xmega,
- no_adiw,adiw,
+ no_adiw,adiw, no_mul,mul,
3op,
standard"
(const_string "standard"))
@@ -246,6 +247,12 @@ (define_attr "enabled" ""
(and (eq_attr "isa" "no_adiw")
(match_test "!AVR_HAVE_ADIW"))
+ (and (eq_attr "isa" "mul")
+ (match_test "AVR_HAVE_MUL"))
+
+ (and (eq_attr "isa" "no_mul")
+ (match_test "!AVR_HAVE_MUL"))
+
(and (eq_attr "isa" "3op")
(match_test "avr_shift_is_3op ()"))
)
@@ -5459,16 +5466,16 @@ (define_peephole2 ; *ashlhi3_const *ashrhi3_const *lshrhi3_const
;; "*ashlhq3_const" "*ashluhq3_const"
;; "*ashlha3_const" "*ashluha3_const"
(define_insn "*ashl<mode>3_const"
- [(set (match_operand:ALL2 0 "register_operand" "=r ,r ,r ,r")
- (ashift:ALL2 (match_operand:ALL2 1 "register_operand" "0 ,r ,r ,0")
- (match_operand:QI 2 "const_int_operand" "LPK,O C7c C15,C2l,n")))
- (clobber (match_scratch:QI 3 "=X ,X ,&d ,&d"))
+ [(set (match_operand:ALL2 0 "register_operand" "=r ,r ,r ,r ,r")
+ (ashift:ALL2 (match_operand:ALL2 1 "register_operand" "0 ,r ,r ,r ,0")
+ (match_operand:QI 2 "const_int_operand" "LPK,O C7c C15,C05 C06,C2l,n")))
+ (clobber (match_scratch:QI 3 "=X ,X ,&d ,&d ,&d"))
(clobber (reg:CC REG_CC))]
"reload_completed"
{
return ashlhi3_out (insn, operands, NULL);
}
- [(set_attr "isa" "*,*,3op,*")
+ [(set_attr "isa" "*,*,mul,3op,*")
(set_attr "length" "10")
(set_attr "adjust_len" "ashlhi")])