https://gcc.gnu.org/g:55d792542d21311e415daee333e2786ac5f150a7

commit r15-7109-g55d792542d21311e415daee333e2786ac5f150a7
Author: Georg-Johann Lay <a...@gjlay.de>
Date:   Tue Jan 21 12:47:40 2025 +0100

    AVR: Tweak some 16-bit shifts by using MUL.
    
    u16 << 5 and u16 << 6 can be tweaked by using MUL instructions.
    Benefit is a better speed ratio with -Os and smaller size with -O2.
    
    gcc/
            * config/avr/avr-passes.cc (avr_emit_shift) [ASHIFT,HImode]:
            Allow offsets 5 and 6 as 3op provided we have MUL and a scratch.
            * config/avr/avr.cc (avr_optimize_size_max_p): New function.
            (avr_out_ashlhi3_mul): New function.
            (ashlhi3_out) [case 4, 5, 6]: Better speed for -Os.
            * config/avr/avr.md (isa) <mul, no_mul>: New attr values.
            (*ashlhi3_const): Add alternative for offsets 5 and 6.

Diff:
---
 gcc/config/avr/avr-passes.cc |  4 ++-
 gcc/config/avr/avr.cc        | 72 ++++++++++++++++++++++++++++++++++++++++++--
 gcc/config/avr/avr.md        | 19 ++++++++----
 3 files changed, 85 insertions(+), 10 deletions(-)

diff --git a/gcc/config/avr/avr-passes.cc b/gcc/config/avr/avr-passes.cc
index 8bf125f12aaf..e32c46738d81 100644
--- a/gcc/config/avr/avr-passes.cc
+++ b/gcc/config/avr/avr-passes.cc
@@ -4951,7 +4951,9 @@ avr_emit_shift (rtx_code code, rtx dest, rtx src, int off, rtx scratch)
   const bool b8_is_3op = off == 6;
 
   const bool b16_is_3op = select<bool>()
-    : code == ASHIFT ? satisfies_constraint_C7c (xoff) // 7...12
+    : code == ASHIFT ? (satisfies_constraint_C7c (xoff) // 7...12
+                       // The "C05 C06" alternative of *ashlhi3_const.
+                       || (AVR_HAVE_MUL && scratch && (off == 5 || off == 6)))
     : code == LSHIFTRT ? satisfies_constraint_C7c (xoff)
     : code == ASHIFTRT ? off == 7
     : bad_case<bool> ();
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index ce1a375ce92c..e5a5aa34ec04 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -563,7 +563,8 @@ avr_option_override (void)
 }
 
 
-int avr_optimize_size_level ()
+int
+avr_optimize_size_level ()
 {
   return cfun && cfun->decl
     ? opt_for_fn (cfun->decl, optimize_size)
@@ -571,6 +572,13 @@ int avr_optimize_size_level ()
 }
 
 
+static bool
+avr_optimize_size_max_p ()
+{
+  return avr_optimize_size_level () == OPTIMIZE_SIZE_MAX;
+}
+
+
 /* Implement `INIT_EXPANDERS'.  */
 /* The function works like a singleton.  */
 
@@ -7048,6 +7056,26 @@ ashlqi3_out (rtx_insn *insn, rtx operands[], int *plen)
 }
 
 
+/* Output a 16-bit left shift  XOP[0] = XOP[1] << XOP[2]  using MUL.
+   XOP[3] is an upper 8-bit scratch register.  This function is currently
+   only used for offsets 5 and 6 but works for offsets 1...7 as well.  */
+
+static const char*
+avr_out_ashlhi3_mul (rtx *xop, bool scratch_p, int *plen)
+{
+  gcc_assert (scratch_p && AVR_HAVE_MUL);
+
+  // Takes 7 words and 9 cycles.
+  return avr_asm_len ("ldi %3,1<<%2" CR_TAB
+                     "mul %B1,%3"   CR_TAB
+                     "mov %B0,r0"   CR_TAB
+                     "mul %A1,%3"   CR_TAB
+                     "mov %A0,r0"   CR_TAB
+                     "or  %B0,r1"   CR_TAB
+                     "clr __zero_reg__", xop, plen, -7);
+}
+
+
 /* 16bit shift left ((short)x << i)   */
 
 const char *
@@ -7060,6 +7088,10 @@ ashlhi3_out (rtx_insn *insn, rtx operands[], int *plen)
                      && REG_P (operands[3]));
       bool ldi_ok = test_hard_reg_class (LD_REGS, operands[0]);
       bool reg1_unused_after = reg_unused_after (insn, operands[1]);
+      int size;
+      int reg0 = REGNO (operands[0]);
+      int reg1 = REGNO (operands[1]);
+      bool use_mul_p = reg1 != reg0 || (scratch && AVR_HAVE_MUL);
 
       if (plen)
        *plen = 0;
@@ -7073,7 +7105,7 @@ ashlhi3_out (rtx_insn *insn, rtx operands[], int *plen)
          return avr_asm_len ("clr %B0" CR_TAB
                              "clr %A0", operands, plen, 2);
        case 4:
-         if (optimize_size && scratch)
+         if (avr_optimize_size_max_p () && scratch)
            break;  /* 5 */
          if (ldi_ok)
            return avr_asm_len ("swap %A0"      CR_TAB
@@ -7093,6 +7125,23 @@ ashlhi3_out (rtx_insn *insn, rtx operands[], int *plen)
          break;  /* optimize_size ? 6 : 8 */
 
        case 5:
+         size = (scratch ? 5 : 6) + (reg1 != reg0) * (2 - AVR_HAVE_MOVW);
+         if (avr_optimize_size_max_p () && (size < 7 || !use_mul_p))
+           {
+             if (reg0 != reg1)
+               {
+                 if (AVR_HAVE_MOVW)
+                   avr_asm_len ("movw %0,%1", operands, plen, 1);
+                 else
+                   avr_asm_len ("mov %A0,%A1" CR_TAB
+                                "mov %B0,%B1", operands, plen, 2);
+               }
+             break;  // scratch ? 5 : 6
+           }
+
+         if (use_mul_p)
+           return avr_out_ashlhi3_mul (operands, scratch, plen); // 7
+
          if (optimize_size)
            break;  /* scratch ? 5 : 6 */
          if (ldi_ok)
@@ -7117,6 +7166,23 @@ ashlhi3_out (rtx_insn *insn, rtx operands[], int *plen)
          break;  /* 10 */
 
        case 6:
+         size = (scratch ? 5 : 6) + (reg1 != reg0) * (2 - AVR_HAVE_MOVW);
+         if (avr_optimize_size_max_p () && (size < 7 || !use_mul_p))
+           {
+             if (reg0 != reg1)
+               {
+                 if (AVR_HAVE_MOVW)
+                   avr_asm_len ("movw %0,%1", operands, plen, 1);
+                 else
+                   avr_asm_len ("mov %A0,%A1" CR_TAB
+                                "mov %B0,%B1", operands, plen, 2);
+               }
+             break;  // scratch ? 5 : 6
+           }
+
+         if (use_mul_p)
+           return avr_out_ashlhi3_mul (operands, scratch, plen); // 7
+
          if (optimize_size)
            break;  /* scratch ? 5 : 6 */
          return avr_asm_len ("clr __tmp_reg__" CR_TAB
@@ -7252,7 +7318,7 @@ ashlhi3_out (rtx_insn *insn, rtx operands[], int *plen)
     }
 
   out_shift_with_cnt ("lsl %A0" CR_TAB
-                         "rol %B0", insn, operands, plen, 2);
+                     "rol %B0", insn, operands, plen, 2);
   return "";
 }
 
diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index 3a7b029e9835..594940c67819 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -184,6 +184,7 @@
 ;; elpm : ISA has ELPM but no ELPMX      elpmx : ISA has ELPMX
 ;; no_xmega: non-XMEGA core              xmega : XMEGA core
 ;; no_adiw:  ISA has no ADIW, SBIW       adiw  : ISA has ADIW, SBIW
+;; no_mul:   ISA has no MUL              mul   : ISA has [F]MUL[S[U]]
 
 ;; The following ISA attributes are actually not architecture specific,
 ;; but depend on (optimization) options.  This is because the "enabled"
@@ -195,7 +196,7 @@
 
 (define_attr "isa"
   "mov,movw, rjmp,jmp, ijmp,eijmp, lpm,lpmx, elpm,elpmx, no_xmega,xmega,
-   no_adiw,adiw,
+   no_adiw,adiw, no_mul,mul,
    3op,
    standard"
   (const_string "standard"))
@@ -246,6 +247,12 @@
         (and (eq_attr "isa" "no_adiw")
              (match_test "!AVR_HAVE_ADIW"))
 
+        (and (eq_attr "isa" "mul")
+             (match_test "AVR_HAVE_MUL"))
+
+        (and (eq_attr "isa" "no_mul")
+             (match_test "!AVR_HAVE_MUL"))
+
         (and (eq_attr "isa" "3op")
              (match_test "avr_shift_is_3op ()"))
         )
@@ -5459,16 +5466,16 @@
 ;; "*ashlhq3_const"  "*ashluhq3_const"
 ;; "*ashlha3_const"  "*ashluha3_const"
 (define_insn "*ashl<mode>3_const"
-  [(set (match_operand:ALL2 0 "register_operand"              "=r  ,r        ,r  ,r")
-        (ashift:ALL2 (match_operand:ALL2 1 "register_operand"  "0  ,r        ,r  ,0")
-                     (match_operand:QI 2 "const_int_operand"   "LPK,O C7c C15,C2l,n")))
-   (clobber (match_scratch:QI 3                               "=X  ,X        ,&d ,&d"))
+  [(set (match_operand:ALL2 0 "register_operand"              "=r  ,r        ,r      ,r  ,r")
+        (ashift:ALL2 (match_operand:ALL2 1 "register_operand"  "0  ,r        ,r      ,r  ,0")
+                     (match_operand:QI 2 "const_int_operand"   "LPK,O C7c C15,C05 C06,C2l,n")))
+   (clobber (match_scratch:QI 3                               "=X  ,X        ,&d     ,&d ,&d"))
    (clobber (reg:CC REG_CC))]
   "reload_completed"
   {
     return ashlhi3_out (insn, operands, NULL);
   }
-  [(set_attr "isa" "*,*,3op,*")
+  [(set_attr "isa" "*,*,mul,3op,*")
    (set_attr "length" "10")
    (set_attr "adjust_len" "ashlhi")])

Reply via email to