https://gcc.gnu.org/g:f30edd17e62e9474f90785a5915959cd6d8c3f62

commit r15-7151-gf30edd17e62e9474f90785a5915959cd6d8c3f62
Author: Georg-Johann Lay <a...@gjlay.de>
Date:   Wed Jan 22 21:11:22 2025 +0100

    AVR: PR117726 - Tweak 32-bit logical shifts of 25...30 for -Oz.
    
    As it turns out, logical 32-bit shifts with an offset of 25..30 can
    be performed in 7 instructions or fewer.  This beats the 7 instructions
    required for the default code of a shift loop.
    Plus, with zero overhead, these cases can be 3-operand.
    
    This is only relevant for -Oz because with -Os, 3op shifts are
    split with -msplit-bit-shift (which is not performed with -Oz).
    
            PR target/117726
    gcc/
            * config/avr/avr.cc (avr_ld_regno_p): New function.
            (ashlsi3_out) [case 25,26,27,28,29,30]: Handle and tweak.
            (lshrsi3_out): Same.
            (avr_rtx_costs_1) [SImode, ASHIFT, LSHIFTRT]: Adjust costs.
            * config/avr/avr.md (ashlsi3, *ashlsi3, *ashlsi3_const):
            Add "r,r,C4L" alternative.
            (lshrsi3, *lshrsi3, *lshrsi3_const): Add "r,r,C4R" alternative.
            * config/avr/constraints.md (C4R, C4L): New.
    gcc/testsuite/
            * gcc.target/avr/torture/avr-torture.exp (AVR_TORTURE_OPTIONS):
            Turn one option variant into -Oz.

Diff:
---
 gcc/config/avr/avr.cc                              | 163 ++++++++++++++++++---
 gcc/config/avr/avr.md                              |  40 ++---
 gcc/config/avr/constraints.md                      |   9 ++
 .../gcc.target/avr/torture/avr-torture.exp         |   2 +-
 4 files changed, 176 insertions(+), 38 deletions(-)

diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index e5a5aa34ec04..8628a438ab56 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -418,6 +418,15 @@ avr_adiw_reg_p (rtx reg)
 }
 
 
+/* Return true iff REGNO is in R16...R31.  */
+
+static bool
+avr_ld_regno_p (int regno)
+{
+  return TEST_HARD_REG_CLASS (LD_REGS, regno);
+}
+
+
 static bool
 ra_in_progress ()
 {
@@ -7397,17 +7406,20 @@ ashlsi3_out (rtx_insn *insn, rtx operands[], int *plen)
 {
   if (CONST_INT_P (operands[2]))
     {
+      int off = INTVAL (operands[2]);
       int reg0 = true_regnum (operands[0]);
       int reg1 = true_regnum (operands[1]);
       bool reg1_unused_after = reg_unused_after (insn, operands[1]);
-
+      bool scratch_p = (GET_CODE (PATTERN (insn)) == PARALLEL
+                       && XVECLEN (PATTERN (insn), 0) == 3
+                       && REG_P (operands[3]));
       if (plen)
        *plen = 0;
 
-      switch (INTVAL (operands[2]))
+      switch (off)
        {
        default:
-         if (INTVAL (operands[2]) < 32)
+         if (off < 32)
            break;
 
          return AVR_HAVE_MOVW
@@ -7461,11 +7473,58 @@ ashlsi3_out (rtx_insn *insn, rtx operands[], int *plen)
                           "mov %D0,%B1"  CR_TAB
                           "clr %B0"      CR_TAB
                           "clr %A0", operands, plen, 4);
+       case 30:
+         if (AVR_HAVE_MUL && scratch_p)
+           return avr_asm_len ("ldi %3,1<<6"       CR_TAB
+                               "mul %3,%A1"        CR_TAB
+                               "mov %D0,r0"        CR_TAB
+                               "clr __zero_reg__"  CR_TAB
+                               "clr %C0"           CR_TAB
+                               "clr %B0"           CR_TAB
+                               "clr %A0", operands, plen, 7);
+         // Fallthrough
+
+       case 28:
+       case 29:
+         {
+           const bool ld_reg0_p = avr_ld_regno_p (reg0 + 3); // %D0
+           const bool ld_reg1_p = avr_ld_regno_p (reg1 + 0); // %A1
+           if (ld_reg0_p
+               || (ld_reg1_p && reg1_unused_after)
+               || scratch_p)
+             {
+               if (ld_reg0_p)
+                 avr_asm_len ("mov %D0,%A1"    CR_TAB
+                              "swap %D0"       CR_TAB
+                              "andi %D0,0xf0", operands, plen, 3);
+               else if (ld_reg1_p && reg1_unused_after)
+                 avr_asm_len ("swap %A1"       CR_TAB
+                              "andi %A1,0xf0"  CR_TAB
+                              "mov %D0,%A1", operands, plen, 3);
+               else
+                 avr_asm_len ("mov %D0,%A1"    CR_TAB
+                              "swap %D0"       CR_TAB
+                              "ldi %3,0xf0"    CR_TAB
+                              "and %D0,%3", operands, plen, 4);
+               for (int i = 28; i < off; ++i)
+                 avr_asm_len ("lsl %D0", operands, plen, 1);
+               return avr_asm_len ("clr %C0"  CR_TAB
+                                   "clr %B0"  CR_TAB
+                                   "clr %A0", operands, plen, 3);
+             }
+         }
+         // Fallthrough
+
        case 24:
-         return avr_asm_len ("mov %D0,%A1"  CR_TAB
-                             "clr %C0"      CR_TAB
+       case 25:
+       case 26:
+       case 27:
+         avr_asm_len ("mov %D0,%A1", operands, plen, 1);
+         for (int i = 24; i < off; ++i)
+           avr_asm_len ("lsl %D0", operands, plen, 1);
+         return avr_asm_len ("clr %C0"      CR_TAB
                              "clr %B0"      CR_TAB
-                             "clr %A0", operands, plen, 4);
+                             "clr %A0", operands, plen, 3);
        case 31:
          return AVR_HAVE_MOVW
            ? avr_asm_len ("bst %A1,0"    CR_TAB
@@ -8298,17 +8357,20 @@ lshrsi3_out (rtx_insn *insn, rtx operands[], int *plen)
 {
   if (CONST_INT_P (operands[2]))
     {
+      int off = INTVAL (operands[2]);
       int reg0 = true_regnum (operands[0]);
       int reg1 = true_regnum (operands[1]);
       bool reg1_unused_after = reg_unused_after (insn, operands[1]);
-
+      bool scratch_p = (GET_CODE (PATTERN (insn)) == PARALLEL
+                       && XVECLEN (PATTERN (insn), 0) == 3
+                       && REG_P (operands[3]));
       if (plen)
        *plen = 0;
 
-      switch (INTVAL (operands[2]))
+      switch (off)
        {
        default:
-         if (INTVAL (operands[2]) < 32)
+         if (off < 32)
            break;
 
          return AVR_HAVE_MOVW
@@ -8362,11 +8424,58 @@ lshrsi3_out (rtx_insn *insn, rtx operands[], int *plen)
                           "mov %A0,%C1" CR_TAB
                           "clr %C0"     CR_TAB
                           "clr %D0", operands, plen, 4);
+       case 30:
+         if (AVR_HAVE_MUL && scratch_p)
+           return avr_asm_len ("ldi %3,1<<2"       CR_TAB
+                               "mul %3,%D1"        CR_TAB
+                               "mov %A0,r1"        CR_TAB
+                               "clr __zero_reg__"  CR_TAB
+                               "clr %B0"           CR_TAB
+                               "clr %C0"           CR_TAB
+                               "clr %D0", operands, plen, 7);
+         // Fallthrough
+
+       case 29:
+       case 28:
+         {
+           const bool ld_reg0_p = avr_ld_regno_p (reg0 + 0); // %A0
+           const bool ld_reg1_p = avr_ld_regno_p (reg1 + 3); // %D1
+           if (ld_reg0_p
+               || (ld_reg1_p && reg1_unused_after)
+               || scratch_p)
+             {
+               if (ld_reg0_p)
+                 avr_asm_len ("mov %A0,%D1"    CR_TAB
+                              "swap %A0"       CR_TAB
+                              "andi %A0,0x0f", operands, plen, 3);
+               else if (ld_reg1_p && reg1_unused_after)
+                 avr_asm_len ("swap %D1"       CR_TAB
+                              "andi %D1,0x0f"  CR_TAB
+                              "mov %A0,%D1", operands, plen, 3);
+               else
+                 avr_asm_len ("mov %A0,%D1"    CR_TAB
+                              "swap %A0"       CR_TAB
+                              "ldi %3,0x0f"    CR_TAB
+                              "and %A0,%3", operands, plen, 4);
+               for (int i = 28; i < off; ++i)
+                 avr_asm_len ("lsr %A0", operands, plen, 1);
+               return avr_asm_len ("clr %B0"  CR_TAB
+                                   "clr %C0"  CR_TAB
+                                   "clr %D0", operands, plen, 3);
+             }
+         }
+         // Fallthrough
+
+       case 27:
+       case 26:
+       case 25:
        case 24:
-         return avr_asm_len ("mov %A0,%D1" CR_TAB
-                             "clr %B0"     CR_TAB
+         avr_asm_len ("mov %A0,%D1", operands, plen, 1);
+         for (int i = 24; i < off; ++i)
+           avr_asm_len ("lsr %A0", operands, plen, 1);
+         return avr_asm_len ("clr %B0"     CR_TAB
                              "clr %C0"     CR_TAB
-                             "clr %D0", operands, plen, 4);
+                             "clr %D0", operands, plen, 3);
        case 31:
          return AVR_HAVE_MOVW
            ? avr_asm_len ("bst %D1,7"    CR_TAB
@@ -13037,9 +13146,6 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code,
              case 0:
                *total = 0;
                break;
-             case 24:
-               *total = COSTS_N_INSNS (3);
-               break;
              case 1:
              case 8:
                *total = COSTS_N_INSNS (4);
@@ -13050,6 +13156,19 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code,
              case 16:
                *total = COSTS_N_INSNS (4 - AVR_HAVE_MOVW);
                break;
+             case 24:
+             case 25:
+             case 26:
+             case 27:
+               *total = COSTS_N_INSNS (4 + val1 - 24);
+               break;
+             case 28:
+             case 29:
+               *total = COSTS_N_INSNS (6 + val1 - 28);
+               break;
+             case 30:
+               *total = COSTS_N_INSNS (!speed && AVR_HAVE_MUL ? 7 : 8);
+               break;
              case 31:
                *total = COSTS_N_INSNS (6);
                break;
@@ -13346,6 +13465,7 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code,
                *total = 0;
                break;
              case 1:
+             case 8:
                *total = COSTS_N_INSNS (4);
                break;
              case 2:
@@ -13357,9 +13477,18 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code,
              case 16:
                *total = COSTS_N_INSNS (4 - AVR_HAVE_MOVW);
                break;
-             case 8:
              case 24:
-               *total = COSTS_N_INSNS (4);
+             case 25:
+             case 26:
+             case 27:
+               *total = COSTS_N_INSNS (4 + val1 - 24);
+               break;
+             case 28:
+             case 29:
+               *total = COSTS_N_INSNS (6 + val1 - 28);
+               break;
+             case 30:
+               *total = COSTS_N_INSNS (!speed && AVR_HAVE_MUL ? 7 : 8);
                break;
              case 31:
                *total = COSTS_N_INSNS (6);
diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index 594940c67819..6550fadd1017 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -5363,9 +5363,9 @@
 ;; "ashlsq3"  "ashlusq3"
 ;; "ashlsa3"  "ashlusa3"
 (define_insn_and_split "ashl<mode>3"
-  [(set (match_operand:ALL4 0 "register_operand"                "=r,r  ,r      
  ,r  ,r,r")
-        (ashift:ALL4 (match_operand:ALL4 1 "register_operand"    "0,0  ,r      
  ,r  ,0,0")
-                     (match_operand:QI 2 "nop_general_operand"   "r,LPK,O C15 
C31,C4l,n,Qm")))]
+  [(set (match_operand:ALL4 0 "register_operand"                "=r,r  ,r    
,r  ,r,r")
+        (ashift:ALL4 (match_operand:ALL4 1 "register_operand"    "0,0  ,r    
,r  ,0,0")
+                     (match_operand:QI 2 "nop_general_operand"   "r,LPK,O 
C4L,C4l,n,Qm")))]
   ""
   "#"
   "&& reload_completed"
@@ -5377,9 +5377,9 @@
   [(set_attr "isa" "*,*,*,3op,*,*")])
 
 (define_insn "*ashl<mode>3"
-  [(set (match_operand:ALL4 0 "register_operand"                "=r,r  ,r      
  ,r  ,r,r")
-        (ashift:ALL4 (match_operand:ALL4 1 "register_operand"    "0,0  ,r      
  ,r  ,0,0")
-                     (match_operand:QI 2 "nop_general_operand"   "r,LPK,O C15 
C31,C4l,n,Qm")))
+  [(set (match_operand:ALL4 0 "register_operand"                "=r,r  ,r    
,r  ,r,r")
+        (ashift:ALL4 (match_operand:ALL4 1 "register_operand"    "0,0  ,r    
,r  ,0,0")
+                     (match_operand:QI 2 "nop_general_operand"   "r,LPK,O 
C4L,C4l,n,Qm")))
    (clobber (reg:CC REG_CC))]
   "reload_completed"
   {
@@ -5564,10 +5564,10 @@
 ;; "*ashlsq3_const"  "*ashlusq3_const"
 ;; "*ashlsa3_const"  "*ashlusa3_const"
 (define_insn "*ashl<mode>3_const"
-  [(set (match_operand:ALL4 0 "register_operand"             "=r ,r        ,r  
,r")
-        (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0 ,r        ,r  
,0")
-                     (match_operand:QI 2 "const_int_operand"  "LP,O C15 
C31,C4l,n")))
-   (clobber (match_operand:QI 3 "scratch_or_dreg_operand"    "=X ,X        ,&d 
,&d"))
+  [(set (match_operand:ALL4 0 "register_operand"             "=r ,r    ,r  ,r")
+        (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0 ,r    ,r  ,0")
+                     (match_operand:QI 2 "const_int_operand"  "LP,O 
C4L,C4l,n")))
+   (clobber (match_operand:QI 3 "scratch_or_dreg_operand"    "=X ,X    ,&d 
,&d"))
    (clobber (reg:CC REG_CC))]
   "reload_completed"
   {
@@ -5955,9 +5955,9 @@
 ;; "lshrsq3"  "lshrusq3"
 ;; "lshrsa3"  "lshrusa3"
 (define_insn_and_split "lshr<mode>3"
-  [(set (match_operand:ALL4 0 "register_operand"                  "=r,r  ,r    
    ,r  ,r,r")
-        (lshiftrt:ALL4 (match_operand:ALL4 1 "register_operand"    "0,0  ,r    
    ,r  ,0,0")
-                       (match_operand:QI 2 "nop_general_operand"   "r,LPK,O 
C15 C31,C4r,n,Qm")))]
+  [(set (match_operand:ALL4 0 "register_operand"                  "=r,r  ,r    
,r  ,r,r")
+        (lshiftrt:ALL4 (match_operand:ALL4 1 "register_operand"    "0,0  ,r    
,r  ,0,0")
+                       (match_operand:QI 2 "nop_general_operand"   "r,LPK,O 
C4R,C4r,n,Qm")))]
   ""
   "#"
   "&& reload_completed"
@@ -5969,9 +5969,9 @@
   [(set_attr "isa" "*,*,*,3op,*,*")])
 
 (define_insn "*lshr<mode>3"
-  [(set (match_operand:ALL4 0 "register_operand"                  "=r,r  ,r    
    ,r  ,r,r")
-        (lshiftrt:ALL4 (match_operand:ALL4 1 "register_operand"    "0,0  ,r    
    ,r  ,0,0")
-                       (match_operand:QI 2 "nop_general_operand"   "r,LPK,O 
C15 C31,C4r,n,Qm")))
+  [(set (match_operand:ALL4 0 "register_operand"                  "=r,r  ,r    
,r  ,r,r")
+        (lshiftrt:ALL4 (match_operand:ALL4 1 "register_operand"    "0,0  ,r    
,r  ,0,0")
+                       (match_operand:QI 2 "nop_general_operand"   "r,LPK,O 
C4R,C4r,n,Qm")))
    (clobber (reg:CC REG_CC))]
   "reload_completed"
   {
@@ -6059,10 +6059,10 @@
 ;; "*lshrsq3_const"  "*lshrusq3_const"
 ;; "*lshrsa3_const"  "*lshrusa3_const"
 (define_insn "*lshr<mode>3_const"
-  [(set (match_operand:ALL4 0 "register_operand"               "=r ,r        
,r  ,r")
-        (lshiftrt:ALL4 (match_operand:ALL4 1 "register_operand" "0 ,r        
,r  ,0")
-                       (match_operand:QI 2 "const_int_operand"  "LP,O C15 
C31,C4r,n")))
-   (clobber (match_operand:QI 3 "scratch_or_dreg_operand"      "=X ,X        
,&d ,&d"))
+  [(set (match_operand:ALL4 0 "register_operand"               "=r ,r    ,r  
,r")
+        (lshiftrt:ALL4 (match_operand:ALL4 1 "register_operand" "0 ,r    ,r  
,0")
+                       (match_operand:QI 2 "const_int_operand"  "LP,O 
C4R,C4r,n")))
+   (clobber (match_operand:QI 3 "scratch_or_dreg_operand"      "=X ,X    ,&d 
,&d"))
    (clobber (reg:CC REG_CC))]
   "reload_completed"
   {
diff --git a/gcc/config/avr/constraints.md b/gcc/config/avr/constraints.md
index fc8d4d56a666..2ca9cc3d88dd 100644
--- a/gcc/config/avr/constraints.md
+++ b/gcc/config/avr/constraints.md
@@ -328,6 +328,15 @@
   (and (match_code "const_int")
        (match_test "avr_split_shift_p (4, ival, ASHIFT)")))
 
+(define_constraint "C4R"
+  "A constant integer shift offset for a 4-byte LSHIFTRT that's a 3-operand insn independent of options."
+  (and (match_code "const_int")
+       (match_test "ival == 15 || IN_RANGE (ival, 25, 31)")))
+
+(define_constraint "C4L"
+  "A constant integer shift offset for a 4-byte ASHIFT that's a 3-operand insn independent of options."
+  (and (match_code "const_int")
+       (match_test "ival == 15 || IN_RANGE (ival, 25, 31)")))
 
 ;; CONST_FIXED is no element of 'n' so cook our own.
 ;; "i" or "s" would match but because the insn uses iterators that cover
diff --git a/gcc/testsuite/gcc.target/avr/torture/avr-torture.exp b/gcc/testsuite/gcc.target/avr/torture/avr-torture.exp
index 158ef2d4c5ab..91bbe26f10b3 100644
--- a/gcc/testsuite/gcc.target/avr/torture/avr-torture.exp
+++ b/gcc/testsuite/gcc.target/avr/torture/avr-torture.exp
@@ -45,7 +45,7 @@ dg-init
        { -Os -fomit-frame-pointer } \
        { -Os -fomit-frame-pointer -finline-functions } \
        { -O3 -g } \
-       { -Os -mcall-prologues} ]
+       { -Oz -mcall-prologues} ]
 
 
 #Initialize use of torture lists.

Reply via email to