[Bug rtl-optimization/118615] [15 Regression] Bootstrap failure on aarch64 after r15-2810

jakub at gcc dot gnu.org via Gcc-bugs Thu, 20 Mar 2025 10:56:39 -0700

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=118615


--- Comment #27 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
So the -fcompare-debug issue is because curr_insn is a JUMP_INSN:
(call_insn 10013 3 4 3 (parallel [
            (call (mem:DI (symbol_ref:DI ("g") [flags 0x41] <function_decl 0x7fffea2d2000 g>) [0 g S8 A8])
                (const_int 0 [0]))
            (unspec:DI [
                    (const_int 2 [0x2])
                ] UNSPEC_CALLEE_ABI)
            (clobber (reg:DI 30 x30))
        ]) "pr118615.c":8:5 63 {*call_insn}
     (expr_list:REG_CALL_DECL (symbol_ref:DI ("g") [flags 0x41] <function_decl 0x7fffea2d2000 g>)
        (nil))
    (expr_list (clobber (reg:DI 17 x17))
        (expr_list (clobber (reg:DI 16 x16))
            (nil))))
(debug_insn 4 10013 10016 3 (debug_marker) "pr118615.c":9:5 -1
     (nil))
(jump_insn 10016 4 10017 3 (set (pc)
        (if_then_else (eq (reg/f:DI 109 [ m ])
                (const_int 0 [0]))
            (label_ref:DI 10025)
            (pc))) "pr118615.c":9:8 37 {aarch64_cbeqdi1}
     (int_list:REG_BR_PROB 499612076 (nil))
 -> 10025)
and because of that before_p is true in:
                      before_p = (JUMP_P (curr_insn)
                                  || (CALL_P (curr_insn) && reg->type == OP_IN));
                      if (NONDEBUG_INSN_P (curr_insn)
                          && (! JUMP_P (curr_insn) || reg->type == OP_IN)
                          && split_if_necessary (src_regno, reg->biggest_mode,
                                                 potential_reload_hard_regs,
                                                 before_p, curr_insn, max_uid))
                        {
                          if (reg->subreg_p)
                            check_and_force_assignment_correctness_p = true;
                          change_p = true;
                          /* Invalidate. */
                          usage_insns[src_regno].check = 0;
                          if (before_p)
                            use_insn = PREV_INSN (curr_insn);
                        }
The -g vs. -g0 difference is that, after this, use_insn is a DEBUG_INSN in
one case (-g) and the CALL_INSN before it in the other (-g0).
From there things go wrong.  Presumably the assumption was that if
split_if_necessary returns true and before_p is true as well, it must have
added at least one real insn before curr_insn; that assumption is now violated.
Changing the patch to:
--- gcc/lra-constraints.cc.jj   2025-03-19 19:20:41.644440691 +0100
+++ gcc/lra-constraints.cc      2025-03-20 18:40:04.188299643 +0100
@@ -152,6 +152,9 @@ static machine_mode curr_operand_mode[MA
    (e.g. constant) and whose subreg is given operand of the current
    insn.  VOIDmode in all other cases.  */
 static machine_mode original_subreg_reg_mode[MAX_RECOG_OPERANDS];
+/* The first call insn after curr_insn within the EBB during inherit_in_ebb
+   or NULL outside of that function.  */
+static rtx_insn *first_call_insn;




@@ -6373,12 +6376,26 @@ split_reg (bool before_p, int original_r
   lra_process_new_insns (as_a <rtx_insn *> (usage_insn),
                         after_p ? NULL : restore,
                         after_p ? restore : NULL,
-                        call_save_p
-                        ?  "Add reg<-save" : "Add reg<-split");
-  lra_process_new_insns (insn, before_p ? save : NULL,
-                        before_p ? NULL : save,
-                        call_save_p
-                        ?  "Add save<-reg" : "Add split<-reg");
+                        call_save_p ? "Add reg<-save" : "Add reg<-split");
+  if (call_save_p
+      && first_call_insn != NULL
+      && BLOCK_FOR_INSN (first_call_insn) != BLOCK_FOR_INSN (insn))
+    /* PR116028: If original_regno is a pseudo that has been assigned a
+       call-save hard register, then emit the spill insn before the call
+       insn 'first_call_insn' instead of adjacent to 'insn'.  If 'insn'
+       and 'first_call_insn' belong to the same EBB but to two separate
+       BBs, and if 'insn' is present in the entry BB, then generating the
+       spill insn in the entry BB can prevent shrink wrap from happening.
+       This is because the spill insn references the stack pointer and
+       hence the prolog gets generated in the entry BB itself.  It is
+       also more efficient to generate the spill before
+       'first_call_insn' as the spill now occurs only in the path
+       containing the call.  */
+    lra_process_new_insns (first_call_insn, save, NULL, "Add save<-reg");
+  else
+    lra_process_new_insns (insn, before_p ? save : NULL,
+                          before_p ? NULL : save,
+                          call_save_p ? "Add save<-reg" : "Add split<-reg");
   if (nregs > 1 || original_regno < FIRST_PSEUDO_REGISTER)
     /* If we are trying to split multi-register.  We should check
        conflicts on the next assignment sub-pass.  IRA can allocate on
@@ -6484,7 +6501,7 @@ split_if_necessary (int regno, machine_m
                && (INSN_UID (XEXP (next_usage_insns, 0)) < max_uid)))
        && need_for_split_p (potential_reload_hard_regs, regno + i)
        && split_reg (before_p, regno + i, insn, next_usage_insns, NULL))
-    res = true;
+      res = true;
   return res;
 }

@@ -6862,6 +6879,7 @@ inherit_in_ebb (rtx_insn *head, rtx_insn
   last_processed_bb = NULL;
   CLEAR_HARD_REG_SET (potential_reload_hard_regs);
   live_hard_regs = eliminable_regset | lra_no_alloc_regs;
+  first_call_insn = NULL;
   /* We don't process new insns generated in the loop. */
   for (curr_insn = tail; curr_insn != PREV_INSN (head); curr_insn = prev_insn)
     {
@@ -7074,6 +7092,7 @@ inherit_in_ebb (rtx_insn *head, rtx_insn
              last_call_for_abi[callee_abi.id ()] = calls_num;
              full_and_partial_call_clobbers
                |= callee_abi.full_and_partial_reg_clobbers ();
+             first_call_insn = curr_insn;
              if ((cheap = find_reg_note (curr_insn,
                                          REG_RETURNED, NULL_RTX)) != NULL_RTX
                  && ((cheap = XEXP (cheap, 0)), true)
@@ -7142,6 +7161,7 @@ inherit_in_ebb (rtx_insn *head, rtx_insn
                    {
                      bool before_p;
                      rtx_insn *use_insn = curr_insn;
+                     rtx_insn *prev_insn = PREV_INSN (curr_insn);

                      before_p = (JUMP_P (curr_insn)
                                  || (CALL_P (curr_insn) && reg->type == OP_IN));
@@ -7156,7 +7176,7 @@ inherit_in_ebb (rtx_insn *head, rtx_insn
                          change_p = true;
                          /* Invalidate. */
                          usage_insns[src_regno].check = 0;
-                         if (before_p)
+                         if (before_p && PREV_INSN (curr_insn) != prev_insn)
                            use_insn = PREV_INSN (curr_insn);
                        }
                      if (NONDEBUG_INSN_P (curr_insn))
@@ -7278,6 +7298,7 @@ inherit_in_ebb (rtx_insn *head, rtx_insn
            }
        }
     }
+  first_call_insn = NULL;
   return change_p;
 }


makes the -fcompare-debug issue go away.
The big question is whether the wrong-code issues that were reported first
also go away with this version of the patch, which no longer randomly uses an
unrelated CALL_INSN when not called from within inherit_in_ebb and which has
the xstormy BLOCK_FOR_INSN check.

[Bug rtl-optimization/118615] [15 Regression] Bootstrap failure on aarch64 after r15-2810

Reply via email to