https://gcc.gnu.org/bugzilla/show_bug.cgi?id=118615
--- Comment #27 from Jakub Jelinek <jakub at gcc dot gnu.org> --- So the -fcompare-debug issue arises because curr_insn is a JUMP_INSN: (call_insn 10013 3 4 3 (parallel [ (call (mem:DI (symbol_ref:DI ("g") [flags 0x41] <function_decl 0x7fffea2d2000 g>) [0 g S8 A8]) (const_int 0 [0])) (unspec:DI [ (const_int 2 [0x2]) ] UNSPEC_CALLEE_ABI) (clobber (reg:DI 30 x30)) ]) "pr118615.c":8:5 63 {*call_insn} (expr_list:REG_CALL_DECL (symbol_ref:DI ("g") [flags 0x41] <function_decl 0x7fffea2d2000 g>) (nil)) (expr_list (clobber (reg:DI 17 x17)) (expr_list (clobber (reg:DI 16 x16)) (nil)))) (debug_insn 4 10013 10016 3 (debug_marker) "pr118615.c":9:5 -1 (nil)) (jump_insn 10016 4 10017 3 (set (pc) (if_then_else (eq (reg/f:DI 109 [ m ]) (const_int 0 [0])) (label_ref:DI 10025) (pc))) "pr118615.c":9:8 37 {aarch64_cbeqdi1} (int_list:REG_BR_PROB 499612076 (nil)) -> 10025) and because of that before_p is true in: before_p = (JUMP_P (curr_insn) || (CALL_P (curr_insn) && reg->type == OP_IN)); if (NONDEBUG_INSN_P (curr_insn) && (! JUMP_P (curr_insn) || reg->type == OP_IN) && split_if_necessary (src_regno, reg->biggest_mode, potential_reload_hard_regs, before_p, curr_insn, max_uid)) { if (reg->subreg_p) check_and_force_assignment_correctness_p = true; change_p = true; /* Invalidate. */ usage_insns[src_regno].check = 0; if (before_p) use_insn = PREV_INSN (curr_insn); } The -g vs. -g0 difference is that, after this code runs, use_insn is a DEBUG_INSN in one case (-g) and the CALL_INSN preceding it in the other (-g0), and things go wrong from there. I guess the assumption was that when split_if_necessary returns true and before_p is also true, it must have added at least one real insn before curr_insn; that assumption is now violated. Changing the patch to: --- gcc/lra-constraints.cc.jj 2025-03-19 19:20:41.644440691 +0100 +++ gcc/lra-constraints.cc 2025-03-20 18:40:04.188299643 +0100 @@ -152,6 +152,9 @@ static machine_mode curr_operand_mode[MA (e.g. constant) and whose subreg is given operand of the current insn. 
VOIDmode in all other cases. */ static machine_mode original_subreg_reg_mode[MAX_RECOG_OPERANDS]; +/* The first call insn after curr_insn within the EBB during inherit_in_ebb + or NULL outside of that function. */ +static rtx_insn *first_call_insn; @@ -6373,12 +6376,26 @@ split_reg (bool before_p, int original_r lra_process_new_insns (as_a <rtx_insn *> (usage_insn), after_p ? NULL : restore, after_p ? restore : NULL, - call_save_p - ? "Add reg<-save" : "Add reg<-split"); - lra_process_new_insns (insn, before_p ? save : NULL, - before_p ? NULL : save, - call_save_p - ? "Add save<-reg" : "Add split<-reg"); + call_save_p ? "Add reg<-save" : "Add reg<-split"); + if (call_save_p + && first_call_insn != NULL + && BLOCK_FOR_INSN (first_call_insn) != BLOCK_FOR_INSN (insn)) + /* PR116028: If original_regno is a pseudo that has been assigned a + call-save hard register, then emit the spill insn before the call + insn 'first_call_insn' instead of adjacent to 'insn'. If 'insn' + and 'first_call_insn' belong to the same EBB but to two separate + BBs, and if 'insn' is present in the entry BB, then generating the + spill insn in the entry BB can prevent shrink wrap from happening. + This is because the spill insn references the stack pointer and + hence the prolog gets generated in the entry BB itself. It is + also more efficient to generate the spill before + 'first_call_insn' as the spill now occurs only in the path + containing the call. */ + lra_process_new_insns (first_call_insn, save, NULL, "Add save<-reg"); + else + lra_process_new_insns (insn, before_p ? save : NULL, + before_p ? NULL : save, + call_save_p ? "Add save<-reg" : "Add split<-reg"); if (nregs > 1 || original_regno < FIRST_PSEUDO_REGISTER) /* If we are trying to split multi-register. We should check conflicts on the next assignment sub-pass. 
IRA can allocate on @@ -6484,7 +6501,7 @@ split_if_necessary (int regno, machine_m && (INSN_UID (XEXP (next_usage_insns, 0)) < max_uid))) && need_for_split_p (potential_reload_hard_regs, regno + i) && split_reg (before_p, regno + i, insn, next_usage_insns, NULL)) - res = true; + res = true; return res; } @@ -6862,6 +6879,7 @@ inherit_in_ebb (rtx_insn *head, rtx_insn last_processed_bb = NULL; CLEAR_HARD_REG_SET (potential_reload_hard_regs); live_hard_regs = eliminable_regset | lra_no_alloc_regs; + first_call_insn = NULL; /* We don't process new insns generated in the loop. */ for (curr_insn = tail; curr_insn != PREV_INSN (head); curr_insn = prev_insn) { @@ -7074,6 +7092,7 @@ inherit_in_ebb (rtx_insn *head, rtx_insn last_call_for_abi[callee_abi.id ()] = calls_num; full_and_partial_call_clobbers |= callee_abi.full_and_partial_reg_clobbers (); + first_call_insn = curr_insn; if ((cheap = find_reg_note (curr_insn, REG_RETURNED, NULL_RTX)) != NULL_RTX && ((cheap = XEXP (cheap, 0)), true) @@ -7142,6 +7161,7 @@ inherit_in_ebb (rtx_insn *head, rtx_insn { bool before_p; rtx_insn *use_insn = curr_insn; + rtx_insn *prev_insn = PREV_INSN (curr_insn); before_p = (JUMP_P (curr_insn) || (CALL_P (curr_insn) && reg->type == OP_IN)); @@ -7156,7 +7176,7 @@ inherit_in_ebb (rtx_insn *head, rtx_insn change_p = true; /* Invalidate. */ usage_insns[src_regno].check = 0; - if (before_p) + if (before_p && PREV_INSN (curr_insn) != prev_insn) use_insn = PREV_INSN (curr_insn); } if (NONDEBUG_INSN_P (curr_insn)) @@ -7278,6 +7298,7 @@ inherit_in_ebb (rtx_insn *head, rtx_insn } } } + first_call_insn = NULL; return change_p; } makes the -fcompare-debug issue go away. The big question is whether the wrong-code issues that were reported first also go away with this version of the patch, which doesn't randomly use an unrelated CALL_INSN when not called from within inherit_in_ebb and has the xstormy BLOCK_FOR_INSN check.