Hi, with the attached patch we use call-clobbered floating-point registers as save slots for general-purpose registers in leaf functions.
Bootstrapped and regtested with various options and -march levels. Committed to mainline. Bye, -Andreas- 2013-10-09 Andreas Krebbel <andreas.kreb...@de.ibm.com> * config/s390/s390.c (struct s390_frame_layout): New field gpr_save_slots. (cfun_save_arg_fprs_p, cfun_gpr_save_slot): New macros. (s390_reg_clobbered_rtx, s390_regs_ever_clobbered): Change type of regs_ever_clobbered to char*. (s390_regs_ever_clobbered): Check crtl->saves_all_registers instead of cfun->has_nonlocal_label. Ignore frame related restore INSNs. (s390_register_info): Enable FPR save slots. Move/Copy some functionality into ... (s390_register_info_gprtofpr, s390_register_info_stdarg_fpr) (s390_register_info_stdarg_gpr, s390_optimize_register_info): New function. (s390_frame_info): Do gpr slot allocation here now. stdarg does not imply a stack frame. (s390_init_frame_layout): Remove variable clobbered_regs. (s390_update_register_info): Remove function. (s390_hard_regno_rename_ok): Call-saved regs without a save slot cannot be used for register renaming. (s390_hard_regno_scratch_ok): New function. (TARGET_HARD_REGNO_SCRATCH_OK): Define target hook. (s390_initial_elimination_offset): Change offset calculation of the return address pointer. (save_gprs): Deal with only r6 being saved from the call-saved regs. (restore_gprs): Set frame related flag. (s390_save_gprs_to_fprs, s390_restore_gprs_from_fprs): New functions. (s390_emit_prologue): Call s390_register_info instead of s390_update_frame_layout. Call s390_save_gprs_to_fprs. (s390_emit_epilogue): Call s390_restore_gprs_from_fprs. (s390_optimize_prologue): Call s390_optimize_register_info. Try to remove also FPR slot save/restore INSNs. Remove frame related flags from restore INSNs. --- --- gcc/config/s390/s390.c | 663 ++++++++++--!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 1 file changed, 141 insertions(+), 24 deletions(-), 498 modifications(!) 
Index: gcc/config/s390/s390.c =================================================================== *** gcc/config/s390/s390.c.orig --- gcc/config/s390/s390.c *************** struct GTY (()) s390_frame_layout *** 324,329 **** --- 324,335 ---- int first_save_gpr_slot; int last_save_gpr_slot; + /* Location (FP register number) where GPRs (r0-r15) should + be saved to. + 0 - does not need to be saved at all + -1 - stack slot */ + signed char gpr_save_slots[16]; + /* Number of first and last gpr to be saved, restored. */ int first_save_gpr; int first_restore_gpr; *************** struct GTY(()) machine_function *** 377,388 **** #define cfun_frame_layout (cfun->machine->frame_layout) #define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs) ! #define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \ cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG) #define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |= \ (1 << (REGNO - FPR0_REGNUM))) #define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap & \ (1 << (REGNO - FPR0_REGNUM)))) /* Number of GPRs and FPRs used for argument passing. */ #define GP_ARG_NUM_REG 5 --- 383,399 ---- #define cfun_frame_layout (cfun->machine->frame_layout) #define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs) ! #define cfun_save_arg_fprs_p (!!(TARGET_64BIT \ ! ? cfun_frame_layout.fpr_bitmap & 0x0f \ ! : cfun_frame_layout.fpr_bitmap & 0x03)) ! #define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \ cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG) #define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |= \ (1 << (REGNO - FPR0_REGNUM))) #define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap & \ (1 << (REGNO - FPR0_REGNUM)))) + #define cfun_gpr_save_slot(REGNO) \ + cfun->machine->frame_layout.gpr_save_slots[REGNO] /* Number of GPRs and FPRs used for argument passing. 
*/ #define GP_ARG_NUM_REG 5 *************** find_unused_clobbered_reg (void) *** 7364,7370 **** static void s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data) { ! int *regs_ever_clobbered = (int *)data; unsigned int i, regno; enum machine_mode mode = GET_MODE (setreg); --- 7375,7381 ---- static void s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data) { ! char *regs_ever_clobbered = (char *)data; unsigned int i, regno; enum machine_mode mode = GET_MODE (setreg); *************** s390_reg_clobbered_rtx (rtx setreg, cons *** 7392,7404 **** each of those regs. */ static void ! s390_regs_ever_clobbered (int *regs_ever_clobbered) { basic_block cur_bb; rtx cur_insn; unsigned int i; ! memset (regs_ever_clobbered, 0, 32 * sizeof (int)); /* For non-leaf functions we have to consider all call clobbered regs to be clobbered. */ --- 7403,7415 ---- each of those regs. */ static void ! s390_regs_ever_clobbered (char regs_ever_clobbered[]) { basic_block cur_bb; rtx cur_insn; unsigned int i; ! memset (regs_ever_clobbered, 0, 32); /* For non-leaf functions we have to consider all call clobbered regs to be clobbered. */ *************** s390_regs_ever_clobbered (int *regs_ever *** 7425,7431 **** This flag is also set for the unwinding code in libgcc. See expand_builtin_unwind_init. For regs_ever_live this is done by reload. */ ! if (cfun->has_nonlocal_label) for (i = 0; i < 32; i++) if (!call_really_used_regs[i]) regs_ever_clobbered[i] = 1; --- 7436,7442 ---- This flag is also set for the unwinding code in libgcc. See expand_builtin_unwind_init. For regs_ever_live this is done by reload. */ ! if (crtl->saves_all_registers) for (i = 0; i < 32; i++) if (!call_really_used_regs[i]) regs_ever_clobbered[i] = 1; *************** s390_regs_ever_clobbered (int *regs_ever *** 7434,7443 **** { FOR_BB_INSNS (cur_bb, cur_insn) { ! if (INSN_P (cur_insn)) ! note_stores (PATTERN (cur_insn), ! s390_reg_clobbered_rtx, ! 
regs_ever_clobbered); } } } --- 7445,7482 ---- { FOR_BB_INSNS (cur_bb, cur_insn) { ! rtx pat; ! ! if (!INSN_P (cur_insn)) ! continue; ! ! pat = PATTERN (cur_insn); ! ! /* Ignore GPR restore insns. */ ! if (epilogue_completed && RTX_FRAME_RELATED_P (cur_insn)) ! { ! if (GET_CODE (pat) == SET ! && GENERAL_REG_P (SET_DEST (pat))) ! { ! /* lgdr */ ! if (GET_MODE (SET_SRC (pat)) == DImode ! && FP_REG_P (SET_SRC (pat))) ! continue; ! ! /* l / lg */ ! if (GET_CODE (SET_SRC (pat)) == MEM) ! continue; ! } ! ! /* lm / lmg */ ! if (GET_CODE (pat) == PARALLEL ! && load_multiple_operation (pat, VOIDmode)) ! continue; ! } ! ! note_stores (pat, ! s390_reg_clobbered_rtx, ! regs_ever_clobbered); } } } *************** s390_frame_area (int *area_bottom, int * *** 7487,7559 **** *area_bottom = b; *area_top = t; } ! ! /* Fill cfun->machine with info about register usage of current function. ! Return in CLOBBERED_REGS which GPRs are currently considered set. */ static void ! s390_register_info (int clobbered_regs[]) { int i, j; ! /* Find first and last gpr to be saved. We trust regs_ever_live ! data, except that we don't save and restore global registers. ! ! Also, all registers with special meaning to the compiler need ! to be handled extra. */ ! ! s390_regs_ever_clobbered (clobbered_regs); ! /* fprs 8 - 15 are call saved for 64 Bit ABI. */ ! if (!epilogue_completed) { ! cfun_frame_layout.fpr_bitmap = 0; ! cfun_frame_layout.high_fprs = 0; ! ! for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++) ! { ! if (call_really_used_regs[i]) ! continue; ! /* During reload we have to use the df_regs_ever_live infos ! since reload is marking FPRs used as spill slots there as ! live before actually making the code changes. Without ! this we fail during elimination offset verification. */ ! if ((clobbered_regs[i] ! || (df_regs_ever_live_p (i) ! && (lra_in_progress ! || reload_in_progress ! || crtl->saves_all_registers))) ! && !global_regs[i]) ! { ! cfun_set_fpr_save (i); ! if (i >= FPR8_REGNUM) ! 
cfun_frame_layout.high_fprs++; ! } } } ! for (i = 0; i < 16; i++) ! clobbered_regs[i] = clobbered_regs[i] && !global_regs[i] && !fixed_regs[i]; ! if (frame_pointer_needed) ! clobbered_regs[HARD_FRAME_POINTER_REGNUM] = 1; if (flag_pic) clobbered_regs[PIC_OFFSET_TABLE_REGNUM] ! |= df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM); clobbered_regs[BASE_REGNUM] |= (cfun->machine->base_reg ! && REGNO (cfun->machine->base_reg) == BASE_REGNUM); clobbered_regs[RETURN_REGNUM] |= (!crtl->is_leaf || TARGET_TPF_PROFILING || cfun->machine->split_branches_pending_p || cfun_frame_layout.save_return_addr_p ! || crtl->calls_eh_return ! || cfun->stdarg); clobbered_regs[STACK_POINTER_REGNUM] |= (!crtl->is_leaf --- 7526,7693 ---- *area_bottom = b; *area_top = t; } ! /* Update gpr_save_slots in the frame layout trying to make use of ! FPRs as GPR save slots. ! This is a helper routine of s390_register_info. */ static void ! s390_register_info_gprtofpr () { + int save_reg_slot = FPR0_REGNUM; int i, j; ! if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf) ! return; ! for (i = 15; i >= 6; i--) { ! if (cfun_gpr_save_slot (i) == 0) ! continue; ! /* Advance to the next FP register which can be used as a ! GPR save slot. */ ! while ((!call_really_used_regs[save_reg_slot] ! || df_regs_ever_live_p (save_reg_slot) ! || cfun_fpr_save_p (save_reg_slot)) ! && FP_REGNO_P (save_reg_slot)) ! save_reg_slot++; ! if (!FP_REGNO_P (save_reg_slot)) ! { ! /* We only want to use ldgr/lgdr if we can get rid of ! stm/lm entirely. So undo the gpr slot allocation in ! case we ran out of FPR save slots. */ ! for (j = 6; j <= 15; j++) ! if (FP_REGNO_P (cfun_gpr_save_slot (j))) ! cfun_gpr_save_slot (j) = -1; ! break; } + cfun_gpr_save_slot (i) = save_reg_slot++; } + } ! /* Set the bits in fpr_bitmap for FPRs which need to be saved due to ! stdarg. ! This is a helper routine for s390_register_info. */ ! static void ! s390_register_info_stdarg_fpr () ! { ! int i; ! int min_fpr; ! int max_fpr; ! ! 
/* Save the FP argument regs for stdarg. f0, f2 for 31 bit and ! f0-f4 for 64 bit. */ ! if (!cfun->stdarg ! || !TARGET_HARD_FLOAT ! || !cfun->va_list_fpr_size ! || crtl->args.info.fprs >= FP_ARG_NUM_REG) ! return; ! ! min_fpr = crtl->args.info.fprs; ! max_fpr = min_fpr + cfun->va_list_fpr_size; ! if (max_fpr > FP_ARG_NUM_REG) ! max_fpr = FP_ARG_NUM_REG; ! ! /* The va_arg algorithm accesses the FPRs in the reg save area using ! a constant offset from r0. With the packed stack layout omitting ! FPRs from the beginning would change the offset for the ! subsequent FPRs. */ ! if (TARGET_PACKED_STACK) ! min_fpr = 0; ! ! for (i = min_fpr; i < max_fpr; i++) ! cfun_set_fpr_save (i + FPR0_REGNUM); ! } ! ! /* Reserve the GPR save slots for GPRs which need to be saved due to ! stdarg. ! This is a helper routine for s390_register_info. */ ! ! static void ! s390_register_info_stdarg_gpr () ! { ! int i; ! int min_gpr; ! int max_gpr; ! ! if (!cfun->stdarg ! || !cfun->va_list_gpr_size ! || crtl->args.info.gprs >= GP_ARG_NUM_REG) ! return; ! ! min_gpr = crtl->args.info.gprs; ! max_gpr = min_gpr + cfun->va_list_gpr_size; ! if (max_gpr > GP_ARG_NUM_REG) ! max_gpr = GP_ARG_NUM_REG; ! ! for (i = min_gpr; i < max_gpr; i++) ! cfun_gpr_save_slot (2 + i) = -1; ! } ! ! /* The GPR and FPR save slots in cfun->machine->frame_layout are set ! for registers which need to be saved in function prologue. ! This function can be used until the insns emitted for save/restore ! of the regs are visible in the RTL stream. */ ! ! static void ! s390_register_info () ! { ! int i, j; ! char clobbered_regs[32]; ! ! gcc_assert (!epilogue_completed); ! ! if (reload_completed) ! /* After reload we rely on our own routine to determine which ! registers need saving. */ ! s390_regs_ever_clobbered (clobbered_regs); ! else ! /* During reload we use regs_ever_live as a base since reload ! does changes in there which we otherwise would not be aware ! of. */ ! for (i = 0; i < 32; i++) ! 
clobbered_regs[i] = df_regs_ever_live_p (i); ! ! for (i = 0; i < 32; i++) ! clobbered_regs[i] = clobbered_regs[i] && !global_regs[i]; ! ! /* Mark the call-saved FPRs which need to be saved. ! This needs to be done before checking the special GPRs since the ! stack pointer usage depends on whether high FPRs have to be saved ! or not. */ ! cfun_frame_layout.fpr_bitmap = 0; ! cfun_frame_layout.high_fprs = 0; ! for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++) ! if (clobbered_regs[i] && !call_really_used_regs[i]) ! { ! cfun_set_fpr_save (i); ! if (i >= FPR8_REGNUM) ! cfun_frame_layout.high_fprs++; ! } if (flag_pic) clobbered_regs[PIC_OFFSET_TABLE_REGNUM] ! |= !!df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM); clobbered_regs[BASE_REGNUM] |= (cfun->machine->base_reg ! && REGNO (cfun->machine->base_reg) == BASE_REGNUM); ! ! clobbered_regs[HARD_FRAME_POINTER_REGNUM] ! |= !!frame_pointer_needed; + /* On pre z900 machines this might take until machine dependent + reorg to decide. + save_return_addr_p will only be set on non-zarch machines so + there is no risk that r14 goes into an FPR instead of a stack + slot. */ clobbered_regs[RETURN_REGNUM] |= (!crtl->is_leaf || TARGET_TPF_PROFILING || cfun->machine->split_branches_pending_p || cfun_frame_layout.save_return_addr_p ! || crtl->calls_eh_return); clobbered_regs[STACK_POINTER_REGNUM] |= (!crtl->is_leaf *************** s390_register_info (int clobbered_regs[] *** 7561,7664 **** || cfun_save_high_fprs_p || get_frame_size () > 0 || (reload_completed && cfun_frame_layout.frame_size > 0) ! || cfun->calls_alloca ! || cfun->stdarg); for (i = 6; i < 16; i++) ! if (df_regs_ever_live_p (i) || clobbered_regs[i]) ! break; ! for (j = 15; j > i; j--) ! if (df_regs_ever_live_p (j) || clobbered_regs[j]) ! break; ! if (i == 16) ! { ! /* Nothing to save/restore. */ ! cfun_frame_layout.first_save_gpr_slot = -1; ! cfun_frame_layout.last_save_gpr_slot = -1; ! cfun_frame_layout.first_save_gpr = -1; ! cfun_frame_layout.first_restore_gpr = -1; ! 
cfun_frame_layout.last_save_gpr = -1; ! cfun_frame_layout.last_restore_gpr = -1; ! } ! else ! { ! /* Save slots for gprs from i to j. */ ! cfun_frame_layout.first_save_gpr_slot = i; ! cfun_frame_layout.last_save_gpr_slot = j; ! for (i = cfun_frame_layout.first_save_gpr_slot; ! i < cfun_frame_layout.last_save_gpr_slot + 1; ! i++) ! if (clobbered_regs[i]) ! break; ! for (j = cfun_frame_layout.last_save_gpr_slot; j > i; j--) ! if (clobbered_regs[j]) ! break; ! if (i == cfun_frame_layout.last_save_gpr_slot + 1) ! { ! /* Nothing to save/restore. */ ! cfun_frame_layout.first_save_gpr = -1; ! cfun_frame_layout.first_restore_gpr = -1; ! cfun_frame_layout.last_save_gpr = -1; ! cfun_frame_layout.last_restore_gpr = -1; ! } ! else ! { ! /* Save / Restore from gpr i to j. */ ! cfun_frame_layout.first_save_gpr = i; ! cfun_frame_layout.first_restore_gpr = i; ! cfun_frame_layout.last_save_gpr = j; ! cfun_frame_layout.last_restore_gpr = j; ! } ! } ! if (cfun->stdarg) ! { ! /* Varargs functions need to save gprs 2 to 6. */ ! if (cfun->va_list_gpr_size ! && crtl->args.info.gprs < GP_ARG_NUM_REG) ! { ! int min_gpr = crtl->args.info.gprs; ! int max_gpr = min_gpr + cfun->va_list_gpr_size; ! if (max_gpr > GP_ARG_NUM_REG) ! max_gpr = GP_ARG_NUM_REG; ! if (cfun_frame_layout.first_save_gpr == -1 ! || cfun_frame_layout.first_save_gpr > 2 + min_gpr) ! { ! cfun_frame_layout.first_save_gpr = 2 + min_gpr; ! cfun_frame_layout.first_save_gpr_slot = 2 + min_gpr; ! } ! if (cfun_frame_layout.last_save_gpr == -1 ! || cfun_frame_layout.last_save_gpr < 2 + max_gpr - 1) ! { ! cfun_frame_layout.last_save_gpr = 2 + max_gpr - 1; ! cfun_frame_layout.last_save_gpr_slot = 2 + max_gpr - 1; ! } ! } ! /* Mark f0, f2 for 31 bit and f0-f4 for 64 bit to be saved. */ ! if (TARGET_HARD_FLOAT && cfun->va_list_fpr_size ! && crtl->args.info.fprs < FP_ARG_NUM_REG) ! { ! int min_fpr = crtl->args.info.fprs; ! int max_fpr = min_fpr + cfun->va_list_fpr_size; ! if (max_fpr > FP_ARG_NUM_REG) ! max_fpr = FP_ARG_NUM_REG; ! /* ??? 
This is currently required to ensure proper location ! of the fpr save slots within the va_list save area. */ ! if (TARGET_PACKED_STACK) ! min_fpr = 0; ! for (i = min_fpr; i < max_fpr; i++) ! cfun_set_fpr_save (i + FPR0_REGNUM); ! } ! } } /* Fill cfun->machine with info about frame of current function. */ --- 7695,7779 ---- || cfun_save_high_fprs_p || get_frame_size () > 0 || (reload_completed && cfun_frame_layout.frame_size > 0) ! || cfun->calls_alloca); ! ! memset (cfun_frame_layout.gpr_save_slots, 0, 16); for (i = 6; i < 16; i++) ! if (clobbered_regs[i]) ! cfun_gpr_save_slot (i) = -1; ! s390_register_info_stdarg_fpr (); ! s390_register_info_gprtofpr (); ! /* First find the range of GPRs to be restored. Vararg regs don't ! need to be restored so we do it before assigning slots to the ! vararg GPRs. */ ! for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++); ! for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--); ! cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i; ! cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j; ! /* stdarg functions might need to save GPRs 2 to 6. This might ! override the GPR->FPR save decision made above for r6 since ! vararg regs must go to the stack. */ ! s390_register_info_stdarg_gpr (); ! /* Now the range of GPRs which need saving. */ ! for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++); ! for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--); ! cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i; ! cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j; ! } ! /* This function is called by s390_optimize_prologue in order to get ! rid of unnecessary GPR save/restore instructions. The register info ! for the GPRs is re-computed and the ranges are re-calculated. */ ! static void ! s390_optimize_register_info () ! { ! char clobbered_regs[32]; ! int i, j; ! gcc_assert (epilogue_completed); ! gcc_assert (!cfun->machine->split_branches_pending_p); ! s390_regs_ever_clobbered (clobbered_regs); ! 
for (i = 0; i < 32; i++) ! clobbered_regs[i] = clobbered_regs[i] && !global_regs[i]; ! /* There is still special treatment needed for cases invisible to ! s390_regs_ever_clobbered. */ ! clobbered_regs[RETURN_REGNUM] ! |= (TARGET_TPF_PROFILING ! /* When expanding builtin_return_addr in ESA mode we do not ! know whether r14 will later be needed as scratch reg when ! doing branch splitting. So the builtin always accesses the ! r14 save slot and we need to stick to the save/restore ! decision for r14 even if it turns out that it didn't get ! clobbered. */ ! || cfun_frame_layout.save_return_addr_p ! || crtl->calls_eh_return); ! ! memset (cfun_frame_layout.gpr_save_slots, 0, 6); ! ! for (i = 6; i < 16; i++) ! if (!clobbered_regs[i]) ! cfun_gpr_save_slot (i) = 0; ! ! for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++); ! for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--); ! cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i; ! cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j; ! ! s390_register_info_stdarg_gpr (); ! ! for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++); ! for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--); ! cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i; ! cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j; } /* Fill cfun->machine with info about frame of current function. */ *************** s390_frame_info (void) *** 7668,7673 **** --- 7783,7804 ---- { int i; + cfun_frame_layout.first_save_gpr_slot = cfun_frame_layout.first_save_gpr; + cfun_frame_layout.last_save_gpr_slot = cfun_frame_layout.last_save_gpr; + + /* The va_arg builtin uses a constant distance of 16 * + UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area + pointer. So even if we are going to save the stack pointer in an + FPR we need the stack space in order to keep the offsets + correct. 
*/ + if (cfun->stdarg && cfun_save_arg_fprs_p) + { + cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM; + + if (cfun_frame_layout.first_save_gpr_slot == -1) + cfun_frame_layout.first_save_gpr_slot = STACK_POINTER_REGNUM; + } + cfun_frame_layout.frame_size = get_frame_size (); if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000) fatal_error ("total size of local variables exceeds architecture limit"); *************** s390_frame_info (void) *** 7738,7745 **** && !TARGET_TPF_PROFILING && cfun_frame_layout.frame_size == 0 && !cfun_save_high_fprs_p ! && !cfun->calls_alloca ! && !cfun->stdarg) return; if (!TARGET_PACKED_STACK) --- 7869,7875 ---- && !TARGET_TPF_PROFILING && cfun_frame_layout.frame_size == 0 && !cfun_save_high_fprs_p ! && !cfun->calls_alloca) return; if (!TARGET_PACKED_STACK) *************** s390_init_frame_layout (void) *** 7785,7791 **** { HOST_WIDE_INT frame_size; int base_used; ! int clobbered_regs[32]; /* On S/390 machines, we may need to perform branch splitting, which will require both base and return address register. We have no --- 7915,7922 ---- { HOST_WIDE_INT frame_size; int base_used; ! ! gcc_assert (!reload_completed); /* On S/390 machines, we may need to perform branch splitting, which will require both base and return address register. We have no *************** s390_init_frame_layout (void) *** 7814,7820 **** else cfun->machine->base_reg = gen_rtx_REG (Pmode, BASE_REGNUM); ! s390_register_info (clobbered_regs); s390_frame_info (); } while (frame_size != cfun_frame_layout.frame_size); --- 7945,7951 ---- else cfun->machine->base_reg = gen_rtx_REG (Pmode, BASE_REGNUM); ! s390_register_info (); s390_frame_info (); } while (frame_size != cfun_frame_layout.frame_size); *************** s390_optimize_nonescaping_tx (void) *** 7971,7999 **** return; } - /* Update frame layout. Recompute actual register save data based on - current info and update regs_ever_live for the special registers. 
- May be called multiple times, but may never cause *more* registers - to be saved than s390_init_frame_layout allocated room for. */ - - static void - s390_update_frame_layout (void) - { - int clobbered_regs[32]; - - s390_register_info (clobbered_regs); - - df_set_regs_ever_live (BASE_REGNUM, - clobbered_regs[BASE_REGNUM] ? true : false); - df_set_regs_ever_live (RETURN_REGNUM, - clobbered_regs[RETURN_REGNUM] ? true : false); - df_set_regs_ever_live (STACK_POINTER_REGNUM, - clobbered_regs[STACK_POINTER_REGNUM] ? true : false); - - if (cfun->machine->base_reg) - df_set_regs_ever_live (REGNO (cfun->machine->base_reg), true); - } - /* Return true if it is legal to put a value with MODE into REGNO. */ bool --- 8102,8107 ---- *************** s390_hard_regno_rename_ok (unsigned int *** 8054,8059 **** --- 8162,8192 ---- || REGNO (cfun->machine->base_reg) == new_reg) return false; + /* Prevent regrename from using call-saved regs which haven't + actually been saved. This is necessary since regrename assumes + the backend save/restore decisions are based on + df_regs_ever_live. Since we have our own routine we have to tell + regrename manually about it. */ + if (GENERAL_REGNO_P (new_reg) + && !call_really_used_regs[new_reg] + && cfun_gpr_save_slot (new_reg) == 0) + return false; + + return true; + } + + /* Return nonzero if register REGNO can be used as a scratch register + in peephole2. */ + + static bool + s390_hard_regno_scratch_ok (unsigned int regno) + { + /* See s390_hard_regno_rename_ok. */ + if (GENERAL_REGNO_P (regno) + && !call_really_used_regs[regno] + && cfun_gpr_save_slot (regno) == 0) + return false; + return true; } *************** HOST_WIDE_INT *** 8133,8139 **** s390_initial_elimination_offset (int from, int to) { HOST_WIDE_INT offset; - int index; /* ??? Why are we called for non-eliminable pairs? 
*/ if (!s390_can_eliminate (from, to)) --- 8266,8271 ---- *************** s390_initial_elimination_offset (int fro *** 8154,8163 **** case RETURN_ADDRESS_POINTER_REGNUM: s390_init_frame_layout (); ! index = RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot; ! gcc_assert (index >= 0); ! offset = cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset; ! offset += index * UNITS_PER_LONG; break; case BASE_REGNUM: --- 8286,8311 ---- case RETURN_ADDRESS_POINTER_REGNUM: s390_init_frame_layout (); ! ! if (cfun_frame_layout.first_save_gpr_slot == -1) ! { ! /* If it turns out that for stdarg nothing went into the reg ! save area we also do not need the return address ! pointer. */ ! if (cfun->stdarg && !cfun_save_arg_fprs_p) ! return 0; ! ! gcc_unreachable (); ! } ! ! /* In order to make the following work it is not necessary for ! r14 to have a save slot. It is sufficient if one other GPR ! got one. Since the GPRs are always stored without gaps we ! are able to calculate where the r14 save slot would ! reside. */ ! offset = (cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset + ! (RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot) * ! 
UNITS_PER_LONG); break; case BASE_REGNUM: *************** save_gprs (rtx base, int offset, int fir *** 8295,8300 **** --- 8443,8465 ---- addr = plus_constant (Pmode, base, offset + (start - first) * UNITS_PER_LONG); + + if (start == last) + { + if (TARGET_64BIT) + note = gen_movdi (gen_rtx_MEM (Pmode, addr), + gen_rtx_REG (Pmode, start)); + else + note = gen_movsi (gen_rtx_MEM (Pmode, addr), + gen_rtx_REG (Pmode, start)); + note = PATTERN (note); + + add_reg_note (insn, REG_FRAME_RELATED_EXPR, note); + RTX_FRAME_RELATED_P (insn) = 1; + + return insn; + } + note = gen_store_multiple (gen_rtx_MEM (Pmode, addr), gen_rtx_REG (Pmode, start), GEN_INT (last - start + 1)); *************** restore_gprs (rtx base, int offset, int *** 8335,8346 **** --- 8500,8513 ---- else insn = gen_movsi (gen_rtx_REG (Pmode, first), addr); + RTX_FRAME_RELATED_P (insn) = 1; return insn; } insn = gen_load_multiple (gen_rtx_REG (Pmode, first), addr, GEN_INT (last - first + 1)); + RTX_FRAME_RELATED_P (insn) = 1; return insn; } *************** s390_emit_stack_tie (void) *** 8405,8410 **** --- 8572,8627 ---- emit_insn (gen_stack_tie (mem)); } + /* Copy GPRS into FPR save slots. */ + + static void + s390_save_gprs_to_fprs (void) + { + int i; + + if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf) + return; + + for (i = 6; i < 16; i++) + { + if (FP_REGNO_P (cfun_gpr_save_slot (i))) + { + rtx insn = + emit_move_insn (gen_rtx_REG (DImode, cfun_gpr_save_slot (i)), + gen_rtx_REG (DImode, i)); + RTX_FRAME_RELATED_P (insn) = 1; + } + } + } + + /* Restore GPRs from FPR save slots. 
*/ + + static void + s390_restore_gprs_from_fprs (void) + { + int i; + + if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf) + return; + + for (i = 6; i < 16; i++) + { + if (FP_REGNO_P (cfun_gpr_save_slot (i))) + { + rtx insn = + emit_move_insn (gen_rtx_REG (DImode, i), + gen_rtx_REG (DImode, cfun_gpr_save_slot (i))); + df_set_regs_ever_live (i, true); + /* The frame related flag is only required on the save + operations. We nevertheless set it also for the restore + in order to recognize these instructions in + s390_optimize_prologue. The flag will then be + deleted. */ + RTX_FRAME_RELATED_P (insn) = 1; + } + } + } + /* Expand the prologue into a bunch of separate insns. */ void *************** s390_emit_prologue (void) *** 8419,8426 **** /* Try to get rid of the FPR clobbers. */ s390_optimize_nonescaping_tx (); ! /* Complete frame layout. */ ! s390_update_frame_layout (); /* Annotate all constant pool references to let the scheduler know they implicitly use the base register. */ --- 8636,8643 ---- /* Try to get rid of the FPR clobbers. */ s390_optimize_nonescaping_tx (); ! /* Re-compute register info. */ ! s390_register_info (); /* Annotate all constant pool references to let the scheduler know they implicitly use the base register. */ *************** s390_emit_prologue (void) *** 8446,8451 **** --- 8663,8670 ---- else temp_reg = gen_rtx_REG (Pmode, 1); + s390_save_gprs_to_fprs (); + /* Save call saved gprs. */ if (cfun_frame_layout.first_save_gpr != -1) { *************** s390_emit_epilogue (bool sibcall) *** 8900,8905 **** --- 9119,9126 ---- RTX_FRAME_RELATED_P (insn) = 1; } + s390_restore_gprs_from_fprs (); + if (! sibcall) { *************** s390_optimize_prologue (void) *** 10561,10568 **** rtx insn, new_insn, next_insn; /* Do a final recompute of the frame-related data. */ ! ! s390_update_frame_layout (); /* If all special registers are in fact used, there's nothing we can do, so no point in walking the insn list. 
*/ --- 10782,10788 ---- rtx insn, new_insn, next_insn; /* Do a final recompute of the frame-related data. */ ! s390_optimize_register_info (); /* If all special registers are in fact used, there's nothing we can do, so no point in walking the insn list. */ *************** s390_optimize_prologue (void) *** 10580,10597 **** { int first, last, off; rtx set, base, offset; next_insn = NEXT_INSN (insn); ! if (! NONJUMP_INSN_P (insn)) continue; ! if (GET_CODE (PATTERN (insn)) == PARALLEL ! && store_multiple_operation (PATTERN (insn), VOIDmode)) { ! set = XVECEXP (PATTERN (insn), 0, 0); first = REGNO (SET_SRC (set)); ! last = first + XVECLEN (PATTERN (insn), 0) - 1; offset = const0_rtx; base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset); off = INTVAL (offset); --- 10800,10862 ---- { int first, last, off; rtx set, base, offset; + rtx pat; next_insn = NEXT_INSN (insn); ! if (! NONJUMP_INSN_P (insn) || ! RTX_FRAME_RELATED_P (insn)) continue; ! pat = PATTERN (insn); ! ! /* Remove ldgr/lgdr instructions used for saving and restore ! GPRs if possible. */ ! if (TARGET_Z10 ! && GET_CODE (pat) == SET ! && GET_MODE (SET_SRC (pat)) == DImode ! && REG_P (SET_SRC (pat)) ! && REG_P (SET_DEST (pat))) ! { ! int src_regno = REGNO (SET_SRC (pat)); ! int dest_regno = REGNO (SET_DEST (pat)); ! int gpr_regno; ! int fpr_regno; ! ! if (!((GENERAL_REGNO_P (src_regno) && FP_REGNO_P (dest_regno)) ! || (FP_REGNO_P (src_regno) && GENERAL_REGNO_P (dest_regno)))) ! continue; ! ! gpr_regno = GENERAL_REGNO_P (src_regno) ? src_regno : dest_regno; ! fpr_regno = FP_REGNO_P (src_regno) ? src_regno : dest_regno; ! ! /* GPR must be call-saved, FPR must be call-clobbered. */ ! if (!call_really_used_regs[fpr_regno] ! || call_really_used_regs[gpr_regno]) ! continue; ! ! /* For restores we have to revert the frame related flag ! since no debug info is supposed to be generated for ! these. */ ! if (dest_regno == gpr_regno) ! RTX_FRAME_RELATED_P (insn) = 0; ! ! 
/* It must not happen that what we once saved in an FPR now ! needs a stack slot. */ ! gcc_assert (cfun_gpr_save_slot (gpr_regno) != -1); ! ! if (cfun_gpr_save_slot (gpr_regno) == 0) ! { ! remove_insn (insn); ! continue; ! } ! } ! ! if (GET_CODE (pat) == PARALLEL ! && store_multiple_operation (pat, VOIDmode)) { ! set = XVECEXP (pat, 0, 0); first = REGNO (SET_SRC (set)); ! last = first + XVECLEN (pat, 0) - 1; offset = const0_rtx; base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset); off = INTVAL (offset); *************** s390_optimize_prologue (void) *** 10624,10637 **** } if (cfun_frame_layout.first_save_gpr == -1 ! && GET_CODE (PATTERN (insn)) == SET ! && GET_CODE (SET_SRC (PATTERN (insn))) == REG ! && (REGNO (SET_SRC (PATTERN (insn))) == BASE_REGNUM ! || (!TARGET_CPU_ZARCH ! && REGNO (SET_SRC (PATTERN (insn))) == RETURN_REGNUM)) ! && GET_CODE (SET_DEST (PATTERN (insn))) == MEM) { ! set = PATTERN (insn); first = REGNO (SET_SRC (set)); offset = const0_rtx; base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset); --- 10889,10899 ---- } if (cfun_frame_layout.first_save_gpr == -1 ! && GET_CODE (pat) == SET ! && GENERAL_REG_P (SET_SRC (pat)) ! && GET_CODE (SET_DEST (pat)) == MEM) { ! set = pat; first = REGNO (SET_SRC (set)); offset = const0_rtx; base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset); *************** s390_optimize_prologue (void) *** 10647,10664 **** continue; } ! if (GET_CODE (PATTERN (insn)) == PARALLEL ! && load_multiple_operation (PATTERN (insn), VOIDmode)) { ! set = XVECEXP (PATTERN (insn), 0, 0); first = REGNO (SET_DEST (set)); ! last = first + XVECLEN (PATTERN (insn), 0) - 1; offset = const0_rtx; base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset); off = INTVAL (offset); if (GET_CODE (base) != REG || off < 0) continue; if (cfun_frame_layout.first_restore_gpr != -1 && (cfun_frame_layout.first_restore_gpr < first || cfun_frame_layout.last_restore_gpr > last)) --- 10909,10929 ---- continue; } ! 
if (GET_CODE (pat) == PARALLEL ! && load_multiple_operation (pat, VOIDmode)) { ! set = XVECEXP (pat, 0, 0); first = REGNO (SET_DEST (set)); ! last = first + XVECLEN (pat, 0) - 1; offset = const0_rtx; base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset); off = INTVAL (offset); if (GET_CODE (base) != REG || off < 0) continue; + + RTX_FRAME_RELATED_P (insn) = 0; + if (cfun_frame_layout.first_restore_gpr != -1 && (cfun_frame_layout.first_restore_gpr < first || cfun_frame_layout.last_restore_gpr > last)) *************** s390_optimize_prologue (void) *** 10676,10681 **** --- 10941,10947 ---- - first) * UNITS_PER_LONG, cfun_frame_layout.first_restore_gpr, cfun_frame_layout.last_restore_gpr); + RTX_FRAME_RELATED_P (new_insn) = 0; new_insn = emit_insn_before (new_insn, insn); INSN_ADDRESSES_NEW (new_insn, -1); } *************** s390_optimize_prologue (void) *** 10685,10698 **** } if (cfun_frame_layout.first_restore_gpr == -1 ! && GET_CODE (PATTERN (insn)) == SET ! && GET_CODE (SET_DEST (PATTERN (insn))) == REG ! && (REGNO (SET_DEST (PATTERN (insn))) == BASE_REGNUM ! || (!TARGET_CPU_ZARCH ! && REGNO (SET_DEST (PATTERN (insn))) == RETURN_REGNUM)) ! && GET_CODE (SET_SRC (PATTERN (insn))) == MEM) { ! set = PATTERN (insn); first = REGNO (SET_DEST (set)); offset = const0_rtx; base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset); --- 10951,10961 ---- } if (cfun_frame_layout.first_restore_gpr == -1 ! && GET_CODE (pat) == SET ! && GENERAL_REG_P (SET_DEST (pat)) ! && GET_CODE (SET_SRC (pat)) == MEM) { ! 
set = pat; first = REGNO (SET_DEST (set)); offset = const0_rtx; base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset); *************** s390_optimize_prologue (void) *** 10700,10705 **** --- 10963,10971 ---- if (GET_CODE (base) != REG || off < 0) continue; + + RTX_FRAME_RELATED_P (insn) = 0; + if (REGNO (base) != STACK_POINTER_REGNUM && REGNO (base) != HARD_FRAME_POINTER_REGNUM) continue; *************** s390_loop_unroll_adjust (unsigned nunrol *** 11638,11643 **** --- 11904,11912 ---- #undef TARGET_CANONICALIZE_COMPARISON #define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison + #undef TARGET_HARD_REGNO_SCRATCH_OK + #define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-s390.h"