The following patch switches on optional reloads. The optional reload patch resolves PR55342. The original code was switched off as it resulted in new GCC failures. The current code contains a new version of optional reload implementation. I've tried many different strategies to use optional reloads, all of them (except twos) resulted in SPEC2000 performance degradation.
The current heuristic is the best and probably one of the simplest one. It results in about 0.6% and 0.7% SPECFP2000 performance improvement for x86-64 and x86 (measured on i5-4670 with using -O3 -mtune=corei7). That is the best what I got on SPEC2000. The biggest average size increase is about 0.2% for SPECFP2000 on x86 and less 0.07% for SPECInt2000 on x86/x86-64 (for x86-64 SPECFP2000 the average code size is actually smaller). The patch was successfully bootstrapped an tested on x86/x86-64. Committed as rev. 202468. 2013-09-10 Vladimir Makarov <vmaka...@redhat.com> * lra.c (lra): Clear lra_optional_reload_pseudos before every constraint pass. * lra-constraints.c (curr_insn_transform): Switch on optional reloads. Check destination too to check move insn. (undo_optional_reloads): Add check that the original peudo did not changed its allocation and the optional reload was inherited on last inheritance pass. Break loop after deciding to keep optional reload. (lra_undo_inheritance): Add check that inherited pseudo still in memory.
Index: lra.c =================================================================== --- lra.c (revision 202223) +++ lra.c (working copy) @@ -2313,6 +2313,7 @@ lra (FILE *f) { for (;;) { + bitmap_clear (&lra_optional_reload_pseudos); /* We should try to assign hard registers to scratches even if there were no RTL transformations in lra_constraints. */ @@ -2363,7 +2364,6 @@ lra (FILE *f) if (! live_p) lra_clear_live_ranges (); } - bitmap_clear (&lra_optional_reload_pseudos); } bitmap_clear (&lra_subreg_reload_pseudos); bitmap_clear (&lra_inheritance_pseudos); Index: lra-constraints.c =================================================================== --- lra-constraints.c (revision 202223) +++ lra-constraints.c (working copy) @@ -3307,15 +3307,19 @@ curr_insn_transform (void) reg, we might improve the code through inheritance. If it does not get a hard register we coalesce memory/memory moves later. Ignore move insns to avoid cycling. */ - if (0 && ! lra_simple_p + if (! lra_simple_p && lra_undo_inheritance_iter < LRA_MAX_INHERITANCE_PASSES && goal_alt[i] != NO_REGS && REG_P (op) && (regno = REGNO (op)) >= FIRST_PSEUDO_REGISTER + && ! lra_former_scratch_p (regno) && reg_renumber[regno] < 0 && (curr_insn_set == NULL_RTX - || !(REG_P (SET_SRC (curr_insn_set)) - || MEM_P (SET_SRC (curr_insn_set)) - || GET_CODE (SET_SRC (curr_insn_set)) == SUBREG))) + || !((REG_P (SET_SRC (curr_insn_set)) + || MEM_P (SET_SRC (curr_insn_set)) + || GET_CODE (SET_SRC (curr_insn_set)) == SUBREG) + && (REG_P (SET_DEST (curr_insn_set)) + || MEM_P (SET_DEST (curr_insn_set)) + || GET_CODE (SET_DEST (curr_insn_set)) == SUBREG)))) optional_p = true; else continue; @@ -5439,7 +5443,7 @@ remove_inheritance_pseudos (bitmap remov static bool undo_optional_reloads (void) { - bool change_p; + bool change_p, keep_p; unsigned int regno, uid; bitmap_iterator bi, bi2; rtx insn, set, src, dest; @@ -5449,26 +5453,42 @@ undo_optional_reloads (void) bitmap_copy (&removed_optional_reload_pseudos, &lra_optional_reload_pseudos); EXECUTE_IF_SET_IN_BITMAP (&lra_optional_reload_pseudos, 0, regno, bi) if (reg_renumber[regno] >= 0) - EXECUTE_IF_SET_IN_BITMAP (&lra_reg_info[regno].insn_bitmap, 0, uid, bi2) - { - insn = lra_insn_recog_data[uid]->insn; - if ((set = single_set (insn)) == NULL_RTX) - continue; - src = SET_SRC (set); - dest = SET_DEST (set); - if (! REG_P (src) || ! REG_P (dest)) - continue; - if ((REGNO (src) == regno - && lra_reg_info[regno].restore_regno != (int) REGNO (dest)) - || (REGNO (dest) == regno - && lra_reg_info[regno].restore_regno != (int) REGNO (src))) - { - /* Optional reload was inherited. Keep it. */ - bitmap_clear_bit (&removed_optional_reload_pseudos, regno); - if (lra_dump_file != NULL) - fprintf (lra_dump_file, "Keep optional reload reg %d\n", regno); + { + keep_p = false; + if (reg_renumber[lra_reg_info[regno].restore_regno] >= 0) + /* If the original pseudo changed its allocation, just + removing the optional pseudo is dangerous as the original + pseudo will have longer live range. */ + keep_p = true; + else + EXECUTE_IF_SET_IN_BITMAP (&lra_reg_info[regno].insn_bitmap, 0, uid, bi2) + { + insn = lra_insn_recog_data[uid]->insn; + if ((set = single_set (insn)) == NULL_RTX) + continue; + src = SET_SRC (set); + dest = SET_DEST (set); + if (! REG_P (src) || ! REG_P (dest)) + continue; + if (REGNO (dest) == regno + /* Ignore insn for optional reloads itself. */ + && lra_reg_info[regno].restore_regno != (int) REGNO (src) + /* Check only inheritance on last inheritance pass. */ + && (int) REGNO (src) >= new_regno_start + /* Check that the optional reload was inherited. */ + && bitmap_bit_p (&lra_inheritance_pseudos, REGNO (src))) + { + keep_p = true; + break; + } } - } + if (keep_p) + { + bitmap_clear_bit (&removed_optional_reload_pseudos, regno); + if (lra_dump_file != NULL) + fprintf (lra_dump_file, "Keep optional reload reg %d\n", regno); + } + } change_p = ! bitmap_empty_p (&removed_optional_reload_pseudos); bitmap_initialize (&insn_bitmap, ®_obstack); EXECUTE_IF_SET_IN_BITMAP (&removed_optional_reload_pseudos, 0, regno, bi) @@ -5550,7 +5570,11 @@ lra_undo_inheritance (void) if (lra_reg_info[regno].restore_regno >= 0) { n_all_inherit++; - if (reg_renumber[regno] < 0) + if (reg_renumber[regno] < 0 + /* If the original pseudo changed its allocation, just + removing inheritance is dangerous as for changing + allocation we used shorter live-ranges. */ + && reg_renumber[lra_reg_info[regno].restore_regno] < 0) bitmap_set_bit (&remove_pseudos, regno); else n_inherit++;