Hello, current mainline miscompiles -fPIC code on SPU, which leads to wrong code in libgcc, causing a large number of test failures.
The problem is in the back-end's get_pic_reg routine, which implements an optimization: in a leaf function, if register 74 is unused so far, use that register instead of the standard register 124 as the PIC register. This helps because 74 is call-clobbered, and thus no prologue/epilogue code is needed. However, there is a problem in how the "register 74 is unused so far" check is implemented: with current mainline, this check succeeds in the post-reload splitter where uses of the PIC register are introduced. However, that same check is later repeated in prologue generation in order to emit code to set up the PIC register. At this point, register 74 is now considered as in use. This is true, of course, since it is in use by the code that was added in the post-reload splitter ... It would appear this used to work in the past because DF information was not updated between the post-reload splitter and prologue emitter passes. This seems to have changed recently (maybe due to the shrink-wrapping pass?), and now the back-end bug is exposed. Fortunately, the fix is simple: we perform the DF check only once and remember the result in cfun->machine. The patch below implements this. Tested on spu-elf, fixes many testcase failures. Committed to mainline. Bye, Ulrich ChangeLog: * config/spu/spu.c (struct machine_function): New data structure. (spu_init_machine_status): New function. (spu_option_override): Install it. (get_pic_reg): Set and use cfun->machine->pic_reg. (spu_split_immediate): Do not set crtl->uses_pic_offset_table. (need_to_save_reg): Use cfun->machine->pic_reg instead of checking crtl->uses_pic_offset_table. (spu_expand_prologue): Likewise. 
Index: gcc/config/spu/spu.c =================================================================== *** gcc/config/spu/spu.c (revision 179977) --- gcc/config/spu/spu.c (working copy) *************** static void spu_setup_incoming_varargs ( *** 500,509 **** --- 500,526 ---- struct gcc_target targetm = TARGET_INITIALIZER; + /* Define the structure for the machine field in struct function. */ + struct GTY(()) machine_function + { + /* Register to use for PIC accesses. */ + rtx pic_reg; + }; + + /* How to allocate a 'struct machine_function'. */ + static struct machine_function * + spu_init_machine_status (void) + { + return ggc_alloc_cleared_machine_function (); + } + /* Implement TARGET_OPTION_OVERRIDE. */ static void spu_option_override (void) { + /* Set up function hooks. */ + init_machine_status = spu_init_machine_status; + /* Small loops will be unpeeled at -O3. For SPU it is more important to keep code small by default. */ if (!flag_unroll_loops && !flag_peel_loops) *************** print_operand (FILE * file, rtx x, int c *** 1741,1752 **** static rtx get_pic_reg (void) { - rtx pic_reg = pic_offset_table_rtx; if (!reload_completed && !reload_in_progress) abort (); ! if (current_function_is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM)) ! pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM); ! return pic_reg; } /* Split constant addresses to handle cases that are too large. --- 1758,1779 ---- static rtx get_pic_reg (void) { if (!reload_completed && !reload_in_progress) abort (); ! ! /* If we've already made the decision, we need to keep with it. Once we've ! decided to use LAST_ARG_REGNUM, future calls to df_regs_ever_live_p may ! return true since the register is now live; this should not cause us to ! "switch back" to using pic_offset_table_rtx. */ ! if (!cfun->machine->pic_reg) ! { ! if (current_function_is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM)) ! cfun->machine->pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM); ! else ! 
cfun->machine->pic_reg = pic_offset_table_rtx; ! } ! ! return cfun->machine->pic_reg; } /* Split constant addresses to handle cases that are too large. *************** spu_split_immediate (rtx * ops) *** 1849,1855 **** { rtx pic_reg = get_pic_reg (); emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg)); - crtl->uses_pic_offset_table = 1; } return flag_pic || c == IC_IL2s; } --- 1876,1881 ---- *************** need_to_save_reg (int regno, int saving) *** 1875,1883 **** return 1; if (flag_pic && regno == PIC_OFFSET_TABLE_REGNUM ! && (!saving || crtl->uses_pic_offset_table) ! && (!saving ! || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM))) return 1; return 0; } --- 1901,1907 ---- return 1; if (flag_pic && regno == PIC_OFFSET_TABLE_REGNUM ! && (!saving || cfun->machine->pic_reg == pic_offset_table_rtx)) return 1; return 0; } *************** spu_expand_prologue (void) *** 1991,1998 **** rtx scratch_reg_0, scratch_reg_1; rtx insn, real; ! if (flag_pic && optimize == 0) ! crtl->uses_pic_offset_table = 1; if (spu_naked_function_p (current_function_decl)) return; --- 2015,2022 ---- rtx scratch_reg_0, scratch_reg_1; rtx insn, real; ! if (flag_pic && optimize == 0 && !cfun->machine->pic_reg) ! cfun->machine->pic_reg = pic_offset_table_rtx; if (spu_naked_function_p (current_function_decl)) return; *************** spu_expand_prologue (void) *** 2029,2037 **** } } ! if (flag_pic && crtl->uses_pic_offset_table) { ! rtx pic_reg = get_pic_reg (); insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0)); insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0)); } --- 2053,2061 ---- } } ! if (flag_pic && cfun->machine->pic_reg) { ! rtx pic_reg = cfun->machine->pic_reg; insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0)); insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0)); } -- Dr. Ulrich Weigand GNU Toolchain for Linux on System z and Cell BE ulrich.weig...@de.ibm.com