With the above two options, use a temporary register regno (as returned from split_stack_prologue_scratch_regno) as an indirect call scratch register to hold __morestack function address. On 64-bit targets, two temporary registers are always available, so load the function address in %r11 and call __morestack_large_model with its one-argument-register value in %r10. On 32-bit targets, bail out with a "sorry" if the temporary register can not be obtained.
On 32-bit targets, also emit a PIC sequence that re-uses the obtained indirect call scratch register before moving the function address to it. We can not set up %ebx PIC register in this case, but __morestack is prepared for this situation and sets it up by itself. PR target/89316 gcc/ChangeLog: * config/i386/i386.cc (ix86_expand_split_stack_prologue): Obtain scratch regno when flag_force_indirect_call is set. On 64-bit targets, call __morestack_large_model when flag_force_indirect_call is set and on 32-bit targets with -fpic, manually expand PIC sequence to call __morestack. Move the function address to an indirect call scratch register. gcc/testsuite/ChangeLog: * g++.target/i386/pr89316.C: New test. * gcc.target/i386/pr112605-1.c: New test. * gcc.target/i386/pr112605-2.c: New test. * gcc.target/i386/pr112605.c: New test. Jakub, I'm not entirely sure x86_32 PIC sequence is 100% correct (please note that the missing %ebx setup situation is handled in __morestack), so I'd be very grateful for your review of this part. Uros.
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 683ac643bc8..a37f683d895 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -10353,6 +10353,7 @@ ix86_expand_split_stack_prologue (void) rtx_code_label *label; rtx limit, current, allocate_rtx, call_fusage; rtx_insn *call_insn; + unsigned int scratch_regno = INVALID_REGNUM; rtx scratch_reg = NULL_RTX; rtx_code_label *varargs_label = NULL; rtx fn; @@ -10375,11 +10376,16 @@ ix86_expand_split_stack_prologue (void) limit = ix86_split_stack_guard (); - if (allocate < SPLIT_STACK_AVAILABLE) - current = stack_pointer_rtx; - else + if (allocate >= SPLIT_STACK_AVAILABLE + || flag_force_indirect_call) + { + scratch_regno = split_stack_prologue_scratch_regno (); + if (scratch_regno == INVALID_REGNUM) + return; + } + + if (allocate >= SPLIT_STACK_AVAILABLE) { - unsigned int scratch_regno; rtx offset; /* We need a scratch register to hold the stack pointer minus @@ -10387,9 +10393,7 @@ ix86_expand_split_stack_prologue (void) function, the scratch register can be any caller-saved register which is not used for parameters. */ offset = GEN_INT (- allocate); - scratch_regno = split_stack_prologue_scratch_regno (); - if (scratch_regno == INVALID_REGNUM) - return; + scratch_reg = gen_rtx_REG (Pmode, scratch_regno); if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode)) { @@ -10407,6 +10411,8 @@ ix86_expand_split_stack_prologue (void) } current = scratch_reg; } + else + current = stack_pointer_rtx; ix86_expand_branch (GEU, current, limit, label); rtx_insn *jump_insn = get_last_insn (); @@ -10435,8 +10441,8 @@ ix86_expand_split_stack_prologue (void) { rtx reg10, reg11; - reg10 = gen_rtx_REG (Pmode, R10_REG); - reg11 = gen_rtx_REG (Pmode, R11_REG); + reg10 = gen_rtx_REG (DImode, R10_REG); + reg11 = gen_rtx_REG (DImode, R11_REG); /* If this function uses a static chain, it will be in %r10. Preserve it across the call to __morestack. */ @@ -10449,32 +10455,25 @@ ix86_expand_split_stack_prologue (void) use_reg (&call_fusage, rax); } - if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC) - && !TARGET_PECOFF) + if (flag_force_indirect_call + || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC) { HOST_WIDE_INT argval; - gcc_assert (Pmode == DImode); - /* When using the large model we need to load the address - into a register, and we've run out of registers. So we - switch to a different calling convention, and we call a - different function: __morestack_large. We pass the - argument size in the upper 32 bits of r10 and pass the - frame size in the lower 32 bits. */ - gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate); - gcc_assert ((args_size & 0xffffffff) == args_size); - if (split_stack_fn_large == NULL_RTX) { split_stack_fn_large = gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model"); SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL; } + if (ix86_cmodel == CM_LARGE_PIC) { rtx_code_label *label; rtx x; + gcc_assert (Pmode == DImode); + label = gen_label_rtx (); emit_label (label); LABEL_PRESERVE_P (label) = 1; @@ -10487,12 +10486,19 @@ ix86_expand_split_stack_prologue (void) emit_move_insn (reg11, x); x = gen_rtx_PLUS (Pmode, reg10, reg11); x = gen_const_mem (Pmode, x); - emit_move_insn (reg11, x); + fn = copy_to_suggested_reg (x, reg11, Pmode); } else - emit_move_insn (reg11, split_stack_fn_large); + fn = split_stack_fn_large; - fn = reg11; + /* When using the large model we need to load the address + into a register, and we've run out of registers. So we + switch to a different calling convention, and we call a + different function: __morestack_large. We pass the + argument size in the upper 32 bits of r10 and pass the + frame size in the lower 32 bits. */ + gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate); + gcc_assert ((args_size & 0xffffffff) == args_size); argval = ((args_size << 16) << 16) + allocate; emit_move_insn (reg10, GEN_INT (argval)); @@ -10508,12 +10514,40 @@ ix86_expand_split_stack_prologue (void) } else { + if (flag_force_indirect_call && flag_pic) + { + rtx x; + + gcc_assert (Pmode == SImode); + + scratch_reg = gen_rtx_REG (Pmode, scratch_regno); + + emit_insn (gen_set_got (scratch_reg)); + x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn), + UNSPEC_GOT); + x = gen_rtx_CONST (Pmode, x); + x = gen_rtx_PLUS (Pmode, scratch_reg, x); + x = gen_const_mem (Pmode, x); + fn = copy_to_suggested_reg (x, scratch_reg, Pmode); + } + rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size))); add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD)); insn = emit_insn (gen_push (allocate_rtx)); add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD)); pop = GEN_INT (2 * UNITS_PER_WORD); } + + if (flag_force_indirect_call && !register_operand (fn, VOIDmode)) + { + scratch_reg = gen_rtx_REG (word_mode, scratch_regno); + + if (GET_MODE (fn) != word_mode) + fn = gen_rtx_ZERO_EXTEND (word_mode, fn); + + fn = copy_to_suggested_reg (fn, scratch_reg, word_mode); + } + call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn), GEN_INT (UNITS_PER_WORD), constm1_rtx, pop, false); @@ -10558,7 +10592,6 @@ ix86_expand_split_stack_prologue (void) slot. */ if (cfun->machine->split_stack_varargs_pointer != NULL_RTX) { - unsigned int scratch_regno; rtx frame_reg; int words; diff --git a/gcc/testsuite/g++.target/i386/pr89316.C b/gcc/testsuite/g++.target/i386/pr89316.C new file mode 100644 index 00000000000..2f05ef751a5 --- /dev/null +++ b/gcc/testsuite/g++.target/i386/pr89316.C @@ -0,0 +1,6 @@ +// PR target/89316 +// { dg-do compile } +// { dg-require-effective-target split_stack } +// { dg-options "-fsplit-stack -mforce-indirect-call" } + +struct foo { foo (); } foobar; diff --git a/gcc/testsuite/gcc.target/i386/pr112605-1.c b/gcc/testsuite/gcc.target/i386/pr112605-1.c new file mode 100644 index 00000000000..57b91909f0c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr112605-1.c @@ -0,0 +1,7 @@ +/* PR target/112605 */ +/* { dg-do compile } */ +/* { dg-require-effective-target split_stack } */ +/* { dg-require-effective-target fpic } */ +/* { dg-options "-fsplit-stack -fpic -mforce-indirect-call" } */ + +#include "pr112605.c" diff --git a/gcc/testsuite/gcc.target/i386/pr112605-2.c b/gcc/testsuite/gcc.target/i386/pr112605-2.c new file mode 100644 index 00000000000..7db9aba3abc --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr112605-2.c @@ -0,0 +1,7 @@ +/* PR target/112605 */ +/* { dg-do compile { target lp64 } } */ +/* { dg-require-effective-target split_stack } */ +/* { dg-require-effective-target fpic } */ +/* { dg-options "-fsplit-stack -fpic -mcmodel=large -mforce-indirect-call " } */ + +#include "pr112605.c" diff --git a/gcc/testsuite/gcc.target/i386/pr112605.c b/gcc/testsuite/gcc.target/i386/pr112605.c new file mode 100644 index 00000000000..da50a0f6d94 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr112605.c @@ -0,0 +1,24 @@ +/* PR target/112605 */ +/* { dg-do compile } */ +/* { dg-require-effective-target split_stack } */ +/* { dg-options "-fsplit-stack -mforce-indirect-call" } */ + +int x; +int y; + +void __attribute__((noinline)) f1(void) +{ + x++; +} + +static __attribute__((noinline)) void f3(void) +{ + y++; +} + +void f2() +{ + f1(); + f3(); + f1(); +}