libgcc/ChangeLog:
* config.host: Use t-stack and t-stack-s390 for s390*-*-linux.
* config/s390/morestack.S: New file.
* config/s390/t-stack-s390: New file.
* generic-morestack.c (__splitstack_find): Add s390-specific code.
gcc/ChangeLog:
* common/config/s390/s390-common.c (s390_supports_split_stack):
New function.
(TARGET_SUPPORTS_SPLIT_STACK): New macro.
* config/s390/s390-protos.h: Add s390_expand_split_stack_prologue.
* config/s390/s390.c (struct machine_function): New field
split_stack_varargs_pointer.
(s390_split_branches): Don't split split-stack pseudo-insns, rewire
split-stack prologue conditional jump instead of splitting it.
(s390_chunkify_start): Don't reload const pool register on split-stack
prologue conditional jumps.
(s390_register_info): Mark r12 as clobbered if it'll be used as temp
in s390_emit_prologue.
(s390_emit_prologue): Use r12 as temp if r1 is taken by split-stack
vararg pointer.
(morestack_ref): New global.
(SPLIT_STACK_AVAILABLE): New macro.
(s390_expand_split_stack_prologue): New function.
(s390_expand_split_stack_call_esa): New function.
(s390_expand_split_stack_call_zarch): New function.
(s390_live_on_entry): New function.
(s390_va_start): Use split-stack vararg pointer if appropriate.
(s390_reorg): Lower the split-stack pseudo-insns.
(s390_asm_file_end): Emit the split-stack note sections.
(TARGET_EXTRA_LIVE_ON_ENTRY): New macro.
* config/s390/s390.md (UNSPEC_STACK_CHECK): New unspec.
(UNSPECV_SPLIT_STACK_CALL_ZARCH): New unspec.
(UNSPECV_SPLIT_STACK_CALL_ESA): New unspec.
(UNSPECV_SPLIT_STACK_SIBCALL): New unspec.
(UNSPECV_SPLIT_STACK_MARKER): New unspec.
(split_stack_prologue): New expand.
(split_stack_call_esa): New insn.
(split_stack_call_zarch_*): New insn.
(split_stack_cond_call_zarch_*): New insn.
(split_stack_space_check): New expand.
(split_stack_sibcall_basr): New insn.
(split_stack_sibcall_*): New insn.
(split_stack_cond_sibcall_*): New insn.
(split_stack_marker): New insn.
---
gcc/ChangeLog | 41 ++
gcc/common/config/s390/s390-common.c | 14 +
gcc/config/s390/s390-protos.h | 1 +
gcc/config/s390/s390.c | 538 +++++++++++++++++++++++++-
gcc/config/s390/s390.md | 133 +++++++
libgcc/ChangeLog | 7 +
libgcc/config.host | 4 +-
libgcc/config/s390/morestack.S | 718 +++++++++++++++++++++++++++++++++++
libgcc/config/s390/t-stack-s390 | 2 +
libgcc/generic-morestack.c | 4 +
10 files changed, 1454 insertions(+), 8 deletions(-)
create mode 100644 libgcc/config/s390/morestack.S
create mode 100644 libgcc/config/s390/t-stack-s390
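Note (not part of the commit message): below is a rough C sketch of the check the
generated split-stack prologue performs, for reviewers unfamiliar with
-fsplit-stack. It covers only the conditional path (frame sizes that fit in an
add instruction); for larger frames __morestack is called unconditionally. The
helper names are made up for illustration; the TCB offsets and the
SPLIT_STACK_AVAILABLE value match the ones used in s390.c.

    /* Illustration only.  read_private_ss() stands in for loading the
       __private_ss guard from the TCB (offset 0x38 from the thread pointer
       on 64-bit, 0x20 on 31-bit); current_sp() and call_morestack() stand
       in for the emitted instructions.  */

    #include <stddef.h>
    #include <stdint.h>

    #define SPLIT_STACK_AVAILABLE 1024   /* slack left by the allocator */

    extern uintptr_t current_sp (void);        /* hypothetical helper */
    extern uintptr_t read_private_ss (void);   /* hypothetical helper */
    extern void call_morestack (size_t frame_size, size_t args_size);

    void
    split_stack_prologue_sketch (size_t frame_size, size_t args_size)
    {
      uintptr_t guard = read_private_ss ();

      /* Small frames are compared against the guard directly; larger
         ones add the frame size first.  */
      if (frame_size > SPLIT_STACK_AVAILABLE)
        guard += frame_size;

      if (current_sp () < guard)
        call_morestack (frame_size, args_size);  /* not enough stack left */
    }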
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 4c7046f..a4f4dff 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,46 @@
2016-01-02 Marcin Kościelnicki <[email protected]>
+ * common/config/s390/s390-common.c (s390_supports_split_stack):
+ New function.
+ (TARGET_SUPPORTS_SPLIT_STACK): New macro.
+ * config/s390/s390-protos.h: Add s390_expand_split_stack_prologue.
+ * config/s390/s390.c (struct machine_function): New field
+ split_stack_varargs_pointer.
+ (s390_split_branches): Don't split split-stack pseudo-insns, rewire
+ split-stack prologue conditional jump instead of splitting it.
+ (s390_chunkify_start): Don't reload const pool register on split-stack
+ prologue conditional jumps.
+ (s390_register_info): Mark r12 as clobbered if it'll be used as temp
+ in s390_emit_prologue.
+ (s390_emit_prologue): Use r12 as temp if r1 is taken by split-stack
+ vararg pointer.
+ (morestack_ref): New global.
+ (SPLIT_STACK_AVAILABLE): New macro.
+ (s390_expand_split_stack_prologue): New function.
+ (s390_expand_split_stack_call_esa): New function.
+ (s390_expand_split_stack_call_zarch): New function.
+ (s390_live_on_entry): New function.
+ (s390_va_start): Use split-stack vararg pointer if appropriate.
+ (s390_reorg): Lower the split-stack pseudo-insns.
+ (s390_asm_file_end): Emit the split-stack note sections.
+ (TARGET_EXTRA_LIVE_ON_ENTRY): New macro.
+ * config/s390/s390.md (UNSPEC_STACK_CHECK): New unspec.
+ (UNSPECV_SPLIT_STACK_CALL_ZARCH): New unspec.
+ (UNSPECV_SPLIT_STACK_CALL_ESA): New unspec.
+ (UNSPECV_SPLIT_STACK_SIBCALL): New unspec.
+ (UNSPECV_SPLIT_STACK_MARKER): New unspec.
+ (split_stack_prologue): New expand.
+ (split_stack_call_esa): New insn.
+ (split_stack_call_zarch_*): New insn.
+ (split_stack_cond_call_zarch_*): New insn.
+ (split_stack_space_check): New expand.
+ (split_stack_sibcall_basr): New insn.
+ (split_stack_sibcall_*): New insn.
+ (split_stack_cond_sibcall_*): New insn.
+ (split_stack_marker): New insn.
+
+2016-01-02 Marcin Kościelnicki <[email protected]>
+
* cfgrtl.c (rtl_tidy_fallthru_edge): Bail for unconditional jumps
with side effects.
diff --git a/gcc/common/config/s390/s390-common.c b/gcc/common/config/s390/s390-common.c
index 4cf0df7..0c468bf 100644
--- a/gcc/common/config/s390/s390-common.c
+++ b/gcc/common/config/s390/s390-common.c
@@ -105,6 +105,17 @@ s390_handle_option (struct gcc_options *opts ATTRIBUTE_UNUSED,
}
}
+/* -fsplit-stack uses a field in the TCB, available with glibc-2.23.
+ We don't verify it, since earlier versions just have padding in
+ its place, which works just as well. */
+
+static bool
+s390_supports_split_stack (bool report ATTRIBUTE_UNUSED,
+ struct gcc_options *opts ATTRIBUTE_UNUSED)
+{
+ return true;
+}
+
#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
@@ -117,4 +128,7 @@ s390_handle_option (struct gcc_options *opts ATTRIBUTE_UNUSED,
#undef TARGET_OPTION_INIT_STRUCT
#define TARGET_OPTION_INIT_STRUCT s390_option_init_struct
+#undef TARGET_SUPPORTS_SPLIT_STACK
+#define TARGET_SUPPORTS_SPLIT_STACK s390_supports_split_stack
+
struct gcc_targetm_common targetm_common = TARGETM_COMMON_INITIALIZER;
diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
index 962abb1..936e267 100644
--- a/gcc/config/s390/s390-protos.h
+++ b/gcc/config/s390/s390-protos.h
@@ -42,6 +42,7 @@ extern bool s390_handle_option (struct gcc_options *opts ATTRIBUTE_UNUSED,
extern HOST_WIDE_INT s390_initial_elimination_offset (int, int);
extern void s390_emit_prologue (void);
extern void s390_emit_epilogue (bool);
+extern void s390_expand_split_stack_prologue (void);
extern bool s390_can_use_simple_return_insn (void);
extern bool s390_can_use_return_insn (void);
extern void s390_function_profiler (FILE *, int);
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 9dc8d1e..0255eec 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -426,6 +426,13 @@ struct GTY(()) machine_function
/* True if the current function may contain a tbegin clobbering
FPRs. */
bool tbegin_p;
+
+ /* For -fsplit-stack support: A stack local which holds a pointer to
+ the stack arguments for a function with a variable number of
+ arguments. This is set at the start of the function and is used
+ to initialize the overflow_arg_area field of the va_list
+ structure. */
+ rtx split_stack_varargs_pointer;
};
/* Few accessor macros for struct cfun->machine->s390_frame_layout. */
@@ -7669,7 +7676,17 @@ s390_split_branches (void)
pat = PATTERN (insn);
if (GET_CODE (pat) == PARALLEL)
- pat = XVECEXP (pat, 0, 0);
+ {
+ /* Split stack call pseudo-jump doesn't need splitting. */
+ if (GET_CODE (XVECEXP (pat, 0, 1)) == SET
+ && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 1)) == UNSPEC_VOLATILE
+ && (XINT (XEXP (XVECEXP (pat, 0, 1), 1), 1)
+ == UNSPECV_SPLIT_STACK_CALL_ESA
+ || XINT (XEXP (XVECEXP (pat, 0, 1), 1), 1)
+ == UNSPECV_SPLIT_STACK_CALL_ZARCH))
+ continue;
+ pat = XVECEXP (pat, 0, 0);
+ }
if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
continue;
@@ -7692,6 +7709,49 @@ s390_split_branches (void)
if (get_attr_length (insn) <= 4)
continue;
+ if (prologue_epilogue_contains (insn))
+ {
+ /* A jump in prologue/epilogue must come from the split-stack
+ prologue. It cannot be split - there are no scratch regs
+ available at that point. Rewire it instead. */
+
+ rtx_insn *code_label = (rtx_insn *)XEXP (*label, 0);
+ gcc_assert (LABEL_P (code_label));
+ rtx_insn *note = NEXT_INSN (code_label);
+ gcc_assert (NOTE_P (note));
+ rtx_insn *jump_ss = NEXT_INSN (note);
+ gcc_assert (JUMP_P (jump_ss));
+ rtx_insn *barrier = NEXT_INSN (jump_ss);
+ gcc_assert (BARRIER_P (barrier));
+ gcc_assert (GET_CODE (SET_SRC (pat)) == IF_THEN_ELSE);
+ gcc_assert (GET_CODE (XEXP (SET_SRC (pat), 0)) == LT);
+
+ /* step 1 - insert new label after */
+ rtx new_label = gen_label_rtx ();
+ emit_label_after (new_label, insn);
+
+ /* step 2 - reorder */
+ reorder_insns_nobb (code_label, barrier, insn);
+
+ /* step 3 - retarget jump */
+ rtx new_target = gen_rtx_LABEL_REF (VOIDmode, new_label);
+ ret = validate_change (insn, label, new_target, 0);
+ gcc_assert (ret);
+ LABEL_NUSES (new_label)++;
+ LABEL_NUSES (code_label)--;
+ JUMP_LABEL (insn) = new_label;
+
+ /* step 4 - invert jump cc */
+ rtx *pcond = &XEXP (SET_SRC (pat), 0);
+ rtx new_cond = gen_rtx_fmt_ee (GE, VOIDmode,
+ XEXP (*pcond, 0),
+ XEXP (*pcond, 1));
+ ret = validate_change (insn, pcond, new_cond, 0);
+ gcc_assert (ret);
+
+ continue;
+ }
+
/* We are going to use the return register as scratch register,
make sure it will be saved/restored by the prologue/epilogue. */
cfun_frame_layout.save_return_addr_p = 1;
@@ -8736,7 +8796,7 @@ s390_chunkify_start (void)
}
/* If we have a direct jump (conditional or unconditional),
check all potential targets. */
- else if (JUMP_P (insn))
+ else if (JUMP_P (insn) && !prologue_epilogue_contains (insn))
{
rtx pat = PATTERN (insn);
@@ -9316,9 +9376,13 @@ s390_register_info ()
cfun_frame_layout.high_fprs++;
}
- if (flag_pic)
- clobbered_regs[PIC_OFFSET_TABLE_REGNUM]
- |= !!df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
+ /* Register 12 is used for GOT address, but also as temp in prologue
+ for split-stack stdarg functions (unless r14 is available). */
+ clobbered_regs[12]
+ |= ((flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
+ || (flag_split_stack && cfun->stdarg
+ && (crtl->is_leaf || TARGET_TPF_PROFILING
+ || has_hard_reg_initial_val (Pmode, RETURN_REGNUM))));
clobbered_regs[BASE_REGNUM]
|= (cfun->machine->base_reg
@@ -10446,6 +10510,8 @@ s390_emit_prologue (void)
&& !crtl->is_leaf
&& !TARGET_TPF_PROFILING)
temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
+ else if (flag_split_stack && cfun->stdarg)
+ temp_reg = gen_rtx_REG (Pmode, 12);
else
temp_reg = gen_rtx_REG (Pmode, 1);
@@ -10939,6 +11005,386 @@ s300_set_up_by_prologue (hard_reg_set_container *regs)
SET_HARD_REG_BIT (regs->set, REGNO (cfun->machine->base_reg));
}
+/* -fsplit-stack support. */
+
+/* A SYMBOL_REF for __morestack. */
+static GTY(()) rtx morestack_ref;
+
+/* When using -fsplit-stack, the allocation routines set a field in
+ the TCB to the bottom of the stack plus this much space, measured
+ in bytes. */
+
+#define SPLIT_STACK_AVAILABLE 1024
+
+/* Emit -fsplit-stack prologue, which goes before the regular function
+ prologue. */
+
+void
+s390_expand_split_stack_prologue (void)
+{
+ rtx r1, guard, cc;
+ rtx_insn *insn;
+ /* Offset from thread pointer to __private_ss. */
+ int psso = TARGET_64BIT ? 0x38 : 0x20;
+ /* Pointer size in bytes. */
+ /* Frame size and argument size - the two parameters to __morestack. */
+ HOST_WIDE_INT frame_size = cfun_frame_layout.frame_size;
+ /* Align argument size to 8 bytes - simplifies __morestack code. */
+ HOST_WIDE_INT args_size = crtl->args.size >= 0
+ ? ((crtl->args.size + 7) & ~7)
+ : 0;
+ /* Label to jump to when no __morestack call is necessary. */
+ rtx_code_label *enough = NULL;
+ /* Label to be called by __morestack. */
+ rtx_code_label *call_done = NULL;
+ /* 1 if __morestack called conditionally, 0 if always. */
+ int conditional = 0;
+
+ gcc_assert (flag_split_stack && reload_completed);
+
+ r1 = gen_rtx_REG (Pmode, 1);
+
+ /* If no stack frame will be allocated, don't do anything. */
+ if (!frame_size)
+ {
+ /* But emit a marker that will let the linker and indirect function
+ calls recognise this function as split-stack aware. */
+ emit_insn (gen_split_stack_marker ());
+ if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
+ {
+ /* If va_start is used, just use r15. */
+ emit_move_insn (r1,
+ gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+ GEN_INT (STACK_POINTER_OFFSET)));
+ }
+ return;
+ }
+
+ if (morestack_ref == NULL_RTX)
+ {
+ morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
+ SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
+ | SYMBOL_FLAG_FUNCTION);
+ }
+
+ if (frame_size <= 0x7fff || (TARGET_EXTIMM && frame_size <= 0xffffffffu))
+ {
+ /* If frame_size will fit in an add instruction, do a stack space
+ check, and only call __morestack if there's not enough space. */
+ conditional = 1;
+
+ /* Get thread pointer. r1 is the only register we can always destroy - r0
+ could contain a static chain (and cannot be used to address memory
+ anyway), r2-r6 can contain parameters, and r6-r15 are callee-saved. */
+ emit_move_insn (r1, gen_rtx_REG (Pmode, TP_REGNUM));
+ /* Aim at __private_ss. */
+ guard = gen_rtx_MEM (Pmode, plus_constant (Pmode, r1, psso));
+
+ /* If less than 1kiB is used, skip the addition and compare directly with
+ __private_ss. */
+ if (frame_size > SPLIT_STACK_AVAILABLE)
+ {
+ emit_move_insn (r1, guard);
+ if (TARGET_64BIT)
+ emit_insn (gen_adddi3 (r1, r1, GEN_INT(frame_size)));
+ else
+ emit_insn (gen_addsi3 (r1, r1, GEN_INT(frame_size)));
+ guard = r1;
+ }
+
+ if (TARGET_CPU_ZARCH)
+ {
+ rtx tmp;
+
+ /* Compare the (maybe adjusted) guard with the stack pointer. */
+ cc = s390_emit_compare (LT, stack_pointer_rtx, guard);
+
+ call_done = gen_label_rtx ();
+
+ if (TARGET_64BIT)
+ tmp = gen_split_stack_cond_call_zarch_di (call_done,
+ morestack_ref,
+ GEN_INT (frame_size),
+ GEN_INT (args_size),
+ cc);
+ else
+ tmp = gen_split_stack_cond_call_zarch_si (call_done,
+ morestack_ref,
+ GEN_INT (frame_size),
+ GEN_INT (args_size),
+ cc);
+
+
+ insn = emit_jump_insn (tmp);
+ JUMP_LABEL (insn) = call_done;
+
+ /* Mark the jump as very unlikely to be taken. */
+ add_int_reg_note (insn, REG_BR_PROB, REG_BR_PROB_BASE / 100);
+ }
+ else
+ {
+ /* Compare the (maybe adjusted) guard with the stack pointer. */
+ cc = s390_emit_compare (GE, stack_pointer_rtx, guard);
+
+ enough = gen_label_rtx ();
+ insn = s390_emit_jump (enough, cc);
+ JUMP_LABEL (insn) = enough;
+
+ /* Mark the jump as very likely to be taken. */
+ add_int_reg_note (insn, REG_BR_PROB,
+ REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
+ }
+ }
+
+ if (call_done == NULL)
+ {
+ rtx tmp;
+ call_done = gen_label_rtx ();
+
+ /* Now, we need to call __morestack. It has very special calling
+ conventions: it preserves param/return/static chain registers for
+ calling main function body, and looks for its own parameters
+ at %r1 (after aligning it up to a 4 byte boundary for 31-bit mode). */
+ if (TARGET_64BIT)
+ tmp = gen_split_stack_call_zarch_di (call_done,
+ morestack_ref,
+ GEN_INT (frame_size),
+ GEN_INT (args_size));
+ else if (TARGET_CPU_ZARCH)
+ tmp = gen_split_stack_call_zarch_si (call_done,
+ morestack_ref,
+ GEN_INT (frame_size),
+ GEN_INT (args_size));
+ else
+ tmp = gen_split_stack_call_esa (call_done,
+ morestack_ref,
+ GEN_INT (frame_size),
+ GEN_INT (args_size));
+ insn = emit_jump_insn (tmp);
+ JUMP_LABEL (insn) = call_done;
+ emit_barrier ();
+ }
+
+ /* __morestack will call us here. */
+
+ if (enough != NULL)
+ {
+ emit_label (enough);
+ LABEL_NUSES (enough) = 1;
+ }
+
+ if (conditional && cfun->machine->split_stack_varargs_pointer != NULL_RTX)
+ {
+ /* If va_start is used, and __morestack was not called, just use r15. */
+ emit_move_insn (r1,
+ gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+ GEN_INT (STACK_POINTER_OFFSET)));
+ }
+
+ emit_label (call_done);
+ LABEL_NUSES (call_done) = 1;
+}
+
+/* Generates split-stack call sequence for esa mode, along with its parameter
+ block. */
+
+static void
+s390_expand_split_stack_call_esa (rtx_insn *orig_insn,
+ rtx call_done,
+ rtx function,
+ rtx frame_size,
+ rtx args_size)
+{
+ int psize = GET_MODE_SIZE (Pmode);
+ /* Labels for literal base, literal __morestack, param base. */
+ rtx litbase = gen_label_rtx();
+ rtx litms = gen_label_rtx();
+ rtx parmbase = gen_label_rtx();
+ rtx r1 = gen_rtx_REG (Pmode, 1);
+ rtx_insn *insn = orig_insn;
+ rtx tmp, tmp2;
+
+ /* No brasl, we have to make do using basr and a literal pool. */
+
+ /* %r1 = litbase. */
+ insn = emit_insn_after (gen_main_base_31_small (r1, litbase), insn);
+ insn = emit_label_after (litbase, insn);
+
+ /* a %r1, .Llitms-.Llitbase(%r1) */
+ tmp = gen_rtx_LABEL_REF (Pmode, litbase);
+ tmp2 = gen_rtx_LABEL_REF (Pmode, litms);
+ tmp = gen_rtx_UNSPEC (Pmode,
+ gen_rtvec (2, tmp2, tmp),
+ UNSPEC_POOL_OFFSET);
+ tmp = gen_rtx_CONST (Pmode, tmp);
+ tmp = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, r1, tmp));
+ insn = emit_insn_after (gen_addsi3 (r1, r1, tmp), insn);
+ add_reg_note (insn, REG_LABEL_OPERAND, litbase);
+ add_reg_note (insn, REG_LABEL_OPERAND, litms);
+ LABEL_NUSES (litbase)++;
+ LABEL_NUSES (litms)++;
+
+ /* basr %r1, %r1 */
+ tmp = gen_split_stack_sibcall_basr (r1, call_done);
+ insn = emit_jump_insn_after (tmp, insn);
+ JUMP_LABEL (insn) = call_done;
+ LABEL_NUSES (call_done)++;
+
+ /* __morestack will mangle its return register to get our parameters. */
+
+ /* Now, we'll emit parameters to __morestack. First, align to pointer size
+ (this mirrors the alignment done in __morestack - don't touch it). */
+ insn = emit_insn_after (gen_pool_align (GEN_INT (psize)), insn);
+
+ insn = emit_label_after (parmbase, insn);
+
+ tmp = gen_rtx_UNSPEC_VOLATILE (Pmode,
+ gen_rtvec (1, frame_size),
+ UNSPECV_POOL_ENTRY);
+ insn = emit_insn_after (tmp, insn);
+
+ /* Second parameter is size of the arguments passed on stack that
+ __morestack has to copy to the new stack (does not include varargs). */
+ tmp = gen_rtx_UNSPEC_VOLATILE (Pmode,
+ gen_rtvec (1, args_size),
+ UNSPECV_POOL_ENTRY);
+ insn = emit_insn_after (tmp, insn);
+
+ /* Third parameter is offset between start of the parameter block
+ and function body to be called by __morestack. */
+ tmp = gen_rtx_LABEL_REF (Pmode, parmbase);
+ tmp2 = gen_rtx_LABEL_REF (Pmode, call_done);
+ tmp = gen_rtx_CONST (Pmode,
+ gen_rtx_MINUS (Pmode, tmp2, tmp));
+ tmp = gen_rtx_UNSPEC_VOLATILE (Pmode,
+ gen_rtvec (1, tmp),
+ UNSPECV_POOL_ENTRY);
+ insn = emit_insn_after (tmp, insn);
+ add_reg_note (insn, REG_LABEL_OPERAND, call_done);
+ LABEL_NUSES (call_done)++;
+ add_reg_note (insn, REG_LABEL_OPERAND, parmbase);
+ LABEL_NUSES (parmbase)++;
+
+ /* We take advantage of the already-existing literal pool here to stuff
+ the __morestack address for use in the call above. */
+
+ insn = emit_label_after (litms, insn);
+
+ /* We actually emit __morestack - litbase to support PIC. Since it
+ works just as well for non-PIC, we use it in all cases. */
+
+ tmp = gen_rtx_LABEL_REF (Pmode, litbase);
+ tmp = gen_rtx_CONST (Pmode,
+ gen_rtx_MINUS (Pmode, function, tmp));
+ tmp = gen_rtx_UNSPEC_VOLATILE (Pmode,
+ gen_rtvec (1, tmp),
+ UNSPECV_POOL_ENTRY);
+ insn = emit_insn_after (tmp, insn);
+ add_reg_note (insn, REG_LABEL_OPERAND, litbase);
+ LABEL_NUSES (litbase)++;
+
+ delete_insn (orig_insn);
+}
+
+/* Generates split-stack call sequence for zarch mode, along with its parameter
+ block. */
+
+static void
+s390_expand_split_stack_call_zarch (rtx_insn *orig_insn,
+ rtx call_done,
+ rtx function,
+ rtx frame_size,
+ rtx args_size,
+ rtx cond)
+{
+ int psize = GET_MODE_SIZE (Pmode);
+ rtx_insn *insn = orig_insn;
+ rtx parmbase = gen_label_rtx();
+ rtx r1 = gen_rtx_REG (Pmode, 1);
+ rtx tmp, tmp2;
+
+ /* %r1 = litbase. */
+ insn = emit_insn_after (gen_main_base_64 (r1, parmbase), insn);
+ add_reg_note (insn, REG_LABEL_OPERAND, parmbase);
+ LABEL_NUSES (parmbase)++;
+
+ /* jg<cond> __morestack. */
+ if (cond == NULL)
+ {
+ if (TARGET_64BIT)
+ tmp = gen_split_stack_sibcall_di (function, call_done);
+ else
+ tmp = gen_split_stack_sibcall_si (function, call_done);
+ insn = emit_jump_insn_after (tmp, insn);
+ }
+ else
+ {
+ if (!s390_comparison (cond, VOIDmode))
+ internal_error ("bad split_stack_call_zarch cond");
+ if (TARGET_64BIT)
+ tmp = gen_split_stack_cond_sibcall_di (function, cond, call_done);
+ else
+ tmp = gen_split_stack_cond_sibcall_si (function, cond, call_done);
+ insn = emit_jump_insn_after (tmp, insn);
+ }
+ JUMP_LABEL (insn) = call_done;
+ LABEL_NUSES (call_done)++;
+
+ /* Go to .rodata. */
+ insn = emit_insn_after (gen_pool_section_start (), insn);
+
+ /* Now, we'll emit parameters to __morestack. First, align to pointer size
+ (this mirrors the alignment done in __morestack - don't touch it). */
+ insn = emit_insn_after (gen_pool_align (GEN_INT (psize)), insn);
+
+ insn = emit_label_after (parmbase, insn);
+
+ tmp = gen_rtx_UNSPEC_VOLATILE (Pmode,
+ gen_rtvec (1, frame_size),
+ UNSPECV_POOL_ENTRY);
+ insn = emit_insn_after (tmp, insn);
+
+ /* Second parameter is size of the arguments passed on stack that
+ __morestack has to copy to the new stack (does not include varargs). */
+ tmp = gen_rtx_UNSPEC_VOLATILE (Pmode,
+ gen_rtvec (1, args_size),
+ UNSPECV_POOL_ENTRY);
+ insn = emit_insn_after (tmp, insn);
+
+ /* Third parameter is offset between start of the parameter block
+ and function body to be called by __morestack. */
+ tmp = gen_rtx_LABEL_REF (Pmode, parmbase);
+ tmp2 = gen_rtx_LABEL_REF (Pmode, call_done);
+ tmp = gen_rtx_CONST (Pmode,
+ gen_rtx_MINUS (Pmode, tmp2, tmp));
+ tmp = gen_rtx_UNSPEC_VOLATILE (Pmode,
+ gen_rtvec (1, tmp),
+ UNSPECV_POOL_ENTRY);
+ insn = emit_insn_after (tmp, insn);
+ add_reg_note (insn, REG_LABEL_OPERAND, call_done);
+ LABEL_NUSES (call_done)++;
+ add_reg_note (insn, REG_LABEL_OPERAND, parmbase);
+ LABEL_NUSES (parmbase)++;
+
+ /* Return from .rodata. */
+ insn = emit_insn_after (gen_pool_section_end (), insn);
+
+ delete_insn (orig_insn);
+}
+
+/* We may have to tell the dataflow pass that the split stack prologue
+ is initializing a register. */
+
+static void
+s390_live_on_entry (bitmap regs)
+{
+ if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
+ {
+ gcc_assert (flag_split_stack);
+ bitmap_set_bit (regs, 1);
+ }
+}
+
/* Return true if the function can use simple_return to return outside
of a shrink-wrapped region. At present shrink-wrapping is supported
in all cases. */
@@ -11541,6 +11987,27 @@ s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
+ if (flag_split_stack
+ && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
+ == NULL)
+ && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
+ {
+ rtx reg;
+ rtx_insn *seq;
+
+ reg = gen_reg_rtx (Pmode);
+ cfun->machine->split_stack_varargs_pointer = reg;
+
+ start_sequence ();
+ emit_move_insn (reg, gen_rtx_REG (Pmode, 1));
+ seq = get_insns ();
+ end_sequence ();
+
+ push_topmost_sequence ();
+ emit_insn_after (seq, entry_of_function ());
+ pop_topmost_sequence ();
+ }
+
/* Find the overflow area.
FIXME: This currently is too pessimistic when the vector ABI is
enabled. In that case we *always* set up the overflow area
@@ -11549,7 +12016,10 @@ s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
|| n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG
|| TARGET_VX_ABI)
{
- t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
+ if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
+ t = make_tree (TREE_TYPE (ovf), crtl->args.internal_arg_pointer);
+ else
+ t = make_tree (TREE_TYPE (ovf),
+ cfun->machine->split_stack_varargs_pointer);
off = INTVAL (crtl->args.arg_offset_rtx);
off = off < 0 ? 0 : off;
@@ -13158,6 +13628,56 @@ s390_reorg (void)
}
}
+ if (flag_split_stack)
+ {
+ rtx_insn *insn;
+
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ /* Look for the split-stack fake jump instructions. */
+ if (!JUMP_P(insn))
+ continue;
+ if (GET_CODE (PATTERN (insn)) != PARALLEL
+ || XVECLEN (PATTERN (insn), 0) != 2)
+ continue;
+ rtx set = XVECEXP (PATTERN (insn), 0, 1);
+ if (GET_CODE (set) != SET)
+ continue;
+ rtx unspec = XEXP(set, 1);
+ if (GET_CODE (unspec) != UNSPEC_VOLATILE)
+ continue;
+ if (XINT (unspec, 1) != UNSPECV_SPLIT_STACK_CALL_ESA
+ && XINT (unspec, 1) != UNSPECV_SPLIT_STACK_CALL_ZARCH)
+ continue;
+ rtx set_pc = XVECEXP (PATTERN (insn), 0, 0);
+ rtx function = XVECEXP (unspec, 0, 0);
+ rtx frame_size = XVECEXP (unspec, 0, 1);
+ rtx args_size = XVECEXP (unspec, 0, 2);
+ rtx pc_src = XEXP (set_pc, 1);
+ rtx call_done, cond = NULL_RTX;
+ if (GET_CODE (pc_src) == IF_THEN_ELSE)
+ {
+ cond = XEXP (pc_src, 0);
+ call_done = XEXP (XEXP (pc_src, 1), 0);
+ }
+ else
+ call_done = XEXP (pc_src, 0);
+ if (XINT (unspec, 1) == UNSPECV_SPLIT_STACK_CALL_ESA)
+ s390_expand_split_stack_call_esa (insn,
+ call_done,
+ function,
+ frame_size,
+ args_size);
+ else
+ s390_expand_split_stack_call_zarch (insn,
+ call_done,
+ function,
+ frame_size,
+ args_size,
+ cond);
+ }
+ }
+
/* Try to optimize prologue and epilogue further. */
s390_optimize_prologue ();
@@ -14469,6 +14989,9 @@ s390_asm_file_end (void)
s390_vector_abi);
#endif
file_end_indicate_exec_stack ();
+
+ if (flag_split_stack)
+ file_end_indicate_split_stack ();
}
/* Return true if TYPE is a vector bool type. */
@@ -14724,6 +15247,9 @@ s390_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree ty
#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue
+#undef TARGET_EXTRA_LIVE_ON_ENTRY
+#define TARGET_EXTRA_LIVE_ON_ENTRY s390_live_on_entry
+
#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
s390_use_by_pieces_infrastructure_p
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 0ebefd6..15c6eed 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -114,6 +114,9 @@
UNSPEC_SP_SET
UNSPEC_SP_TEST
+ ; Split stack support
+ UNSPEC_STACK_CHECK
+
; Test Data Class (TDC)
UNSPEC_TDC_INSN
@@ -276,6 +279,12 @@
; Set and get floating point control register
UNSPECV_SFPC
UNSPECV_EFPC
+
+ ; Split stack support
+ UNSPECV_SPLIT_STACK_CALL_ZARCH
+ UNSPECV_SPLIT_STACK_CALL_ESA
+ UNSPECV_SPLIT_STACK_SIBCALL
+ UNSPECV_SPLIT_STACK_MARKER
])
;;
@@ -10909,3 +10918,127 @@
"TARGET_Z13"
"lcbb\t%0,%1,%b2"
[(set_attr "op_type" "VRX")])
+
+; Handle -fsplit-stack.
+
+(define_expand "split_stack_prologue"
+ [(const_int 0)]
+ ""
+{
+ s390_expand_split_stack_prologue ();
+ DONE;
+})
+
+(define_insn "split_stack_call_esa"
+ [(set (pc) (label_ref (match_operand 0 "" "")))
+ (set (reg:SI 1) (unspec_volatile [(match_operand 1 "bras_sym_operand" "X")
+ (match_operand 2 "consttable_operand" "X")
+ (match_operand 3 "consttable_operand" "X")]
+ UNSPECV_SPLIT_STACK_CALL_ESA))]
+ "!TARGET_CPU_ZARCH"
+{
+ gcc_unreachable ();
+}
+ [(set_attr "length" "32")])
+
+(define_insn "split_stack_call_zarch_<mode>"
+ [(set (pc) (label_ref (match_operand 0 "" "")))
+ (set (reg:P 1) (unspec_volatile [(match_operand 1 "bras_sym_operand" "X")
+ (match_operand 2 "consttable_operand" "X")
+ (match_operand 3 "consttable_operand" "X")]
+ UNSPECV_SPLIT_STACK_CALL_ZARCH))]
+ "TARGET_CPU_ZARCH"
+{
+ gcc_unreachable ();
+}
+ [(set_attr "length" "12")])
+
+(define_insn "split_stack_cond_call_zarch_<mode>"
+ [(set (pc)
+ (if_then_else
+ (match_operand 4 "" "")
+ (label_ref (match_operand 0 "" ""))
+ (pc)))
+ (set (reg:P 1) (unspec_volatile [(match_operand 1 "bras_sym_operand" "X")
+ (match_operand 2 "consttable_operand" "X")
+ (match_operand 3 "consttable_operand" "X")]
+ UNSPECV_SPLIT_STACK_CALL_ZARCH))]
+ "TARGET_CPU_ZARCH"
+{
+ gcc_unreachable ();
+}
+ [(set_attr "length" "12")])
+
+;; If there are operand 0 bytes available on the stack, jump to
+;; operand 1.
+
+(define_expand "split_stack_space_check"
+ [(set (pc) (if_then_else
+ (ltu (minus (reg 15)
+ (match_operand 0 "register_operand"))
+ (unspec [(const_int 0)] UNSPEC_STACK_CHECK))
+ (label_ref (match_operand 1))
+ (pc)))]
+ ""
+{
+ /* Offset from thread pointer to __private_ss. */
+ int psso = TARGET_64BIT ? 0x38 : 0x20;
+ rtx tp = s390_get_thread_pointer ();
+ rtx guard = gen_rtx_MEM (Pmode, plus_constant (Pmode, tp, psso));
+ rtx reg = gen_reg_rtx (Pmode);
+ rtx cc;
+ if (TARGET_64BIT)
+ emit_insn (gen_subdi3 (reg, stack_pointer_rtx, operands[0]));
+ else
+ emit_insn (gen_subsi3 (reg, stack_pointer_rtx, operands[0]));
+ cc = s390_emit_compare (GT, reg, guard);
+ s390_emit_jump (operands[1], cc);
+
+ DONE;
+})
+
+;; A basr for use in split stack prologue.
+
+(define_insn "split_stack_sibcall_basr"
+ [(set (pc) (label_ref (match_operand 1 "" "")))
+ (set (reg:SI 1) (unspec_volatile [(match_operand 0 "register_operand" "a")]
+ UNSPECV_SPLIT_STACK_SIBCALL))]
+ "!TARGET_CPU_ZARCH"
+ "basr\t%%r1, %0"
+ [(set_attr "op_type" "RR")
+ (set_attr "type" "jsr")])
+
+;; A jg with minimal fuss for use in split stack prologue.
+
+(define_insn "split_stack_sibcall_<mode>"
+ [(set (pc) (label_ref (match_operand 1 "" "")))
+ (set (reg:P 1) (unspec_volatile [(match_operand 0 "bras_sym_operand" "X")]
+ UNSPECV_SPLIT_STACK_SIBCALL))]
+ "TARGET_CPU_ZARCH"
+ "jg\t%0"
+ [(set_attr "op_type" "RIL")
+ (set_attr "type" "branch")])
+
+;; Also a conditional one.
+
+(define_insn "split_stack_cond_sibcall_<mode>"
+ [(set (pc)
+ (if_then_else
+ (match_operand 1 "" "")
+ (label_ref (match_operand 2 "" ""))
+ (pc)))
+ (set (reg:P 1) (unspec_volatile [(match_operand 0 "bras_sym_operand" "X")]
+ UNSPECV_SPLIT_STACK_SIBCALL))]
+ "TARGET_CPU_ZARCH"
+ "jg%C1\t%0"
+ [(set_attr "op_type" "RIL")
+ (set_attr "type" "branch")])
+
+;; An unusual nop instruction used to mark functions with no stack frames
+;; as split-stack aware.
+
+(define_insn "split_stack_marker"
+ [(unspec_volatile [(const_int 0)] UNSPECV_SPLIT_STACK_MARKER)]
+ ""
+ "nopr\t%%r15"
+ [(set_attr "op_type" "RR")])
diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog
index f66646c..ff60571 100644
--- a/libgcc/ChangeLog
+++ b/libgcc/ChangeLog
@@ -1,3 +1,10 @@
+2016-01-02 Marcin Kościelnicki <[email protected]>
+
+ * config.host: Use t-stack and t-stack-s390 for s390*-*-linux.
+ * config/s390/morestack.S: New file.
+ * config/s390/t-stack-s390: New file.
+ * generic-morestack.c (__splitstack_find): Add s390-specific code.
+
2015-12-18 Andris Pavenis <[email protected]>
* config.host: Add *-*-msdosdjgpp to lists of i[34567]86-*-*
diff --git a/libgcc/config.host b/libgcc/config.host
index 0a3b879..ce6d259 100644
--- a/libgcc/config.host
+++ b/libgcc/config.host
@@ -1105,11 +1105,11 @@ rx-*-elf)
tm_file="$tm_file rx/rx-abi.h rx/rx-lib.h"
;;
s390-*-linux*)
- tmake_file="${tmake_file} s390/t-crtstuff s390/t-linux
s390/32/t-floattodi"
+ tmake_file="${tmake_file} s390/t-crtstuff s390/t-linux
s390/32/t-floattodi t-stack s390/t-stack-s390"
md_unwind_header=s390/linux-unwind.h
;;
s390x-*-linux*)
- tmake_file="${tmake_file} s390/t-crtstuff s390/t-linux"
+ tmake_file="${tmake_file} s390/t-crtstuff s390/t-linux t-stack
s390/t-stack-s390"
if test "${host_address}" = 32; then
tmake_file="${tmake_file} s390/32/t-floattodi"
fi
diff --git a/libgcc/config/s390/morestack.S b/libgcc/config/s390/morestack.S
new file mode 100644
index 0000000..8e26c66
--- /dev/null
+++ b/libgcc/config/s390/morestack.S
@@ -0,0 +1,718 @@
+# s390 support for -fsplit-stack.
+# Copyright (C) 2015 Free Software Foundation, Inc.
+# Contributed by Marcin Kościelnicki <[email protected]>.
+
+# This file is part of GCC.
+
+# GCC is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 3, or (at your option) any later
+# version.
+
+# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+
+# Under Section 7 of GPL version 3, you are granted additional
+# permissions described in the GCC Runtime Library Exception, version
+# 3.1, as published by the Free Software Foundation.
+
+# You should have received a copy of the GNU General Public License and
+# a copy of the GCC Runtime Library Exception along with this program;
+# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+# <http://www.gnu.org/licenses/>.
+
+# Excess space needed to call ld.so resolver for lazy plt
+# resolution. Go uses sigaltstack so this doesn't need to
+# also cover signal frame size.
+#define BACKOFF 0x1000
+
+# The __morestack function.
+
+ .global __morestack
+ .hidden __morestack
+
+ .type __morestack,@function
+
+__morestack:
+.LFB1:
+ .cfi_startproc
+
+
+#ifndef __s390x__
+
+
+# The 31-bit __morestack function.
+
+ # We use a cleanup to restore the stack guard if an exception
+ # is thrown through this code.
+#ifndef __PIC__
+ .cfi_personality 0,__gcc_personality_v0
+ .cfi_lsda 0,.LLSDA1
+#else
+ .cfi_personality 0x9b,DW.ref.__gcc_personality_v0
+ .cfi_lsda 0x1b,.LLSDA1
+#endif
+
+ stm %r2, %r15, 0x8(%r15) # Save %r2-%r15.
+ .cfi_offset %r6, -0x48
+ .cfi_offset %r7, -0x44
+ .cfi_offset %r8, -0x40
+ .cfi_offset %r9, -0x3c
+ .cfi_offset %r10, -0x38
+ .cfi_offset %r11, -0x34
+ .cfi_offset %r12, -0x30
+ .cfi_offset %r13, -0x2c
+ .cfi_offset %r14, -0x28
+ .cfi_offset %r15, -0x24
+ lr %r11, %r15 # Make frame pointer for vararg.
+ .cfi_def_cfa_register %r11
+ ahi %r15, -0x60 # 0x60 for standard frame.
+ st %r11, 0(%r15) # Save back chain.
+ lr %r8, %r0 # Save %r0 (static chain).
+
+ basr %r13, 0 # .Lmsl to %r13
+.Lmsl:
+
+ # %r1 may point directly to the parameter area (zarch), or right after
+ # the basr instruction that called us (esa). In the first case,
+ # the pointer is already aligned. In the second case, we may need to
+ # align it up to 4 bytes to get to the parameters.
+ la %r10, 3(%r1)
+ lhi %r7, -4
+ nr %r10, %r7 # %r10 = (%r1 + 3) & ~3
+
+ l %r7, 0(%r10) # Required frame size to %r7
+ ear %r1, %a0 # Extract thread pointer.
+ l %r1, 0x20(%r1) # Get stack boundary
+ ar %r1, %r7 # Stack boundary + frame size
+ a %r1, 4(%r10) # + stack param size
+ clr %r1, %r15 # Compare with current stack pointer
+ jle .Lnoalloc # guard > sp - frame-size: need alloc
+
+ l %r1, .Lmslbs-.Lmsl(%r13) # __morestack_block_signals
+#ifdef __PIC__
+ bas %r14, 0(%r1, %r13)
+#else
+ basr %r14, %r1
+#endif
+
+ # We abuse one of caller's fpr save slots (which we don't use for fprs)
+ # as a local variable. Not needed here, but done to be consistent with
+ # the below use.
+ ahi %r7, BACKOFF # Bump requested size a bit.
+ st %r7, 0x40(%r11) # Stuff frame size on stack.
+ la %r2, 0x40(%r11) # Pass its address as parameter.
+ la %r3, 0x60(%r11) # Caller's stack parameters.
+ l %r4, 4(%r10) # Size of stack parameters.
+
+ l %r1, .Lmslgms-.Lmsl(%r13) # __generic_morestack
+#ifdef __PIC__
+ bas %r14, 0(%r1, %r13)
+#else
+ basr %r14, %r1
+#endif
+
+ lr %r15, %r2 # Switch to the new stack.
+ ahi %r15, -0x60 # Make a stack frame on it.
+ st %r11, 0(%r15) # Save back chain.
+
+ s %r2, 0x40(%r11) # The end of stack space.
+ ahi %r2, BACKOFF # Back off a bit.
+ ear %r1, %a0 # Extract thread pointer.
+.LEHB0:
+ st %r2, 0x20(%r1) # Save the new stack boundary.
+
+ l %r1, .Lmslubs-.Lmsl(%r13) # __morestack_unblock_signals
+#ifdef __PIC__
+ bas %r14, 0(%r1, %r13)
+#else
+ basr %r14, %r1
+#endif
+
+ lr %r0, %r8 # Static chain.
+ lm %r2, %r6, 0x8(%r11) # Parameter registers.
+
+ # Third parameter is address of function meat - address of parameter
+ # block.
+ a %r10, 0x8(%r10)
+
+ # Leave vararg pointer in %r1, in case function uses it
+ la %r1, 0x60(%r11)
+
+ # State of registers:
+ # %r0: Static chain from entry.
+ # %r1: Vararg pointer.
+ # %r2-%r6: Parameters from entry.
+ # %r7-%r10: Indeterminate.
+ # %r11: Frame pointer (%r15 from entry).
+ # %r12: Indeterminate.
+ # %r13: Literal pool address.
+ # %r14: Return address.
+ # %r15: Stack pointer.
+ basr %r14, %r10 # Call our caller.
+
+ stm %r2, %r3, 0x8(%r11) # Save return registers.
+
+ l %r1, .Lmslbs-.Lmsl(%r13) # __morestack_block_signals
+#ifdef __PIC__
+ bas %r14, 0(%r1, %r13)
+#else
+ basr %r14, %r1
+#endif
+
+ # We need a stack slot now, but have no good way to get it - the frame
+ # on the new stack had to be exactly 0x60 bytes, or stack parameters would
+ # be passed wrong. Abuse fpr save area in caller's frame (we don't
+ # save actual fprs).
+ la %r2, 0x40(%r11)
+ l %r1, .Lmslgrs-.Lmsl(%r13) # __generic_releasestack
+#ifdef __PIC__
+ bas %r14, 0(%r1, %r13)
+#else
+ basr %r14, %r1
+#endif
+
+ s %r2, 0x40(%r11) # Subtract available space.
+ ahi %r2, BACKOFF # Back off a bit.
+ ear %r1, %a0 # Extract thread pointer.
+.LEHE0:
+ st %r2, 0x20(%r1) # Save the new stack boundary.
+
+ # We need to restore the old stack pointer before unblocking signals.
+ # We also need 0x60 bytes for a stack frame. Since we had a stack
+ # frame at this place before the stack switch, there's no need to
+ # write the back chain again.
+ lr %r15, %r11
+ ahi %r15, -0x60
+
+ l %r1, .Lmslubs-.Lmsl(%r13) # __morestack_unblock_signals
+#ifdef __PIC__
+ bas %r14, 0(%r1, %r13)
+#else
+ basr %r14, %r1
+#endif
+
+ lm %r2, %r15, 0x8(%r11) # Restore all registers.
+ .cfi_remember_state
+ .cfi_restore %r15
+ .cfi_restore %r14
+ .cfi_restore %r13
+ .cfi_restore %r12
+ .cfi_restore %r11
+ .cfi_restore %r10
+ .cfi_restore %r9
+ .cfi_restore %r8
+ .cfi_restore %r7
+ .cfi_restore %r6
+ .cfi_def_cfa_register %r15
+ br %r14 # Return to caller's caller.
+
+# Executed if no new stack allocation is needed.
+
+.Lnoalloc:
+ .cfi_restore_state
+ # We may need to copy stack parameters.
+ l %r9, 0x4(%r10) # Load stack parameter size.
+ ltr %r9, %r9 # And check if it's 0.
+ je .Lnostackparm # Skip the copy if not needed.
+ sr %r15, %r9 # Make space on the stack.
+ la %r8, 0x60(%r15) # Destination.
+ la %r12, 0x60(%r11) # Source.
+ lr %r13, %r9 # Source size.
+.Lcopy:
+ mvcle %r8, %r12, 0 # Copy.
+ jo .Lcopy
+
+.Lnostackparm:
+ # Third parameter is address of function meat - address of parameter
+ # block.
+ a %r10, 0x8(%r10)
+
+ # Leave vararg pointer in %r1, in case function uses it
+ la %r1, 0x60(%r11)
+
+ # OK, no stack allocation needed. We still follow the protocol and
+ # call our caller - it doesn't cost much and makes sure vararg works.
+ # No need to set any registers here - %r0 and %r2-%r6 weren't modified.
+ basr %r14, %r10 # Call our caller.
+
+ lm %r6, %r15, 0x18(%r11) # Restore all callee-saved registers.
+ .cfi_remember_state
+ .cfi_restore %r15
+ .cfi_restore %r14
+ .cfi_restore %r13
+ .cfi_restore %r12
+ .cfi_restore %r11
+ .cfi_restore %r10
+ .cfi_restore %r9
+ .cfi_restore %r8
+ .cfi_restore %r7
+ .cfi_restore %r6
+ .cfi_def_cfa_register %r15
+ br %r14 # Return to caller's caller.
+
+# This is the cleanup code called by the stack unwinder when unwinding
+# through the code between .LEHB0 and .LEHE0 above.
+
+.L1:
+ .cfi_restore_state
+ lr %r2, %r11 # Stack pointer after resume.
+ l %r1, .Lmslgfs-.Lmsl(%r13) # __generic_findstack
+#ifdef __PIC__
+ bas %r14, 0(%r1, %r13)
+#else
+ basr %r14, %r1
+#endif
+ lr %r3, %r11 # Get the stack pointer.
+ sr %r3, %r2 # Subtract available space.
+ ahi %r3, BACKOFF # Back off a bit.
+ ear %r1, %a0 # Extract thread pointer.
+ st %r3, 0x20(%r1) # Save the new stack boundary.
+
+ lr %r2, %r6 # Exception header.
+#ifdef __PIC__
+ l %r12, .Lmslgot-.Lmsl(%r13)
+ ar %r12, %r13
+ l %r1, .Lmslunw-.Lmsl(%r13)
+ bas %r14, 0(%r1, %r12)
+#else
+ l %r1, .Lmslunw-.Lmsl(%r13)
+ basr %r14, %r1
+#endif
+
+# Literal pool.
+
+.align 4
+#ifdef __PIC__
+.Lmslbs:
+ .long __morestack_block_signals-.Lmsl
+.Lmslubs:
+ .long __morestack_unblock_signals-.Lmsl
+.Lmslgms:
+ .long __generic_morestack-.Lmsl
+.Lmslgrs:
+ .long __generic_releasestack-.Lmsl
+.Lmslgfs:
+ .long __generic_findstack-.Lmsl
+.Lmslunw:
+ .long _Unwind_Resume@PLTOFF
+.Lmslgot:
+ .long _GLOBAL_OFFSET_TABLE_-.Lmsl
+#else
+.Lmslbs:
+ .long __morestack_block_signals
+.Lmslubs:
+ .long __morestack_unblock_signals
+.Lmslgms:
+ .long __generic_morestack
+.Lmslgrs:
+ .long __generic_releasestack
+.Lmslgfs:
+ .long __generic_findstack
+.Lmslunw:
+ .long _Unwind_Resume
+#endif
+
+#else /* defined(__s390x__) */
+
+
+# The 64-bit __morestack function.
+
+ # We use a cleanup to restore the stack guard if an exception
+ # is thrown through this code.
+#ifndef __PIC__
+ .cfi_personality 0x3,__gcc_personality_v0
+ .cfi_lsda 0x3,.LLSDA1
+#else
+ .cfi_personality 0x9b,DW.ref.__gcc_personality_v0
+ .cfi_lsda 0x1b,.LLSDA1
+#endif
+
+ stmg %r2, %r15, 0x10(%r15) # Save %r2-%r15.
+ .cfi_offset %r6, -0x70
+ .cfi_offset %r7, -0x68
+ .cfi_offset %r8, -0x60
+ .cfi_offset %r9, -0x58
+ .cfi_offset %r10, -0x50
+ .cfi_offset %r11, -0x48
+ .cfi_offset %r12, -0x40
+ .cfi_offset %r13, -0x38
+ .cfi_offset %r14, -0x30
+ .cfi_offset %r15, -0x28
+ lgr %r11, %r15 # Make frame pointer for vararg.
+ .cfi_def_cfa_register %r11
+ aghi %r15, -0xa0 # 0xa0 for standard frame.
+ stg %r11, 0(%r15) # Save back chain.
+ lgr %r8, %r0 # Save %r0 (static chain).
+ lgr %r10, %r1 # Save %r1 (address of parameter block).
+
+ lg %r7, 0(%r10) # Required frame size to %r7
+ ear %r1, %a0
+ sllg %r1, %r1, 32
+ ear %r1, %a1 # Extract thread pointer.
+ lg %r1, 0x38(%r1) # Get stack boundary
+ agr %r1, %r7 # Stack boundary + frame size
+ ag %r1, 8(%r10) # + stack param size
+ clgr %r1, %r15 # Compare with current stack pointer
+ jle .Lnoalloc # guard > sp - frame-size: need alloc
+
+ brasl %r14, __morestack_block_signals
+
+ # We abuse one of caller's fpr save slots (which we don't use for fprs)
+ # as a local variable. Not needed here, but done to be consistent with
+ # the below use.
+ aghi %r7, BACKOFF # Bump requested size a bit.
+ stg %r7, 0x80(%r11) # Stuff frame size on stack.
+ la %r2, 0x80(%r11) # Pass its address as parameter.
+ la %r3, 0xa0(%r11) # Caller's stack parameters.
+ lg %r4, 8(%r10) # Size of stack parameters.
+ brasl %r14, __generic_morestack
+
+ lgr %r15, %r2 # Switch to the new stack.
+ aghi %r15, -0xa0 # Make a stack frame on it.
+ stg %r11, 0(%r15) # Save back chain.
+
+ sg %r2, 0x80(%r11) # The end of stack space.
+ aghi %r2, BACKOFF # Back off a bit.
+ ear %r1, %a0
+ sllg %r1, %r1, 32
+ ear %r1, %a1 # Extract thread pointer.
+.LEHB0:
+ stg %r2, 0x38(%r1) # Save the new stack boundary.
+
+ brasl %r14, __morestack_unblock_signals
+
+ lgr %r0, %r8 # Static chain.
+ lmg %r2, %r6, 0x10(%r11) # Parameter registers.
+
+ # Third parameter is address of function meat - address of parameter
+ # block.
+ ag %r10, 0x10(%r10)
+
+ # Leave vararg pointer in %r1, in case function uses it
+ la %r1, 0xa0(%r11)
+
+ # State of registers:
+ # %r0: Static chain from entry.
+ # %r1: Vararg pointer.
+ # %r2-%r6: Parameters from entry.
+ # %r7-%r10: Indeterminate.
+ # %r11: Frame pointer (%r15 from entry).
+ # %r12-%r13: Indeterminate.
+ # %r14: Return address.
+ # %r15: Stack pointer.
+ basr %r14, %r10 # Call our caller.
+
+ stg %r2, 0x10(%r11) # Save return register.
+
+ brasl %r14, __morestack_block_signals
+
+ # We need a stack slot now, but have no good way to get it - the frame
+ # on the new stack had to be exactly 0xa0 bytes, or stack parameters would
+ # be passed wrong. Abuse fpr save area in caller's frame (we don't
+ # save actual fprs).
+ la %r2, 0x80(%r11)
+ brasl %r14, __generic_releasestack
+
+ sg %r2, 0x80(%r11) # Subtract available space.
+ aghi %r2, BACKOFF # Back off a bit.
+ ear %r1, %a0
+ sllg %r1, %r1, 32
+ ear %r1, %a1 # Extract thread pointer.
+.LEHE0:
+ stg %r2, 0x38(%r1) # Save the new stack boundary.
+
+ # We need to restore the old stack pointer before unblocking signals.
+ # We also need 0xa0 bytes for a stack frame. Since we had a stack
+ # frame at this place before the stack switch, there's no need to
+ # write the back chain again.
+ lgr %r15, %r11
+ aghi %r15, -0xa0
+
+ brasl %r14, __morestack_unblock_signals
+
+ lmg %r2, %r15, 0x10(%r11) # Restore all registers.
+ .cfi_remember_state
+ .cfi_restore %r15
+ .cfi_restore %r14
+ .cfi_restore %r13
+ .cfi_restore %r12
+ .cfi_restore %r11
+ .cfi_restore %r10
+ .cfi_restore %r9
+ .cfi_restore %r8
+ .cfi_restore %r7
+ .cfi_restore %r6
+ .cfi_def_cfa_register %r15
+ br %r14 # Return to caller's caller.
+
+# Executed if no new stack allocation is needed.
+
+.Lnoalloc:
+ .cfi_restore_state
+ # We may need to copy stack parameters.
+ lg %r9, 0x8(%r10) # Load stack parameter size.
+ ltgr %r9, %r9 # Check if it's 0.
+ je .Lnostackparm # Skip the copy if not needed.
+ sgr %r15, %r9 # Make space on the stack.
+ la %r8, 0xa0(%r15) # Destination.
+ la %r12, 0xa0(%r11) # Source.
+ lgr %r13, %r9 # Source size.
+.Lcopy:
+ mvcle %r8, %r12, 0 # Copy.
+ jo .Lcopy
+
+.Lnostackparm:
+ # Third parameter is address of function meat - address of parameter
+ # block.
+ ag %r10, 0x10(%r10)
+
+ # Leave vararg pointer in %r1, in case function uses it
+ la %r1, 0xa0(%r11)
+
+ # OK, no stack allocation needed. We still follow the protocol and
+ # call our caller - it doesn't cost much and makes sure vararg works.
+ # No need to set any registers here - %r0 and %r2-%r6 weren't modified.
+ basr %r14, %r10 # Call our caller.
+
+ lmg %r6, %r15, 0x30(%r11) # Restore all callee-saved registers.
+ .cfi_remember_state
+ .cfi_restore %r15
+ .cfi_restore %r14
+ .cfi_restore %r13
+ .cfi_restore %r12
+ .cfi_restore %r11
+ .cfi_restore %r10
+ .cfi_restore %r9
+ .cfi_restore %r8
+ .cfi_restore %r7
+ .cfi_restore %r6
+ .cfi_def_cfa_register %r15
+ br %r14 # Return to caller's caller.
+
+# This is the cleanup code called by the stack unwinder when unwinding
+# through the code between .LEHB0 and .LEHE0 above.
+
+.L1:
+ .cfi_restore_state
+ lgr %r2, %r11 # Stack pointer after resume.
+ brasl %r14, __generic_findstack
+ lgr %r3, %r11 # Get the stack pointer.
+ sgr %r3, %r2 # Subtract available space.
+ aghi %r3, BACKOFF # Back off a bit.
+ ear %r1, %a0
+ sllg %r1, %r1, 32
+ ear %r1, %a1 # Extract thread pointer.
+ stg %r3, 0x38(%r1) # Save the new stack boundary.
+
+ lgr %r2, %r6 # Exception header.
+#ifdef __PIC__
+ brasl %r14, _Unwind_Resume@PLT
+#else
+ brasl %r14, _Unwind_Resume
+#endif
+
+#endif /* defined(__s390x__) */
+
+ .cfi_endproc
+ .size __morestack, . - __morestack
+
+
+# The exception table. This tells the personality routine to execute
+# the exception handler.
+
+ .section .gcc_except_table,"a",@progbits
+ .align 4
+.LLSDA1:
+ .byte 0xff # @LPStart format (omit)
+ .byte 0xff # @TType format (omit)
+ .byte 0x1 # call-site format (uleb128)
+ .uleb128 .LLSDACSE1-.LLSDACSB1 # Call-site table length
+.LLSDACSB1:
+ .uleb128 .LEHB0-.LFB1 # region 0 start
+ .uleb128 .LEHE0-.LEHB0 # length
+ .uleb128 .L1-.LFB1 # landing pad
+ .uleb128 0 # action
+.LLSDACSE1:
+
+
+ .global __gcc_personality_v0
+#ifdef __PIC__
+ # Build a position independent reference to the basic
+ # personality function.
+ .hidden DW.ref.__gcc_personality_v0
+ .weak DW.ref.__gcc_personality_v0
+ .section .data.DW.ref.__gcc_personality_v0,"awG",@progbits,DW.ref.__gcc_personality_v0,comdat
+ .type DW.ref.__gcc_personality_v0, @object
+DW.ref.__gcc_personality_v0:
+#ifndef __LP64__
+ .align 4
+ .size DW.ref.__gcc_personality_v0, 4
+ .long __gcc_personality_v0
+#else
+ .align 8
+ .size DW.ref.__gcc_personality_v0, 8
+ .quad __gcc_personality_v0
+#endif
+#endif
+
+
+
+# Initialize the stack test value when the program starts or when a
+# new thread starts. We don't know how large the main stack is, so we
+# guess conservatively. We might be able to use getrlimit here.
+
+ .text
+ .global __stack_split_initialize
+ .hidden __stack_split_initialize
+
+ .type __stack_split_initialize, @function
+
+__stack_split_initialize:
+
+#ifndef __s390x__
+
+ ear %r1, %a0
+ lr %r0, %r15
+ ahi %r0, -0x4000 # We should have at least 16K.
+ st %r0, 0x20(%r1)
+
+ lr %r2, %r15
+ lhi %r3, 0x4000
+#ifdef __PIC__
+ # Cannot do a tail call - we'll go through PLT, so we need GOT address
+ # in %r12, which is callee-saved.
+ stm %r12, %r15, 0x30(%r15)
+ basr %r13, 0
+.Lssi0:
+ ahi %r15, -0x60
+ l %r12, .Lssi2-.Lssi0(%r13)
+ ar %r12, %r13
+ l %r1, .Lssi1-.Lssi0(%r13)
+ bas %r14, 0(%r1, %r12)
+ lm %r12, %r15, 0x90(%r15)
+ br %r14
+
+.align 4
+.Lssi1:
+ .long __generic_morestack_set_initial_sp@PLTOFF
+.Lssi2:
+ .long _GLOBAL_OFFSET_TABLE_-.Lssi0
+
+#else
+ basr %r1, 0
+.Lssi0:
+ l %r1, .Lssi1-.Lssi0(%r1)
+ br %r1 # Tail call
+
+.align 4
+.Lssi1:
+ .long __generic_morestack_set_initial_sp
+#endif
+
+#else /* defined(__s390x__) */
+
+ ear %r1, %a0
+ sllg %r1, %r1, 32
+ ear %r1, %a1
+ lgr %r0, %r15
+ aghi %r0, -0x4000 # We should have at least 16K.
+ stg %r0, 0x38(%r1)
+
+ lgr %r2, %r15
+ lghi %r3, 0x4000
+#ifdef __PIC__
+ jg __generic_morestack_set_initial_sp@PLT # Tail call
+#else
+ jg __generic_morestack_set_initial_sp # Tail call
+#endif
+
+#endif /* defined(__s390x__) */
+
+ .size __stack_split_initialize, . - __stack_split_initialize
+
+# Routines to get and set the guard, for __splitstack_getcontext,
+# __splitstack_setcontext, and __splitstack_makecontext.
+
+# void *__morestack_get_guard (void) returns the current stack guard.
+ .text
+ .global __morestack_get_guard
+ .hidden __morestack_get_guard
+
+ .type __morestack_get_guard,@function
+
+__morestack_get_guard:
+
+#ifndef __s390x__
+ ear %r1, %a0
+ l %r2, 0x20(%r1)
+#else
+ ear %r1, %a0
+ sllg %r1, %r1, 32
+ ear %r1, %a1
+ lg %r2, 0x38(%r1)
+#endif
+ br %r14
+
+ .size __morestack_get_guard, . - __morestack_get_guard
+
+# void __morestack_set_guard (void *) sets the stack guard.
+ .global __morestack_set_guard
+ .hidden __morestack_set_guard
+
+ .type __morestack_set_guard,@function
+
+__morestack_set_guard:
+
+#ifndef __s390x__
+ ear %r1, %a0
+ st %r2, 0x20(%r1)
+#else
+ ear %r1, %a0
+ sllg %r1, %r1, 32
+ ear %r1, %a1
+ stg %r2, 0x38(%r1)
+#endif
+ br %r14
+
+ .size __morestack_set_guard, . - __morestack_set_guard
+
+# void *__morestack_make_guard (void *, size_t) returns the stack
+# guard value for a stack.
+ .global __morestack_make_guard
+ .hidden __morestack_make_guard
+
+ .type __morestack_make_guard,@function
+
+__morestack_make_guard:
+
+#ifndef __s390x__
+ sr %r2, %r3
+ ahi %r2, BACKOFF
+#else
+ sgr %r2, %r3
+ aghi %r2, BACKOFF
+#endif
+ br %r14
+
+ .size __morestack_make_guard, . - __morestack_make_guard
+
+# Make __stack_split_initialize a high priority constructor.
+
+ .section .ctors.65535,"aw",@progbits
+
+#ifndef __LP64__
+ .align 4
+ .long __stack_split_initialize
+ .long __morestack_load_mmap
+#else
+ .align 8
+ .quad __stack_split_initialize
+ .quad __morestack_load_mmap
+#endif
+
+ .section .note.GNU-stack,"",@progbits
+ .section .note.GNU-split-stack,"",@progbits
+ .section .note.GNU-no-split-stack,"",@progbits
diff --git a/libgcc/config/s390/t-stack-s390 b/libgcc/config/s390/t-stack-s390
new file mode 100644
index 0000000..4c959b0
--- /dev/null
+++ b/libgcc/config/s390/t-stack-s390
@@ -0,0 +1,2 @@
+# Makefile fragment to support -fsplit-stack for s390.
+LIB2ADD_ST += $(srcdir)/config/s390/morestack.S
diff --git a/libgcc/generic-morestack.c b/libgcc/generic-morestack.c
index a10559b..8109c1a 100644
--- a/libgcc/generic-morestack.c
+++ b/libgcc/generic-morestack.c
@@ -939,6 +939,10 @@ __splitstack_find (void *segment_arg, void *sp, size_t *len,
#elif defined (__i386__)
nsp -= 6 * sizeof (void *);
#elif defined __powerpc64__
+#elif defined __s390x__
+ nsp -= 2 * 160;
+#elif defined __s390__
+ nsp -= 2 * 96;
#else
#error "unrecognized target"
#endif
--
2.6.4