This patch adds two builtins: one returns the end-of-stack pointer, and the other returns a Boolean indicating whether this is the first call to that builtin on the current thread.
The idea is to replace some hard-coded values in newlib, making it possible to move later to a manually allocated stack on the compiler side without needing to modify newlib again. The GCC patch matches what newlib did in its reent code (getreent.c); I could imagine that we change this later on. Lightly tested (especially by visual inspection). Currently doing a final regtest, OK when it passes? Any comments on this patch - or the attached newlib patch?* Tobias (*) I also included a patch to newlib to see where we are heading + to actually use the new builtins for regtesting ... ----------------- Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955
gcn: Add __builtin_gcn_{get_stack_limit,first_call_this_thread_p} The new builtins have been added for newlib to reduce dependency on compiler-internal implementation choices of GCC in newlibs' getreent.c. gcc/ChangeLog: * config/gcn/gcn-builtins.def (FIRST_CALL_THIS_THREAD_P, GET_STACK_LIMIT): Add new builtins. * config/gcn/gcn.cc (gcn_expand_builtin_1): Expand them. * config/gcn/gcn.md (prologue_use): Add "register_operand" as arg to match_operand. (prologue_use_di): New; DI insn_and_split variant of the former. Co-Authored-By: Andrew Stubbs <a...@codesourcery.com> gcc/config/gcn/gcn-builtins.def | 4 +++ gcc/config/gcn/gcn.cc | 70 ++++++++++++++++++++++++++++++++++++++++- gcc/config/gcn/gcn.md | 15 ++++++++- 3 files changed, 87 insertions(+), 2 deletions(-) diff --git a/gcc/config/gcn/gcn-builtins.def b/gcc/config/gcn/gcn-builtins.def index eeeaebf9013..f1cf30bbc94 100644 --- a/gcc/config/gcn/gcn-builtins.def +++ b/gcc/config/gcn/gcn-builtins.def @@ -160,8 +160,12 @@ DEF_BUILTIN (ACC_BARRIER, -1, "acc_barrier", B_INSN, _A1 (GCN_BTI_VOID), /* Kernel inputs. 
*/ +DEF_BUILTIN (FIRST_CALL_THIS_THREAD_P, -1, "first_call_this_thread_p", B_INSN, + _A1 (GCN_BTI_BOOL), gcn_expand_builtin_1) DEF_BUILTIN (KERNARG_PTR, -1, "kernarg_ptr", B_INSN, _A1 (GCN_BTI_VOIDPTR), gcn_expand_builtin_1) +DEF_BUILTIN (GET_STACK_LIMIT, -1, "get_stack_limit", B_INSN, + _A1 (GCN_BTI_VOIDPTR), gcn_expand_builtin_1) #undef _A1 #undef _A2 diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc index b3814c2e7c6..051eadee783 100644 --- a/gcc/config/gcn/gcn.cc +++ b/gcc/config/gcn/gcn.cc @@ -4493,6 +4493,44 @@ gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget */ , emit_insn (gen_gcn_wavefront_barrier ()); return target; + case GCN_BUILTIN_GET_STACK_LIMIT: + { + /* stackbase = (stack_segment_decr & 0x0000ffffffffffff) + + stack_wave_offset); + seg_size = dispatch_ptr->private_segment_size; + stacklimit = stackbase + seg_size*64; + with segsize = dispatch_ptr + 6*sizeof(int16_t) + 3*sizeof(int32_t); + cf. struct hsa_kernel_dispatch_packet_s in the HSA doc. */ + rtx ptr; + if (cfun->machine->args.reg[DISPATCH_PTR_ARG] >= 0 + && cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] >= 0) + { + rtx size_rtx = gen_rtx_REG (DImode, + cfun->machine->args.reg[DISPATCH_PTR_ARG]); + size_rtx = gen_rtx_MEM (DImode, + gen_rtx_PLUS (DImode, size_rtx, + GEN_INT (6*16 + 3*32))); + size_rtx = gen_rtx_MULT (DImode, size_rtx, GEN_INT (64)); + + ptr = gen_rtx_REG (DImode, + cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG]); + ptr = gen_rtx_AND (DImode, ptr, GEN_INT (0x0000ffffffffffff)); + ptr = gen_rtx_PLUS (DImode, ptr, size_rtx); + if (cfun->machine->args.reg[PRIVATE_SEGMENT_WAVE_OFFSET_ARG] >= 0) + { + rtx off; + off = gen_rtx_REG (SImode, + cfun->machine->args.reg[PRIVATE_SEGMENT_WAVE_OFFSET_ARG]); + ptr = gen_rtx_PLUS (DImode, ptr, off); + } + } + else + { + ptr = gen_reg_rtx (DImode); + emit_move_insn (ptr, const0_rtx); + } + return ptr; + } case GCN_BUILTIN_KERNARG_PTR: { rtx ptr; @@ -4506,7 +4544,37 @@ gcn_expand_builtin_1 (tree exp, rtx target, rtx 
/*subtarget */ , } return ptr; } - + case GCN_BUILTIN_FIRST_CALL_THIS_THREAD_P: + { + /* Stash a marker in the unused upper 16 bits of s[0:1] to indicate + whether it was the first call. */ + rtx result = gen_reg_rtx (BImode); + emit_move_insn (result, const0_rtx); + if (cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] >= 0) + { + rtx not_first = gen_label_rtx (); + rtx reg = gen_rtx_REG (DImode, + cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG]); + rtx cmp = force_reg (DImode, + gen_rtx_AND (DImode, reg, + GEN_INT (0xffff000000000000L))); + emit_insn (gen_cstoresi4 (result, gen_rtx_EQ (BImode, cmp, + GEN_INT(12345L << 48)), + cmp, GEN_INT(12345L << 48))); + emit_jump_insn (gen_cjump (not_first, gen_rtx_EQ (BImode, result, + const0_rtx), + result)); + emit_move_insn (reg, + force_reg (DImode, + gen_rtx_IOR (DImode, + gen_rtx_AND (DImode, reg, + GEN_INT (0x0000ffffffffffffL)), + GEN_INT (12345L << 48)))); + emit_insn (gen_prologue_use (reg)); + emit_label (not_first); + } + return result; + } default: gcc_unreachable (); } diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md index 987b76396cc..a8b9c28d115 100644 --- a/gcc/config/gcn/gcn.md +++ b/gcc/config/gcn/gcn.md @@ -692,11 +692,24 @@ ;; {{{ Prologue/Epilogue (define_insn "prologue_use" - [(unspec_volatile [(match_operand 0)] UNSPECV_PROLOGUE_USE)] + [(unspec_volatile [(match_operand 0 "register_operand")] UNSPECV_PROLOGUE_USE)] "" "" [(set_attr "length" "0")]) +(define_insn_and_split "prologue_use_di" + [(unspec_volatile [(match_operand:DI 0 "register_operand")] UNSPECV_PROLOGUE_USE)] + "" + "#" + "reload_completed" + [(unspec_volatile [(match_dup 0)] UNSPECV_PROLOGUE_USE) + (unspec_volatile [(match_dup 1)] UNSPECV_PROLOGUE_USE)] + { + operands[1] = gcn_operand_part (DImode, operands[0], 1); + operands[0] = gcn_operand_part (DImode, operands[0], 0); + } + [(set_attr "length" "0")]) + (define_expand "prologue" [(const_int 0)] ""
amdgcn: Use __builtin_gcn_ in libc/machine/amdgcn/getreent.c Call __builtin_gcn_get_stack_limit and __builtin_gcn_first_call_this_thread_p to reduce dependency on some register/layout assumptions by using the new GCC mainline (GCC 13) builtins, if they are available. If not, the existing code is used. newlib/libc/machine/amdgcn/getreent.c | 38 ++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/newlib/libc/machine/amdgcn/getreent.c b/newlib/libc/machine/amdgcn/getreent.c index be7d2edc9..ef731f649 100644 --- a/newlib/libc/machine/amdgcn/getreent.c +++ b/newlib/libc/machine/amdgcn/getreent.c @@ -29,22 +29,42 @@ typedef struct hsa_kernel_dispatch_packet_s { struct _reent * __getreent (void) { - /* Place the reent data at the top of the stack allocation. - s[0:1] contains a 48-bit private segment base address. + /* Place the reent data at the top of the stack allocation. */ + struct data { + int marker; + struct _reent reent; + } *data; + +#if defined(__has_builtin) \ + && __has_builtin(__builtin_gcn_get_stack_limit) \ + && __has_builtin(__builtin_gcn_first_call_this_thread_p) + unsigned long addr = (((unsigned long) __builtin_gcn_get_stack_limit() + - sizeof(struct data)) & ~7); + data = (struct data *)addr; + + register long sp asm("s16"); + + if (sp >= addr) + goto stackoverflow; + if (__builtin_gcn_first_call_this_thread_p()) + { + data->marker = 12345; + __builtin_memset (&data->reent, 0, sizeof(struct _reent)); + _REENT_INIT_PTR_ZEROED (&data->reent); + } + else if (data->marker != 12345) + goto stackoverflow; +#else + /* s[0:1] contains a 48-bit private segment base address. s11 contains the offset to the base of the stack. s[4:5] contains the dispatch pointer. - + WARNING: this code will break if s[0:1] is ever used for anything! 
*/ const register unsigned long buffer_descriptor asm("s0"); unsigned long private_segment = buffer_descriptor & 0x0000ffffffffffff; const register unsigned int stack_offset asm("s11"); const register hsa_kernel_dispatch_packet_t *dispatch_ptr asm("s4"); - struct data { - int marker; - struct _reent reent; - } *data; - unsigned long stack_base = private_segment + stack_offset; unsigned long stack_end = stack_base + dispatch_ptr->private_segment_size * 64; unsigned long addr = (stack_end - sizeof(struct data)) & ~7; @@ -69,7 +89,7 @@ __getreent (void) } else if (data->marker != 12345) goto stackoverflow; - +#endif return &data->reent;