Yury Khrustalev <yury.khrusta...@arm.com> writes: > From: Szabolcs Nagy <szabolcs.n...@arm.com> > > Nonlocal stack save and restore has to also save and restore the GCS > pointer. This is used in __builtin_setjmp/longjmp and nonlocal goto. > > The GCS specific code is only emitted if GCS branch-protection is > enabled and the code always checks at runtime if GCS is enabled. > > The new -mbranch-protection=gcs and old -mbranch-protection=none code > are ABI compatible: jmpbuf for __builtin_setjmp has space for 5 > pointers, the layout is > > old layout: fp, pc, sp, unused, unused > new layout: fp, pc, sp, gcsp, unused > > Note: the ILP32 code generation is wrong as it saves the pointers with > Pmode (i.e. 8 bytes per pointer), but the user supplied buffer size is > for 5 pointers (4 bytes per pointer), this is not fixed. > > The nonlocal goto has no ABI compatibility issues as the goto and its > destination are in the same translation unit. > > We use CDImode to allow extra space for GCS without the effect of 16-byte > alignment. > > gcc/ChangeLog: > > * config/aarch64/aarch64.h (STACK_SAVEAREA_MODE): Make space for gcs. > * config/aarch64/aarch64.md (save_stack_nonlocal): New. > (restore_stack_nonlocal): New. > * tree-nested.cc (get_nl_goto_field): Updated. > --- > gcc/config/aarch64/aarch64.h | 7 +++ > gcc/config/aarch64/aarch64.md | 82 +++++++++++++++++++++++++++++++++++ > gcc/tree-nested.cc | 4 +- > 3 files changed, 91 insertions(+), 2 deletions(-) > > diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h > index 593319fd472..c3fbe9b464c 100644 > --- a/gcc/config/aarch64/aarch64.h > +++ b/gcc/config/aarch64/aarch64.h > @@ -1297,6 +1297,13 @@ typedef struct > #define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ > ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2) > > +/* Have space for both SP and GCSPR in the NONLOCAL case in > + emit_stack_save as well as in __builtin_setjmp, __builtin_longjmp > + and __builtin_nonlocal_goto. 
> + Note: On ILP32 the documented buf size is not enough PR84150. */ > +#define STACK_SAVEAREA_MODE(LEVEL) \ > + ((LEVEL) == SAVE_NONLOCAL ? E_CDImode : Pmode) > + > #define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, LR_REGNUM) > > #define RETURN_ADDR_RTX aarch64_return_addr > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md > index e16e841fae0..861527fce36 100644 > --- a/gcc/config/aarch64/aarch64.md > +++ b/gcc/config/aarch64/aarch64.md > @@ -1199,6 +1199,88 @@ (define_insn "*cb<optab><mode>1" > (const_int 1)))] > ) > > +(define_expand "save_stack_nonlocal" > + [(set (match_operand 0 "memory_operand") > + (match_operand 1 "register_operand"))] > + "" > +{ > + rtx stack_slot = adjust_address (operands[0], Pmode, 0); > + emit_move_insn (stack_slot, operands[1]); > + > + if (aarch64_gcs_enabled ()) > + { > + /* Save GCS with code like > + mov x16, 1 > + chkfeat x16 > + tbnz x16, 0, .L_done > + mrs tmp, gcspr_el0 > + str tmp, [%0, 8] > + .L_done: */ > + > + rtx done_label = gen_label_rtx (); > + rtx r16 = gen_rtx_REG (DImode, R16_REGNUM); > + emit_move_insn (r16, const1_rtx); > + emit_insn (gen_aarch64_chkfeat ()); > + emit_insn (gen_tbranch_neqi3 (r16, const0_rtx, done_label)); > + rtx gcs_slot = adjust_address (operands[0], Pmode, GET_MODE_SIZE > (Pmode)); > + rtx gcs = gen_reg_rtx (Pmode); > + emit_insn (gen_aarch64_load_gcspr (gcs)); > + emit_move_insn (gcs_slot, gcs); > + emit_label (done_label); > + } > + DONE; > +}) > + > +(define_expand "restore_stack_nonlocal" > + [(set (match_operand 0 "register_operand" "") > + (match_operand 1 "memory_operand" ""))] > + "" > +{ > + rtx stack_slot = adjust_address (operands[1], Pmode, 0); > + emit_move_insn (operands[0], stack_slot); > + > + if (aarch64_gcs_enabled ()) > + { > + /* Restore GCS with code like > + mov x16, 1 > + chkfeat x16 > + tbnz x16, 0, .L_done > + ldr tmp1, [%1, 8] > + mrs tmp2, gcspr_el0 > + subs tmp2, tmp1, tmp2 > + b.eq .L_done > + .L_loop: > + gcspopm > + subs tmp2, tmp2, 
8 > + b.ne .L_loop > + .L_done: */ > + > + rtx loop_label = gen_label_rtx (); > + rtx done_label = gen_label_rtx (); > + rtx r16 = gen_rtx_REG (DImode, R16_REGNUM); > + emit_move_insn (r16, const1_rtx); > + emit_insn (gen_aarch64_chkfeat ()); > + emit_insn (gen_tbranch_neqi3 (r16, const0_rtx, done_label)); > + rtx gcs_slot = adjust_address (operands[1], Pmode, GET_MODE_SIZE > (Pmode)); > + rtx gcs_old = force_reg (Pmode, const0_rtx);
This should be: rtx gcs_old = gen_reg_rtx (Pmode); > + emit_move_insn (gcs_old, gcs_slot); > + rtx gcs_now = force_reg (Pmode, const0_rtx); Similarly: rtx gcs_now = gen_reg_rtx (Pmode); Looks good otherwise. Thanks, Richard > + emit_insn (gen_aarch64_load_gcspr (gcs_now)); > + emit_insn (gen_subdi3_compare1 (gcs_now, gcs_old, gcs_now)); > + rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM); > + rtx cmp_rtx = gen_rtx_fmt_ee (EQ, DImode, cc_reg, const0_rtx); > + emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, done_label)); > + emit_label (loop_label); > + emit_insn (gen_aarch64_gcspopm_xzr ()); > + emit_insn (gen_adddi3_compare0 (gcs_now, gcs_now, GEN_INT (-8))); > + cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM); > + cmp_rtx = gen_rtx_fmt_ee (NE, DImode, cc_reg, const0_rtx); > + emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, loop_label)); > + emit_label (done_label); > + } > + DONE; > +}) > + > ;; ------------------------------------------------------------------- > ;; Subroutine calls and sibcalls > ;; ------------------------------------------------------------------- > diff --git a/gcc/tree-nested.cc b/gcc/tree-nested.cc > index a54e72c3237..dfbd3432ce1 100644 > --- a/gcc/tree-nested.cc > +++ b/gcc/tree-nested.cc > @@ -783,8 +783,8 @@ get_nl_goto_field (struct nesting_info *info) > else > type = lang_hooks.types.type_for_mode (Pmode, 1); > > - scalar_int_mode mode > - = as_a <scalar_int_mode> (STACK_SAVEAREA_MODE (SAVE_NONLOCAL)); > + fixed_size_mode mode > + = as_a <fixed_size_mode> (STACK_SAVEAREA_MODE (SAVE_NONLOCAL)); > size = GET_MODE_SIZE (mode); > size = size / GET_MODE_SIZE (Pmode); > size = size + 1;