https://gcc.gnu.org/g:41479351d638ed469bd6d8975dcacc4e72e503b3

commit r15-5267-g41479351d638ed469bd6d8975dcacc4e72e503b3
Author: Szabolcs Nagy <szabolcs.n...@arm.com>
Date:   Thu Nov 14 16:15:08 2024 +0000

    aarch64: Add GCS support for nonlocal stack save
    
    Nonlocal stack save and restore has to also save and restore the GCS
    pointer. This is used in __builtin_setjmp/longjmp and nonlocal goto.
    
    The GCS specific code is only emitted if GCS branch-protection is
    enabled and the code always checks at runtime if GCS is enabled.
    
    The new -mbranch-protection=gcs and old -mbranch-protection=none code
    are ABI compatible: jmpbuf for __builtin_setjmp has space for 5
    pointers, the layout is
    
      old layout: fp, pc, sp, unused, unused
      new layout: fp, pc, sp, gcsp, unused
    
    Note: the ILP32 code generation is wrong as it saves the pointers with
    Pmode (i.e. 8 bytes per pointer), but the user-supplied buffer size is
    for 5 pointers (4 bytes per pointer); this is not fixed.
    
    The nonlocal goto has no ABI compatibility issues as the goto and its
    destination are in the same translation unit.
    
    We use CDImode to allow extra space for GCS without the effect of 16-byte
    alignment.
    
    gcc/ChangeLog:
    
            * config/aarch64/aarch64.h (STACK_SAVEAREA_MODE): Make space for gcs.
            * config/aarch64/aarch64.md (save_stack_nonlocal): New.
            (restore_stack_nonlocal): New.
            * tree-nested.cc (get_nl_goto_field): Updated.

Diff:
---
 gcc/config/aarch64/aarch64.h  |  7 ++++
 gcc/config/aarch64/aarch64.md | 82 +++++++++++++++++++++++++++++++++++++++++++
 gcc/tree-nested.cc            |  4 +--
 3 files changed, 91 insertions(+), 2 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index f07b2c49f0d9..58d82c7d0e85 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -1308,6 +1308,13 @@ typedef struct
 #define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
   ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
 
+/* Have space for both SP and GCSPR in the NONLOCAL case in
+   emit_stack_save as well as in __builtin_setjmp, __builtin_longjmp
+   and __builtin_nonlocal_goto.
+   Note: On ILP32 the documented buf size is not enough PR84150.  */
+#define STACK_SAVEAREA_MODE(LEVEL)                     \
+  ((LEVEL) == SAVE_NONLOCAL ? E_CDImode : Pmode)
+
 #define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, LR_REGNUM)
 
 #define RETURN_ADDR_RTX aarch64_return_addr
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index e16e841fae0d..7708506eacff 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1199,6 +1199,88 @@
                      (const_int 1)))]
 )
 
+(define_expand "save_stack_nonlocal"
+  [(set (match_operand 0 "memory_operand")
+        (match_operand 1 "register_operand"))]
+  ""
+{
+  rtx stack_slot = adjust_address (operands[0], Pmode, 0);
+  emit_move_insn (stack_slot, operands[1]);
+
+  if (aarch64_gcs_enabled ())
+    {
+      /* Save GCS with code like
+               mov     x16, 1
+               chkfeat x16
+               tbnz    x16, 0, .L_done
+               mrs     tmp, gcspr_el0
+               str     tmp, [%0, 8]
+       .L_done:  */
+
+      rtx done_label = gen_label_rtx ();
+      rtx r16 = gen_rtx_REG (DImode, R16_REGNUM);
+      emit_move_insn (r16, const1_rtx);
+      emit_insn (gen_aarch64_chkfeat ());
+      emit_insn (gen_tbranch_neqi3 (r16, const0_rtx, done_label));
+      rtx gcs_slot = adjust_address (operands[0], Pmode, GET_MODE_SIZE (Pmode));
+      rtx gcs = gen_reg_rtx (Pmode);
+      emit_insn (gen_aarch64_load_gcspr (gcs));
+      emit_move_insn (gcs_slot, gcs);
+      emit_label (done_label);
+    }
+  DONE;
+})
+
+(define_expand "restore_stack_nonlocal"
+  [(set (match_operand 0 "register_operand" "")
+       (match_operand 1 "memory_operand" ""))]
+  ""
+{
+  rtx stack_slot = adjust_address (operands[1], Pmode, 0);
+  emit_move_insn (operands[0], stack_slot);
+
+  if (aarch64_gcs_enabled ())
+    {
+      /* Restore GCS with code like
+               mov     x16, 1
+               chkfeat x16
+               tbnz    x16, 0, .L_done
+               ldr     tmp1, [%1, 8]
+               mrs     tmp2, gcspr_el0
+               subs    tmp2, tmp1, tmp2
+               b.eq    .L_done
+       .L_loop:
+               gcspopm
+               subs    tmp2, tmp2, 8
+               b.ne    .L_loop
+       .L_done:  */
+
+      rtx loop_label = gen_label_rtx ();
+      rtx done_label = gen_label_rtx ();
+      rtx r16 = gen_rtx_REG (DImode, R16_REGNUM);
+      emit_move_insn (r16, const1_rtx);
+      emit_insn (gen_aarch64_chkfeat ());
+      emit_insn (gen_tbranch_neqi3 (r16, const0_rtx, done_label));
+      rtx gcs_slot = adjust_address (operands[1], Pmode, GET_MODE_SIZE (Pmode));
+      rtx gcs_old = gen_reg_rtx (Pmode);
+      emit_move_insn (gcs_old, gcs_slot);
+      rtx gcs_now = gen_reg_rtx (Pmode);
+      emit_insn (gen_aarch64_load_gcspr (gcs_now));
+      emit_insn (gen_subdi3_compare1 (gcs_now, gcs_old, gcs_now));
+      rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
+      rtx cmp_rtx = gen_rtx_fmt_ee (EQ, DImode, cc_reg, const0_rtx);
+      emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, done_label));
+      emit_label (loop_label);
+      emit_insn (gen_aarch64_gcspopm_xzr ());
+      emit_insn (gen_adddi3_compare0 (gcs_now, gcs_now, GEN_INT (-8)));
+      cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
+      cmp_rtx = gen_rtx_fmt_ee (NE, DImode, cc_reg, const0_rtx);
+      emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, loop_label));
+      emit_label (done_label);
+    }
+  DONE;
+})
+
 ;; -------------------------------------------------------------------
 ;; Subroutine calls and sibcalls
 ;; -------------------------------------------------------------------
diff --git a/gcc/tree-nested.cc b/gcc/tree-nested.cc
index a54e72c32370..dfbd3432ce10 100644
--- a/gcc/tree-nested.cc
+++ b/gcc/tree-nested.cc
@@ -783,8 +783,8 @@ get_nl_goto_field (struct nesting_info *info)
       else
        type = lang_hooks.types.type_for_mode (Pmode, 1);
 
-      scalar_int_mode mode
-       = as_a <scalar_int_mode> (STACK_SAVEAREA_MODE (SAVE_NONLOCAL));
+      fixed_size_mode mode
+       = as_a <fixed_size_mode> (STACK_SAVEAREA_MODE (SAVE_NONLOCAL));
       size = GET_MODE_SIZE (mode);
       size = size / GET_MODE_SIZE (Pmode);
       size = size + 1;

Reply via email to