Szabolcs Nagy <[email protected]> writes:
> The expected way to handle eh_return is to pass the stack adjustment
> offset and landing pad address via
>
> EH_RETURN_STACKADJ_RTX
> EH_RETURN_HANDLER_RTX
>
> to the epilogue that is shared between normal return paths and the
> eh_return paths. EH_RETURN_HANDLER_RTX is the stack slot of the
> return address that is overwritten with the landing pad in the
> eh_return case and EH_RETURN_STACKADJ_RTX is a register added to sp
> right before return and it is set to 0 in the normal return case.
>
> The issue with this design is that eh_return and normal return may
> require different return sequence but there is no way to distinguish
> the two cases in the epilogue (the stack adjustment may be 0 in the
> eh_return case too).
>
> The reason eh_return and normal return requires different return
> sequence is that control flow integrity hardening may need to treat
> eh_return as a forward-edge transfer (it is not returning to the
> previous stack frame) and normal return as a backward-edge one.
> In case of AArch64 forward-edge is protected by BTI and requires br
> instruction and backward-edge is protected by PAUTH or GCS and
> requires ret (or authenticated ret) instruction.
>
> This patch resolves the issue by introducing EH_RETURN_TAKEN_RTX that
> is a flag set to 1 in the eh_return path and 0 in normal return paths.
> Branching on the EH_RETURN_TAKEN_RTX flag, the right return sequence
> can be used in the epilogue.
>
> The handler could be passed the old way via clobbering the return
> address, but since now the eh_return case can be distinguished, the
> handler can be in a different register than x30 and no stack frame
> is needed for eh_return.
>
> This patch fixes a return to anywhere gadget in the unwinder with
> existing standard branch protection as well as makes EH return
> compatible with the Guarded Control Stack (GCS) extension.
>
> Some tests are adjusted because eh_return no longer prevents pac-ret
> in the normal return path.
>
> gcc/ChangeLog:
>
> * config/aarch64/aarch64-protos.h (aarch64_eh_return_handler_rtx):
> Remove.
> * config/aarch64/aarch64.cc (aarch64_return_address_signing_enabled):
> Sign return address even in functions with eh_return.
> (aarch64_expand_epilogue): Conditionally return with br or ret.
> (aarch64_eh_return_handler_rtx): Remove.
> * config/aarch64/aarch64.h (EH_RETURN_TAKEN_RTX): Define.
> (EH_RETURN_STACKADJ_RTX): Change to R5.
> (EH_RETURN_HANDLER_RTX): Change to R6.
> * df-scan.cc: Handle EH_RETURN_TAKEN_RTX.
> * doc/tm.texi: Regenerate.
> * doc/tm.texi.in: Document EH_RETURN_TAKEN_RTX.
> * except.cc (expand_eh_return): Handle EH_RETURN_TAKEN_RTX.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/aarch64/return_address_sign_1.c: Move func4 to ...
> * gcc.target/aarch64/return_address_sign_2.c: ... here and fix the
> scan asm check.
> * gcc.target/aarch64/return_address_sign_b_1.c: Move func4 to ...
> * gcc.target/aarch64/return_address_sign_b_2.c: ... here and fix the
> scan asm check.
OK, thanks!
Richard
> ---
> v2:
> - Introduce EH_RETURN_TAKEN_RTX instead of abusing EH_RETURN_STACKADJ_RTX.
> - Merge test fixes.
> ---
> gcc/config/aarch64/aarch64-protos.h | 1 -
> gcc/config/aarch64/aarch64.cc | 88 ++++++-------------
> gcc/config/aarch64/aarch64.h | 9 +-
> gcc/df-scan.cc | 10 +++
> gcc/doc/tm.texi | 12 +++
> gcc/doc/tm.texi.in | 12 +++
> gcc/except.cc | 20 +++++
> .../aarch64/return_address_sign_1.c | 13 +--
> .../aarch64/return_address_sign_2.c | 17 +++-
> .../aarch64/return_address_sign_b_1.c | 11 ---
> .../aarch64/return_address_sign_b_2.c | 17 +++-
> 11 files changed, 116 insertions(+), 94 deletions(-)
>
> diff --git a/gcc/config/aarch64/aarch64-protos.h
> b/gcc/config/aarch64/aarch64-protos.h
> index 60a55f4bc19..80296024f04 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -859,7 +859,6 @@ machine_mode aarch64_hard_regno_caller_save_mode
> (unsigned, unsigned,
> machine_mode);
> int aarch64_uxt_size (int, HOST_WIDE_INT);
> int aarch64_vec_fpconst_pow_of_2 (rtx);
> -rtx aarch64_eh_return_handler_rtx (void);
> rtx aarch64_mask_from_zextract_ops (rtx, rtx);
> const char *aarch64_output_move_struct (rtx *operands);
> rtx aarch64_return_addr_rtx (void);
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index a28b66acf6a..5cdb33dd3dc 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -9113,17 +9113,6 @@ aarch64_return_address_signing_enabled (void)
> /* This function should only be called after frame laid out. */
> gcc_assert (cfun->machine->frame.laid_out);
>
> - /* Turn return address signing off in any function that uses
> - __builtin_eh_return. The address passed to __builtin_eh_return
> - is not signed so either it has to be signed (with original sp)
> - or the code path that uses it has to avoid authenticating it.
> - Currently eh return introduces a return to anywhere gadget, no
> - matter what we do here since it uses ret with user provided
> - address. An ideal fix for that is to use indirect branch which
> - can be protected with BTI j (to some extent). */
> - if (crtl->calls_eh_return)
> - return false;
> -
> /* If signing scope is AARCH_FUNCTION_NON_LEAF, we only sign a leaf
> function
> if its LR is pushed onto stack. */
> return (aarch_ra_sign_scope == AARCH_FUNCTION_ALL
> @@ -10060,11 +10049,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1,
> rtx temp2,
> /* Return 1 if the register is used by the epilogue. We need to say the
> return register is used, but only after epilogue generation is complete.
> Note that in the case of sibcalls, the values "used by the epilogue" are
> - considered live at the start of the called function.
> -
> - For SIMD functions we need to return 1 for FP registers that are saved and
> - restored by a function but are not zero in call_used_regs. If we do not
> do
> - this optimizations may remove the restore of the register. */
> + considered live at the start of the called function. */
>
> int
> aarch64_epilogue_uses (int regno)
> @@ -10468,6 +10453,30 @@ aarch64_expand_epilogue (bool for_sibcall)
> RTX_FRAME_RELATED_P (insn) = 1;
> }
>
> + /* Stack adjustment for exception handler. */
> + if (crtl->calls_eh_return && !for_sibcall)
> + {
> + /* If the EH_RETURN_TAKEN_RTX flag is set then we need
> + to unwind the stack and jump to the handler, otherwise
> + skip this eh_return logic and continue with normal
> + return after the label. We have already reset the CFA
> + to be SP; letting the CFA move during this adjustment
> + is just as correct as retaining the CFA from the body
> + of the function. Therefore, do nothing special. */
> + rtx label = gen_label_rtx ();
> + rtx x = gen_rtx_EQ (VOIDmode, EH_RETURN_TAKEN_RTX, const0_rtx);
> + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
> + gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
> + rtx jump = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
> + JUMP_LABEL (jump) = label;
> + LABEL_NUSES (label)++;
> + emit_insn (gen_add2_insn (stack_pointer_rtx,
> + EH_RETURN_STACKADJ_RTX));
> + emit_jump_insn (gen_indirect_jump (EH_RETURN_HANDLER_RTX));
> + emit_barrier ();
> + emit_label (label);
> + }
> +
> /* We prefer to emit the combined return/authenticate instruction RETAA,
> however there are three cases in which we must instead emit an explicit
> authentication instruction.
> @@ -10497,58 +10506,11 @@ aarch64_expand_epilogue (bool for_sibcall)
> RTX_FRAME_RELATED_P (insn) = 1;
> }
>
> - /* Stack adjustment for exception handler. */
> - if (crtl->calls_eh_return && !for_sibcall)
> - {
> - /* We need to unwind the stack by the offset computed by
> - EH_RETURN_STACKADJ_RTX. We have already reset the CFA
> - to be SP; letting the CFA move during this adjustment
> - is just as correct as retaining the CFA from the body
> - of the function. Therefore, do nothing special. */
> - emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
> - }
> -
> emit_use (gen_rtx_REG (DImode, LR_REGNUM));
> if (!for_sibcall)
> emit_jump_insn (ret_rtx);
> }
>
> -/* Implement EH_RETURN_HANDLER_RTX. EH returns need to either return
> - normally or return to a previous frame after unwinding.
> -
> - An EH return uses a single shared return sequence. The epilogue is
> - exactly like a normal epilogue except that it has an extra input
> - register (EH_RETURN_STACKADJ_RTX) which contains the stack adjustment
> - that must be applied after the frame has been destroyed. An extra label
> - is inserted before the epilogue which initializes this register to zero,
> - and this is the entry point for a normal return.
> -
> - An actual EH return updates the return address, initializes the stack
> - adjustment and jumps directly into the epilogue (bypassing the zeroing
> - of the adjustment). Since the return address is typically saved on the
> - stack when a function makes a call, the saved LR must be updated outside
> - the epilogue.
> -
> - This poses problems as the store is generated well before the epilogue,
> - so the offset of LR is not known yet. Also optimizations will remove the
> - store as it appears dead, even after the epilogue is generated (as the
> - base or offset for loading LR is different in many cases).
> -
> - To avoid these problems this implementation forces the frame pointer
> - in eh_return functions so that the location of LR is fixed and known
> early.
> - It also marks the store volatile, so no optimization is permitted to
> - remove the store. */
> -rtx
> -aarch64_eh_return_handler_rtx (void)
> -{
> - rtx tmp = gen_frame_mem (Pmode,
> - plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
> -
> - /* Mark the store volatile, so no optimization is permitted to remove it.
> */
> - MEM_VOLATILE_P (tmp) = true;
> - return tmp;
> -}
> -
> /* Output code to add DELTA to the first argument, and then jump
> to FUNCTION. Used for C++ multiple inheritance. */
> static void
> diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
> index 2f0777a37ac..58f7eeb565f 100644
> --- a/gcc/config/aarch64/aarch64.h
> +++ b/gcc/config/aarch64/aarch64.h
> @@ -583,9 +583,12 @@ enum class aarch64_feature : unsigned char {
> /* Output assembly strings after .cfi_startproc is emitted. */
> #define ASM_POST_CFI_STARTPROC aarch64_post_cfi_startproc
>
> -/* For EH returns X4 contains the stack adjustment. */
> -#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, R4_REGNUM)
> -#define EH_RETURN_HANDLER_RTX aarch64_eh_return_handler_rtx ()
> +/* For EH returns X4 is a flag that is set in the EH return
> + code paths and then X5 and X6 contain the stack adjustment
> + and return address respectively. */
> +#define EH_RETURN_TAKEN_RTX gen_rtx_REG (Pmode, R4_REGNUM)
> +#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, R5_REGNUM)
> +#define EH_RETURN_HANDLER_RTX gen_rtx_REG (Pmode, R6_REGNUM)
>
> #undef TARGET_COMPUTE_FRAME_LAYOUT
> #define TARGET_COMPUTE_FRAME_LAYOUT aarch64_layout_frame
> diff --git a/gcc/df-scan.cc b/gcc/df-scan.cc
> index 9515740728c..934c9ca2d81 100644
> --- a/gcc/df-scan.cc
> +++ b/gcc/df-scan.cc
> @@ -3720,6 +3720,16 @@ df_get_exit_block_use_set (bitmap exit_block_uses)
> }
> #endif
>
> +#ifdef EH_RETURN_TAKEN_RTX
> + if ((!targetm.have_epilogue () || ! epilogue_completed)
> + && crtl->calls_eh_return)
> + {
> + rtx tmp = EH_RETURN_TAKEN_RTX;
> + if (tmp && REG_P (tmp))
> + df_mark_reg (tmp, exit_block_uses);
> + }
> +#endif
> +
> if ((!targetm.have_epilogue () || ! epilogue_completed)
> && crtl->calls_eh_return)
> {
> diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
> index f7ac806ff15..21bfe160a8b 100644
> --- a/gcc/doc/tm.texi
> +++ b/gcc/doc/tm.texi
> @@ -3506,6 +3506,18 @@ If you want to support call frame exception handling,
> you must
> define either this macro or the @code{eh_return} instruction pattern.
> @end defmac
>
> +@defmac EH_RETURN_TAKEN_RTX
> +A C expression whose value is RTL representing a location in which
> +to store if the EH return path was taken instead of a normal return.
> +This macro allows conditionally executing different code in the
> +epilogue for the EH and normal return cases.
> +
> +When this macro is defined, the macros @code{EH_RETURN_STACKADJ_RTX}
> +and @code{EH_RETURN_HANDLER_RTX} are only meaningful in the epilogue
> +when 1 is stored to the specified location. The value 0 means normal
> +return.
> +@end defmac
> +
> @defmac RETURN_ADDR_OFFSET
> If defined, an integer-valued C expression for which rtl will be generated
> to add it to the exception handler address before it is searched in the
> diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
> index 141027e0bb4..ee9dc5c5fc3 100644
> --- a/gcc/doc/tm.texi.in
> +++ b/gcc/doc/tm.texi.in
> @@ -2742,6 +2742,18 @@ If you want to support call frame exception handling,
> you must
> define either this macro or the @code{eh_return} instruction pattern.
> @end defmac
>
> +@defmac EH_RETURN_TAKEN_RTX
> +A C expression whose value is RTL representing a location in which
> +to store if the EH return path was taken instead of a normal return.
> +This macro allows conditionally executing different code in the
> +epilogue for the EH and normal return cases.
> +
> +When this macro is defined, the macros @code{EH_RETURN_STACKADJ_RTX}
> +and @code{EH_RETURN_HANDLER_RTX} are only meaningful in the epilogue
> +when 1 is stored to the specified location. The value 0 means normal
> +return.
> +@end defmac
> +
> @defmac RETURN_ADDR_OFFSET
> If defined, an integer-valued C expression for which rtl will be generated
> to add it to the exception handler address before it is searched in the
> diff --git a/gcc/except.cc b/gcc/except.cc
> index e728aa43b6a..508f8bb3e26 100644
> --- a/gcc/except.cc
> +++ b/gcc/except.cc
> @@ -2281,6 +2281,10 @@ expand_eh_return (void)
> emit_move_insn (EH_RETURN_STACKADJ_RTX, const0_rtx);
> #endif
>
> +#ifdef EH_RETURN_TAKEN_RTX
> + emit_move_insn (EH_RETURN_TAKEN_RTX, const0_rtx);
> +#endif
> +
> around_label = gen_label_rtx ();
> emit_jump (around_label);
>
> @@ -2291,6 +2295,10 @@ expand_eh_return (void)
> emit_move_insn (EH_RETURN_STACKADJ_RTX, crtl->eh.ehr_stackadj);
> #endif
>
> +#ifdef EH_RETURN_TAKEN_RTX
> + emit_move_insn (EH_RETURN_TAKEN_RTX, const1_rtx);
> +#endif
> +
> if (targetm.have_eh_return ())
> emit_insn (targetm.gen_eh_return (crtl->eh.ehr_handler));
> else
> @@ -2301,7 +2309,19 @@ expand_eh_return (void)
> error ("%<__builtin_eh_return%> not supported on this target");
> }
>
> +#ifdef EH_RETURN_TAKEN_RTX
> + rtx_code_label *eh_done_label = gen_label_rtx ();
> + emit_jump (eh_done_label);
> +#endif
> +
> emit_label (around_label);
> +
> +#ifdef EH_RETURN_TAKEN_RTX
> + for (rtx tmp : { EH_RETURN_STACKADJ_RTX, EH_RETURN_HANDLER_RTX })
> + if (tmp && REG_P (tmp))
> + emit_clobber (tmp);
> + emit_label (eh_done_label);
> +#endif
> }
>
> /* Convert a ptr_mode address ADDR_TREE to a Pmode address controlled by
> diff --git a/gcc/testsuite/gcc.target/aarch64/return_address_sign_1.c
> b/gcc/testsuite/gcc.target/aarch64/return_address_sign_1.c
> index 232ba67ade0..114a9dacb3f 100644
> --- a/gcc/testsuite/gcc.target/aarch64/return_address_sign_1.c
> +++ b/gcc/testsuite/gcc.target/aarch64/return_address_sign_1.c
> @@ -37,16 +37,5 @@ func3 (int a, int b, int c)
> /* autiasp */
> }
>
> -/* eh_return. */
> -void __attribute__ ((target ("arch=armv8.3-a")))
> -func4 (long offset, void *handler, int *ptr, int imm1, int imm2)
> -{
> - /* no paciasp */
> - *ptr = imm1 + foo (imm1) + imm2;
> - __builtin_eh_return (offset, handler);
> - /* no autiasp */
> - return;
> -}
> -
> -/* { dg-final { scan-assembler-times "autiasp" 3 } } */
> /* { dg-final { scan-assembler-times "paciasp" 3 } } */
> +/* { dg-final { scan-assembler-times "autiasp" 3 } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/return_address_sign_2.c
> b/gcc/testsuite/gcc.target/aarch64/return_address_sign_2.c
> index a4bc5b45333..d93492c3c43 100644
> --- a/gcc/testsuite/gcc.target/aarch64/return_address_sign_2.c
> +++ b/gcc/testsuite/gcc.target/aarch64/return_address_sign_2.c
> @@ -14,5 +14,18 @@ func1 (int a, int b, int c)
> /* retaa */
> }
>
> -/* { dg-final { scan-assembler-times "paciasp" 1 } } */
> -/* { dg-final { scan-assembler-times "retaa" 1 } } */
> +/* eh_return. */
> +void __attribute__ ((target ("arch=armv8.3-a")))
> +func4 (long offset, void *handler, int *ptr, int imm1, int imm2)
> +{
> + /* paciasp */
> + *ptr = imm1 + foo (imm1) + imm2;
> + if (handler)
> + /* br */
> + __builtin_eh_return (offset, handler);
> + /* retaa */
> + return;
> +}
> +
> +/* { dg-final { scan-assembler-times "paciasp" 2 } } */
> +/* { dg-final { scan-assembler-times "retaa" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/return_address_sign_b_1.c
> b/gcc/testsuite/gcc.target/aarch64/return_address_sign_b_1.c
> index 43e32ab6cb7..697fa30dc5a 100644
> --- a/gcc/testsuite/gcc.target/aarch64/return_address_sign_b_1.c
> +++ b/gcc/testsuite/gcc.target/aarch64/return_address_sign_b_1.c
> @@ -37,16 +37,5 @@ func3 (int a, int b, int c)
> /* autibsp */
> }
>
> -/* eh_return. */
> -void __attribute__ ((target ("arch=armv8.3-a")))
> -func4 (long offset, void *handler, int *ptr, int imm1, int imm2)
> -{
> - /* no pacibsp */
> - *ptr = imm1 + foo (imm1) + imm2;
> - __builtin_eh_return (offset, handler);
> - /* no autibsp */
> - return;
> -}
> -
> /* { dg-final { scan-assembler-times "pacibsp" 3 } } */
> /* { dg-final { scan-assembler-times "autibsp" 3 } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/return_address_sign_b_2.c
> b/gcc/testsuite/gcc.target/aarch64/return_address_sign_b_2.c
> index 9ed64ce0591..452240b731e 100644
> --- a/gcc/testsuite/gcc.target/aarch64/return_address_sign_b_2.c
> +++ b/gcc/testsuite/gcc.target/aarch64/return_address_sign_b_2.c
> @@ -14,5 +14,18 @@ func1 (int a, int b, int c)
> /* retab */
> }
>
> -/* { dg-final { scan-assembler-times "pacibsp" 1 } } */
> -/* { dg-final { scan-assembler-times "retab" 1 } } */
> +/* eh_return. */
> +void __attribute__ ((target ("arch=armv8.3-a")))
> +func4 (long offset, void *handler, int *ptr, int imm1, int imm2)
> +{
> + /* pacibsp */
> + *ptr = imm1 + foo (imm1) + imm2;
> + if (handler)
> + /* br */
> + __builtin_eh_return (offset, handler);
> + /* retab */
> + return;
> +}
> +
> +/* { dg-final { scan-assembler-times "pacibsp" 2 } } */
> +/* { dg-final { scan-assembler-times "retab" 2 } } */