> Thanks - I have no further comments on this patch. We probably need to
> implement the same on AArch64 too in order to avoid similar problems.

Here's the implementation for aarch64, very similar but simpler since there is 
no shortage of scratch registers; the only thing to note is the new blockage 
pattern.  This was tested on real hardware, though not on Linux but on Darwin 
(an experimental port of the toolchain to iOS), and it makes it possible to 
pass ACATS (the Ada conformance testsuite, which requires stack checking).

There are also a couple of tweaks for the ARM implementation: a cosmetic one 
for the probe_stack pattern and one for the output_probe_stack_range loop.


2015-10-06  Tristan Gingold  <ging...@adacore.com>
            Eric Botcazou  <ebotca...@adacore.com>

        PR middle-end/65958
        * config/aarch64/aarch64-protos.h (aarch64_output_probe_stack_range):
        Declare.
        * config/aarch64/aarch64.md: Declare UNSPECV_BLOCKAGE and
        UNSPEC_PROBE_STACK_RANGE.
        (blockage): New instruction.
        (probe_stack_range): Likewise.
        * config/aarch64/aarch64.c (aarch64_emit_probe_stack_range): New
        function.
        (aarch64_output_probe_stack_range): Likewise.
        (aarch64_expand_prologue): Invoke aarch64_emit_probe_stack_range if
        static builtin stack checking is enabled.
        * config/aarch64/aarch64-linux.h (STACK_CHECK_STATIC_BUILTIN):
        Define.

        * config/arm/arm.c (arm_emit_probe_stack_range): Adjust comment.
        (output_probe_stack_range): Rotate the loop and simplify.
        (thumb1_expand_prologue): Tweak sorry message.
        * config/arm/arm.md (probe_stack): Use bare string.


2015-10-06  Eric Botcazou  <ebotca...@adacore.com>

        * gcc.target/aarch64/stack-checking.c: New test.

-- 
Eric Botcazou
Index: config/aarch64/aarch64-linux.h
===================================================================
--- config/aarch64/aarch64-linux.h	(revision 228512)
+++ config/aarch64/aarch64-linux.h	(working copy)
@@ -88,4 +88,7 @@
 #undef TARGET_BINDS_LOCAL_P
 #define TARGET_BINDS_LOCAL_P default_binds_local_p_2
 
+/* Define this to be nonzero if static stack checking is supported.  */
+#define STACK_CHECK_STATIC_BUILTIN 1
+
 #endif  /* GCC_AARCH64_LINUX_H */
Index: config/aarch64/aarch64-protos.h
===================================================================
--- config/aarch64/aarch64-protos.h	(revision 228512)
+++ config/aarch64/aarch64-protos.h	(working copy)
@@ -316,6 +316,7 @@ void aarch64_asm_output_labelref (FILE *
 void aarch64_cpu_cpp_builtins (cpp_reader *);
 void aarch64_elf_asm_named_section (const char *, unsigned, tree);
 const char * aarch64_gen_far_branch (rtx *, int, const char *, const char *);
+const char * aarch64_output_probe_stack_range (rtx, rtx);
 void aarch64_err_no_fpadvsimd (machine_mode, const char *);
 void aarch64_expand_epilogue (bool);
 void aarch64_expand_mov_immediate (rtx, rtx);
Index: config/aarch64/aarch64.c
===================================================================
--- config/aarch64/aarch64.c	(revision 228512)
+++ config/aarch64/aarch64.c	(working copy)
@@ -76,6 +76,7 @@
 #include "sched-int.h"
 #include "cortex-a57-fma-steering.h"
 #include "target-globals.h"
+#include "common/common-target.h"
 
 /* This file should be included last.  */
 #include "target-def.h"
@@ -2144,6 +2145,167 @@ aarch64_libgcc_cmp_return_mode (void)
   return SImode;
 }
 
+#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
+
+#if (PROBE_INTERVAL % 4096) != 0
+#error Cannot use indexed address calculation for stack probing
+#endif
+
+#if PROBE_INTERVAL > 4096
+#error Cannot use indexed addressing mode for stack probing
+#endif
+
+/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
+   inclusive.  These are offsets from the current stack pointer.  */
+
+static void
+aarch64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
+{
+  rtx reg9 = gen_rtx_REG (Pmode, 9);
+
+  /* The following code uses indexed address calculation on FIRST.  */
+  gcc_assert ((first % 4096) == 0);
+
+  /* See if we have a constant small number of probes to generate.  If so,
+     that's the easy case.  */
+  if (size <= PROBE_INTERVAL)
+    {
+      emit_set_insn (reg9,
+		     plus_constant (Pmode, stack_pointer_rtx,
+					   -(first + PROBE_INTERVAL)));
+      emit_stack_probe (plus_constant (Pmode, reg9, PROBE_INTERVAL - size));
+    }
+
+  /* The run-time loop is made up of 8 insns in the generic case while the
+     compile-time loop is made up of 4+2*(n-2) insns for n # of intervals.  */
+  else if (size <= 4 * PROBE_INTERVAL)
+    {
+      HOST_WIDE_INT i, rem;
+
+      emit_set_insn (reg9,
+		     plus_constant (Pmode, stack_pointer_rtx,
+					   -(first + PROBE_INTERVAL)));
+      emit_stack_probe (reg9);
+
+      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
+	 it exceeds SIZE.  If only two probes are needed, this will not
+	 generate any code.  Then probe at FIRST + SIZE.  */
+      for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
+	{
+	  emit_set_insn (reg9, plus_constant (Pmode, reg9, -PROBE_INTERVAL));
+	  emit_stack_probe (reg9);
+	}
+
+      rem = size - (i - PROBE_INTERVAL);
+      if (rem > 256)
+	{
+	  emit_set_insn (reg9, plus_constant (Pmode, reg9, -PROBE_INTERVAL));
+	  emit_stack_probe (plus_constant (Pmode, reg9, PROBE_INTERVAL - rem));
+	}
+      else
+	emit_stack_probe (plus_constant (Pmode, reg9, -rem));
+    }
+
+  /* Otherwise, do the same as above, but in a loop.  Note that we must be
+     extra careful with variables wrapping around because we might be at
+     the very top (or the very bottom) of the address space and we have
+     to be able to handle this case properly; in particular, we use an
+     equality test for the loop condition.  */
+  else
+    {
+      HOST_WIDE_INT rounded_size;
+      rtx reg10 = gen_rtx_REG (Pmode, 10);
+
+      /* Step 1: round SIZE to the previous multiple of the interval.  */
+
+      rounded_size = size & -PROBE_INTERVAL;
+
+
+      /* Step 2: compute initial and final value of the loop counter.  */
+
+      /* TEST_ADDR = SP + FIRST.  */
+      emit_set_insn (reg9,
+		     plus_constant (Pmode, stack_pointer_rtx, -first));
+
+      /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
+      emit_set_insn (reg10,
+		     plus_constant (Pmode, stack_pointer_rtx,
+				    -(first + rounded_size)));
+
+
+      /* Step 3: the loop
+
+	 do
+	   {
+	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
+	     probe at TEST_ADDR
+	   }
+	 while (TEST_ADDR != LAST_ADDR)
+
+	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
+	 until it is equal to ROUNDED_SIZE.  */
+
+      emit_insn (gen_probe_stack_range (reg9, reg9, reg10));
+
+
+      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
+	 that SIZE is equal to ROUNDED_SIZE.  */
+
+      if (size != rounded_size)
+	{
+	  HOST_WIDE_INT rem = size - rounded_size;
+
+	  if (rem > 256)
+	    {
+	      emit_set_insn (reg10,
+			     plus_constant (Pmode, reg10, -PROBE_INTERVAL));
+	      emit_stack_probe (plus_constant (Pmode, reg10,
+					       PROBE_INTERVAL - rem));
+	    }
+	  else
+	    emit_stack_probe (plus_constant (Pmode, reg10, -rem));
+	}
+    }
+
+  /* Make sure nothing is scheduled before we are done.  */
+  emit_insn (gen_blockage ());
+}
+
+/* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
+   absolute addresses.  */
+
+const char *
+aarch64_output_probe_stack_range (rtx reg1, rtx reg2)
+{
+  static int labelno = 0;
+  char loop_lab[32];
+  rtx xops[2];
+
+  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
+
+  /* Loop.  */
+  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
+
+  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
+  xops[0] = reg1;
+  xops[1] = GEN_INT (PROBE_INTERVAL);
+  output_asm_insn ("sub\t%0, %0, %1", xops);
+
+  /* Probe at TEST_ADDR.  */
+  output_asm_insn ("str\txzr, [%0]", xops);
+
+  /* Test if TEST_ADDR == LAST_ADDR.  */
+  xops[1] = reg2;
+  output_asm_insn ("cmp\t%0, %1", xops);
+
+  /* Branch.  */
+  fputs ("\tb.ne\t", asm_out_file);
+  assemble_name_raw (asm_out_file, loop_lab);
+  fputc ('\n', asm_out_file);
+
+  return "";
+}
+
 static bool
 aarch64_frame_pointer_required (void)
 {
@@ -2544,6 +2706,18 @@ aarch64_expand_prologue (void)
   if (flag_stack_usage_info)
     current_function_static_stack_size = frame_size;
 
+  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
+    {
+      if (crtl->is_leaf && !cfun->calls_alloca)
+	{
+	  if (frame_size > PROBE_INTERVAL && frame_size > STACK_CHECK_PROTECT)
+	    aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT,
+					    frame_size - STACK_CHECK_PROTECT);
+	}
+      else if (frame_size > 0)
+	aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT, frame_size);
+    }
+
   /* Store pairs and load pairs have a range only -512 to 504.  */
   if (offset >= 512)
     {
Index: config/aarch64/aarch64.md
===================================================================
--- config/aarch64/aarch64.md	(revision 228512)
+++ config/aarch64/aarch64.md	(working copy)
@@ -104,6 +104,7 @@ (define_c_enum "unspec" [
     UNSPEC_MB
     UNSPEC_NOP
     UNSPEC_PRLG_STK
+    UNSPEC_PROBE_STACK_RANGE
     UNSPEC_RBIT
     UNSPEC_SISD_NEG
     UNSPEC_SISD_SSHL
@@ -134,6 +135,7 @@ (define_c_enum "unspecv" [
     UNSPECV_SET_FPCR		; Represent assign of FPCR content.
     UNSPECV_GET_FPSR		; Represent fetch of FPSR content.
     UNSPECV_SET_FPSR		; Represent assign of FPSR content.
+    UNSPECV_BLOCKAGE		; Represent a blockage
   ]
 )
 
@@ -4807,6 +4809,27 @@ (define_insn "stack_tie"
   [(set_attr "length" "0")]
 )
 
+;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and
+;; all of memory.  This blocks insns from being moved across this point.
+
+(define_insn "blockage"
+  [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)]
+  ""
+  ""
+  [(set_attr "length" "0")
+   (set_attr "type" "block")]
+)
+
+(define_insn "probe_stack_range"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec_volatile:DI [(match_operand:DI 1 "register_operand" "0")
+			     (match_operand:DI 2 "register_operand" "r")]
+			     UNSPEC_PROBE_STACK_RANGE))]
+  ""
+{
+  return aarch64_output_probe_stack_range (operands[0], operands[2]);
+})
+
 ;; Named pattern for expanding thread pointer reference.
 (define_expand "get_thread_pointerdi"
   [(match_operand:DI 0 "register_operand" "=r")]
Index: config/arm/arm.c
===================================================================
--- config/arm/arm.c	(revision 228512)
+++ config/arm/arm.c	(working copy)
@@ -21262,11 +21262,12 @@ arm_emit_probe_stack_range (HOST_WIDE_IN
 
       /* Step 3: the loop
 
-	 while (TEST_ADDR != LAST_ADDR)
+	 do
 	   {
 	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
 	     probe at TEST_ADDR
 	   }
+	 while (TEST_ADDR != LAST_ADDR)
 
 	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
 	 until it is equal to ROUNDED_SIZE.  */
@@ -21311,22 +21312,22 @@ output_probe_stack_range (rtx reg1, rtx
 
   ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
 
+  /* Loop.  */
   ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
 
-   /* Test if TEST_ADDR == LAST_ADDR.  */
+  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
   xops[0] = reg1;
-  xops[1] = reg2;
-  output_asm_insn ("cmp\t%0, %1", xops);
+  xops[1] = GEN_INT (PROBE_INTERVAL);
+  output_asm_insn ("sub\t%0, %0, %1", xops);
 
-  if (TARGET_THUMB2)
-    fputs ("\tittt\tne\n", asm_out_file);
+  /* Probe at TEST_ADDR.  */
+  output_asm_insn ("str\tr0, [%0, #0]", xops);
 
-  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
-  xops[1] = GEN_INT (PROBE_INTERVAL);
-  output_asm_insn ("subne\t%0, %0, %1", xops);
+  /* Test if TEST_ADDR == LAST_ADDR.  */
+  xops[1] = reg2;
+  output_asm_insn ("cmp\t%0, %1", xops);
 
-  /* Probe at TEST_ADDR and branch.  */
-  output_asm_insn ("strne\tr0, [%0, #0]", xops);
+  /* Branch.  */
   fputs ("\tbne\t", asm_out_file);
   assemble_name_raw (asm_out_file, loop_lab);
   fputc ('\n', asm_out_file);
@@ -24869,7 +24870,7 @@ thumb1_expand_prologue (void)
 
   /* If we have a frame, then do stack checking.  FIXME: not implemented.  */
   if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
-    sorry ("-fstack-check=specific for THUMB1");
+    sorry ("-fstack-check=specific for Thumb-1");
 
   amount = offsets->outgoing_args - offsets->saved_regs;
   amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
Index: config/arm/arm.md
===================================================================
--- config/arm/arm.md	(revision 228512)
+++ config/arm/arm.md	(working copy)
@@ -8262,9 +8262,7 @@ (define_insn "probe_stack"
   [(set (match_operand 0 "memory_operand" "=m")
         (unspec [(const_int 0)] UNSPEC_PROBE_STACK))]
   "TARGET_32BIT"
-{
-  return "str%?\\tr0, %0";
-}
+  "str%?\\tr0, %0"
   [(set_attr "type" "store1")
    (set_attr "predicable" "yes")]
 )
/* { dg-do run { target { *-*-linux* } } } */
/* { dg-options "-fstack-check" } */

/* Run-time check for -fstack-check: main mixes a dynamic allocation
   (__builtin_alloca) with fixed-size local arrays declared in two separate
   scopes.  Returns 0 (success) if the byte stored through the alloca'd
   pointer still reads back as nonzero at the end.  */
int main(void)
{
  char *p;
  /* First scope: a local array plus an 8-byte alloca'd block.  P is
     declared outside the scope and is read again just before returning.  */
  if (1)
    {
      /* Never accessed.  NOTE(review): presumably only here to occupy
	 frame space next to the alloca -- confirm.  */
      char i[48];
      p = __builtin_alloca(8);
      p[0] = 1;
    }

  /* Second scope: two more local arrays; only J is written to.  */
  if (1)
    {
      char i[48], j[64];
      j[48] = 0;
    }

  /* P[0] was set to 1 above, so this yields 0 unless that byte changed.  */
  return !p[0];
}

Reply via email to