The attached patch fixes a warning during Linux kernel compilation
on S/390 due to -mwarn-dynamicstack and runtime alignment of stack
variables with constant size causing cfun->calls_alloca to be set
(even if alloca is not used at all).  The patched code places
constant size runtime aligned variables in the "virtual stack
vars" area instead of creating a "virtual stack dynamic" area.

This behaviour is activated by defining

  #define ALLOCATE_DYNAMIC_STACK_SPACE_IN_PROLOGUE 1

in the backend; otherwise the old logic is used.

The kernel uses runtime alignment for the page structure (aligned
to 16 bytes), and apart from triggereing the alloca warning
(-mwarn-dynamicstack), the current Gcc also generates inefficient
code like

  aghi %r15,-160  # prologue: create stack frame
  lgr %r11,%r15   # prologue: generate frame pointer
  aghi %r15,-32   # space for dynamic stack

which could be simplified to

  aghi %r15,-192

(if later optimization passes are able to get rid of the frame
pointer).  Is there a specific reason why the patched behaviour
shouldn't be used for all platforms?

--

As the placement of runtime aligned stack variables with constant
size is done completely in the middleend, I don't see a way to fix
this in the backend.

The patch has been tested (only) on S/390[x].

Ciao

Dominik ^_^  ^_^

-- 

Dominik Vogt
IBM Germany
gcc/ChangeLog

        * cfgexpand.c (expand_stack_vars): Implement
        ALLOCATE_DYNAMIC_STACK_SPACE_IN_PROLOGUE.
        * explow.c (get_dynamic_stack_base): New function to return an address
        expression for the dynamic stack base when using
        ALLOCATE_DYNAMIC_STACK_SPACE_IN_PROLOGUE.
        (allocate_dynamic_stack_space): Use new functions.
        (align_dynamic_address, adjust_size_align): Move some code
        from allocate_dynamic_stack_space to new functions.
        * explow.h (get_dynamic_stack_base): Export.
        * doc/tm.texi (ALLOCATE_DYNAMIC_STACK_SPACE_IN_PROLOGUE): Document.
        * config/s390/s390.h (ALLOCATE_DYNAMIC_STACK_SPACE_IN_PROLOGUE): Define.
        * defaults.h (ALLOCATE_DYNAMIC_STACK_SPACE_IN_PROLOGUE): Define by
        default.

gcc/testsuite/ChangeLog

        * gcc.dg/stack-usage-2.c (foo3): Adapt expected warning.
>From f96098e0f399b2b9c932f174a4050cab38233b6c Mon Sep 17 00:00:00 2001
From: Dominik Vogt <v...@linux.vnet.ibm.com>
Date: Mon, 23 Nov 2015 14:35:50 +0100
Subject: [PATCH] Allocate constant size dynamic stack space in the
 prologue ...

... and place it in the virtual stack vars area, if the platform supports it.
On S/390 this saves adjusting the stack pointer twice and forcing the frame
pointer into existence.  It also removes the warning with -mwarn-dynamicstack
that is triggered by cfun->calls_alloca == 1.

This fixes a problem with the Linux kernel which aligns the page structure to
16 bytes at run time using inefficient code and issuing a bogus warning.
---
 gcc/cfgexpand.c                      |  21 +++++-
 gcc/config/s390/s390.h               |   4 ++
 gcc/defaults.h                       |   4 ++
 gcc/doc/tm.texi                      |   6 ++
 gcc/doc/tm.texi.in                   |   6 ++
 gcc/explow.c                         | 129 ++++++++++++++++++++++++++---------
 gcc/explow.h                         |   4 ++
 gcc/testsuite/gcc.dg/stack-usage-2.c |   4 +-
 8 files changed, 142 insertions(+), 36 deletions(-)

diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index 1990e10..bb49a0e 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -1079,8 +1079,25 @@ expand_stack_vars (bool (*pred) (size_t), struct stack_vars_data *data)
 
       /* If there were any, allocate space.  */
       if (large_size > 0)
-	large_base = allocate_dynamic_stack_space (GEN_INT (large_size), 0,
-						   large_align, true);
+	{
+	  if (ALLOCATE_DYNAMIC_STACK_SPACE_IN_PROLOGUE)
+	    {
+	      HOST_WIDE_INT large_offset;
+	      HOST_WIDE_INT size;
+
+	      size = large_size + large_align / BITS_PER_UNIT;
+
+	      /* Allocate space in the prologue, at the beginning of the virtual
+		 stack vars area.  */
+	      large_offset = alloc_stack_frame_space (size, 1);
+	      large_base = get_dynamic_stack_base (GEN_INT (large_size),
+						   large_offset, large_align);
+	    }
+	  else
+	    /* Allocate space now.  */
+	    large_base = allocate_dynamic_stack_space (GEN_INT (large_size), 0,
+						       large_align, true);
+	}
     }
 
   for (si = 0; si < n; ++si)
diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h
index a0faf13..073ce5c 100644
--- a/gcc/config/s390/s390.h
+++ b/gcc/config/s390/s390.h
@@ -594,6 +594,10 @@ extern const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER];
 #define STACK_DYNAMIC_OFFSET(FUNDECL) \
   (STACK_POINTER_OFFSET + crtl->outgoing_args_size)
 
+/* Constant size dynamic stack space can be allocated through the function
+   prologue to save the extra instructions to adjust the stack pointer.  */
+#define ALLOCATE_DYNAMIC_STACK_SPACE_IN_PROLOGUE 1
+
 /* Offset of first parameter from the argument pointer register value.
    We have a fake argument pointer register that points directly to
    the argument area.  */
diff --git a/gcc/defaults.h b/gcc/defaults.h
index 0f1c713..abd6ab2 100644
--- a/gcc/defaults.h
+++ b/gcc/defaults.h
@@ -1055,6 +1055,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define STACK_POINTER_OFFSET    0
 #endif
 
+#ifndef ALLOCATE_DYNAMIC_STACK_SPACE_IN_PROLOGUE
+#define ALLOCATE_DYNAMIC_STACK_SPACE_IN_PROLOGUE 0
+#endif
+
 #ifndef LOCAL_REGNO
 #define LOCAL_REGNO(REGNO)  0
 #endif
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index bde808b..921e98a 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -3015,6 +3015,12 @@ length of the outgoing arguments.  The default is correct for most
 machines.  See @file{function.c} for details.
 @end defmac
 
+@defmac ALLOCATE_DYNAMIC_STACK_SPACE_IN_PROLOGUE
+Define to non-zero to enable allocating constant dynamic stack space
+through the function prologue.  This affects only stack variables with
+run time alignment.
+@end defmac
+
 @defmac INITIAL_FRAME_ADDRESS_RTX
 A C expression whose value is RTL representing the address of the initial
 stack frame. This address is passed to @code{RETURN_ADDR_RTX} and
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 0677fc1..5512577 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -2621,6 +2621,12 @@ length of the outgoing arguments.  The default is correct for most
 machines.  See @file{function.c} for details.
 @end defmac
 
+@defmac ALLOCATE_DYNAMIC_STACK_SPACE_IN_PROLOGUE
+Define to non-zero to enable allocating constant dynamic stack space
+through the function prologue.  This affects only stack variables with
+run time alignment.
+@end defmac
+
 @defmac INITIAL_FRAME_ADDRESS_RTX
 A C expression whose value is RTL representing the address of the initial
 stack frame. This address is passed to @code{RETURN_ADDR_RTX} and
diff --git a/gcc/explow.c b/gcc/explow.c
index e6a69e0..c6848d3 100644
--- a/gcc/explow.c
+++ b/gcc/explow.c
@@ -1140,6 +1140,52 @@ record_new_stack_level (void)
     update_sjlj_context ();
 }
 
+/* Return an rtx doing runtime alignment to REQUIRED_ALIGN on TARGET.  */
+static rtx
+align_dynamic_address (rtx target, unsigned required_align)
+{
+  /* CEIL_DIV_EXPR needs to worry about the addition overflowing,
+     but we know it can't.  So add ourselves and then do
+     TRUNC_DIV_EXPR.  */
+  target = expand_binop (Pmode, add_optab, target,
+			 gen_int_mode (required_align / BITS_PER_UNIT - 1,
+				       Pmode),
+			 NULL_RTX, 1, OPTAB_LIB_WIDEN);
+  target = expand_divmod (0, TRUNC_DIV_EXPR, Pmode, target,
+			  gen_int_mode (required_align / BITS_PER_UNIT,
+					Pmode),
+			  NULL_RTX, 1);
+  target = expand_mult (Pmode, target,
+			gen_int_mode (required_align / BITS_PER_UNIT,
+				      Pmode),
+			NULL_RTX, 1);
+
+  return target;
+}
+
+/* Common code used by allocate_dynamic_stack_space and get_dynamic_stack_base.
+   Adjust SIZE_ALIGN for SIZE, if needed, and returns the updated value.  */
+static unsigned
+adjust_size_align (rtx size, unsigned size_align)
+{
+  if (CONST_INT_P (size))
+    {
+      unsigned HOST_WIDE_INT lsb;
+
+      lsb = INTVAL (size);
+      lsb &= -lsb;
+
+      /* Watch out for overflow truncating to "unsigned".  */
+      if (lsb > UINT_MAX / BITS_PER_UNIT)
+	size_align = 1u << (HOST_BITS_PER_INT - 1);
+      else
+	size_align = (unsigned)lsb * BITS_PER_UNIT;
+    }
+  else if (size_align < BITS_PER_UNIT)
+    size_align = BITS_PER_UNIT;
+  return size_align;
+}
+
 /* Return an rtx representing the address of an area of memory dynamically
    pushed on the stack.
 
@@ -1216,22 +1262,7 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align,
   if (GET_MODE (size) != VOIDmode && GET_MODE (size) != Pmode)
     size = convert_to_mode (Pmode, size, 1);
 
-  /* Adjust SIZE_ALIGN, if needed.  */
-  if (CONST_INT_P (size))
-    {
-      unsigned HOST_WIDE_INT lsb;
-
-      lsb = INTVAL (size);
-      lsb &= -lsb;
-
-      /* Watch out for overflow truncating to "unsigned".  */
-      if (lsb > UINT_MAX / BITS_PER_UNIT)
-	size_align = 1u << (HOST_BITS_PER_INT - 1);
-      else
-	size_align = (unsigned)lsb * BITS_PER_UNIT;
-    }
-  else if (size_align < BITS_PER_UNIT)
-    size_align = BITS_PER_UNIT;
+  size_align = adjust_size_align (size, size_align);
 
   /* We can't attempt to minimize alignment necessary, because we don't
      know the final value of preferred_stack_boundary yet while executing
@@ -1473,23 +1504,7 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align,
     }
 
   if (must_align)
-    {
-      /* CEIL_DIV_EXPR needs to worry about the addition overflowing,
-	 but we know it can't.  So add ourselves and then do
-	 TRUNC_DIV_EXPR.  */
-      target = expand_binop (Pmode, add_optab, target,
-			     gen_int_mode (required_align / BITS_PER_UNIT - 1,
-					   Pmode),
-			     NULL_RTX, 1, OPTAB_LIB_WIDEN);
-      target = expand_divmod (0, TRUNC_DIV_EXPR, Pmode, target,
-			      gen_int_mode (required_align / BITS_PER_UNIT,
-					    Pmode),
-			      NULL_RTX, 1);
-      target = expand_mult (Pmode, target,
-			    gen_int_mode (required_align / BITS_PER_UNIT,
-					  Pmode),
-			    NULL_RTX, 1);
-    }
+    target = align_dynamic_address (target, required_align);
 
   /* Now that we've committed to a return value, mark its alignment.  */
   mark_reg_pointer (target, required_align);
@@ -1499,6 +1514,54 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align,
 
   return target;
 }
+
+/* Return an rtx representing the address of an area of memory already
+   statically pushed onto the stack in the virtual stack vars area.  (It is
+   assumed that the area is allocated in the function prologue.)
+
+   Any required stack pointer alignment is preserved.
+
+   SIZE is an rtx representing the size of the area.  SIZE must be a constant
+   in VOIDmode or Pmode larger than zero.
+
+   OFFSET is the offset of the area into the virtual stack vars area.
+
+   REQUIRED_ALIGN is the alignment (in bits) required for the region
+   of memory.  */
+
+rtx
+get_dynamic_stack_base (rtx size, HOST_WIDE_INT offset, unsigned required_align)
+{
+  rtx target;
+  unsigned size_align;
+  unsigned extra;
+
+  size_align = adjust_size_align (size, 0);
+
+  if (crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
+    crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
+
+  extra = (required_align - BITS_PER_UNIT) / BITS_PER_UNIT;
+  size = plus_constant (Pmode, size, extra);
+  size = force_operand (size, NULL_RTX);
+  if (extra && size_align > BITS_PER_UNIT)
+    size_align = BITS_PER_UNIT;
+
+  if (size_align % MAX_SUPPORTED_STACK_ALIGNMENT != 0)
+    size = round_push (size);
+
+  target = gen_reg_rtx (Pmode);
+  emit_move_insn (target, virtual_stack_vars_rtx);
+  target = expand_binop (Pmode, add_optab, target,
+			 gen_int_mode (offset, Pmode),
+			 NULL_RTX, 1, OPTAB_LIB_WIDEN);
+  target = align_dynamic_address (target, required_align);
+
+  /* Now that we've committed to a return value, mark its alignment.  */
+  mark_reg_pointer (target, required_align);
+
+  return target;
+}
 
 /* A front end may want to override GCC's stack checking by providing a
    run-time routine to call to check the stack, so provide a mechanism for
diff --git a/gcc/explow.h b/gcc/explow.h
index 52113db..aa42efb 100644
--- a/gcc/explow.h
+++ b/gcc/explow.h
@@ -87,6 +87,10 @@ extern void record_new_stack_level (void);
 /* Allocate some space on the stack dynamically and return its address.  */
 extern rtx allocate_dynamic_stack_space (rtx, unsigned, unsigned, bool);
 
+/* Returns the address of the dynamic stack space without allocating it.  */
+extern rtx get_dynamic_stack_base (rtx size, HOST_WIDE_INT offset,
+				   unsigned required_align);
+
 /* Emit one stack probe at ADDRESS, an address within the stack.  */
 extern void emit_stack_probe (rtx);
 
diff --git a/gcc/testsuite/gcc.dg/stack-usage-2.c b/gcc/testsuite/gcc.dg/stack-usage-2.c
index c2527d2..7d246ec 100644
--- a/gcc/testsuite/gcc.dg/stack-usage-2.c
+++ b/gcc/testsuite/gcc.dg/stack-usage-2.c
@@ -17,7 +17,9 @@ int foo2 (void)  /* { dg-warning "stack usage is \[0-9\]* bytes" } */
   return 0;
 }
 
-int foo3 (void) /* { dg-warning "stack usage might be \[0-9\]* bytes" } */
+/* The actual warning depends on whether stack space is allocated dynamically
+   or staically.  */
+int foo3 (void) /* { dg-warning "stack usage (might be)|(is) \[0-9\]* bytes" } */
 {
   char arr[1024] __attribute__((aligned (512)));
   arr[0] = 1;
-- 
2.3.0

Reply via email to