We didn't reserve additional space for the alloca frame pointers that
need to be saved in the alloca space.

Fixes libgomp.c++/target-6.C execution test.
---
 libhsail-rt/rt/workitems.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/libhsail-rt/rt/workitems.c b/libhsail-rt/rt/workitems.c
index 39daf27..36c9169 100644
--- a/libhsail-rt/rt/workitems.c
+++ b/libhsail-rt/rt/workitems.c
@@ -63,6 +63,12 @@ static clock_t start_time;
 #define FIBER_STACK_SIZE (64*1024)
 #define GROUP_SEGMENT_ALIGN 256
+/* Reserve this amount of additional space in the alloca stack: the alloca
+   frame pointer is stored in the alloca frame itself, so space must be
+   reserved for it.  This supports at most 1024 functions with allocas in
+   a call chain.  */
+#define ALLOCA_OVERHEAD 1024*4
+
 uint32_t __hsail_workitemabsid (uint32_t dim, PHSAWorkItem *context);
  uint32_t __hsail_workitemid (uint32_t dim, PHSAWorkItem *context);
@@ -246,7 +252,7 @@ phsa_execute_wi_gang (PHSAKernelLaunchData *context, void *group_base_ptr,
           != 0)
     phsa_fatal_error (3);
 -  wg.alloca_stack_p = wg.private_segment_total_size;
+  wg.alloca_stack_p = wg.private_segment_total_size + ALLOCA_OVERHEAD;
   wg.alloca_frame_p = wg.alloca_stack_p;
   wg.initial_group_offset = group_local_offset;
@@ -446,7 +452,7 @@ phsa_execute_work_groups (PHSAKernelLaunchData *context, void *group_base_ptr,
           != 0)
     phsa_fatal_error (3);
 -  wg.alloca_stack_p = dp->private_segment_size * wg_size;
+  wg.alloca_stack_p = dp->private_segment_size * wg_size + ALLOCA_OVERHEAD;
   wg.alloca_frame_p = wg.alloca_stack_p;
    wg.private_base_ptr = private_base_ptr;
@@ -867,9 +873,12 @@ uint32_t
 __hsail_alloca (uint32_t size, uint32_t align, PHSAWorkItem *wi)
 {
   volatile PHSAWorkGroup *wg = wi->wg;
-  uint32_t new_pos = wg->alloca_stack_p - size;
+  int64_t new_pos = wg->alloca_stack_p - size;
   while (new_pos % align != 0)
     new_pos--;
+  if (new_pos < 0)
+    phsa_fatal_error (2);
+
   wg->alloca_stack_p = new_pos;
  #ifdef DEBUG_ALLOCA
--
2.7.4



Reply via email to