[Beignet] [PATCH] runtime: relax max group restrications.

Zhigang Gong Sun, 29 Jun 2014 19:05:26 -0700

From: Zhigang Gong <[email protected]>

If the kernel doesn't use slm/barrier, there is no hard limitation
for the max group size. And if the max work group size is more than
1024, the original 64 urb entry count will not be sufficient to hold
all the curbe payload. Change the entry count to max thread count to
fix this potential issue.


I found this bug when I tried to run phoronix test suite's juliagpu
test case on my MBA.

Signed-off-by: Zhigang Gong <[email protected]>
---
 src/cl_command_queue_gen7.c | 6 ------
 src/cl_kernel.c             | 3 ++-
 src/intel/intel_gpgpu.c     | 2 +-
 3 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index 9af4829..5b80d74 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -304,12 +304,6 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
   kernel.thread_n = thread_n = (local_sz + simd_sz - 1) / simd_sz;
   kernel.curbe_sz = cst_sz;
 
-  /* Barrier and SLM must fit into a single half slice */
-  if(kernel.use_slm > 0 && simd_sz == 8 && local_sz > 
MAX_GROUP_SIZE_IN_HALFSLICE){
-    fprintf(stderr, "Beignet: Work group CAN NOT large than %d when using 
barrier or local momery.\n", MAX_GROUP_SIZE_IN_HALFSLICE);
-    return CL_OUT_OF_RESOURCES;
-  }
-
   if (scratch_sz > ker->program->ctx->device->scratch_mem_size) {
     fprintf(stderr, "Beignet: Out of scratch memory %d.\n", scratch_sz);
     return CL_OUT_OF_RESOURCES;
diff --git a/src/cl_kernel.c b/src/cl_kernel.c
index 5d0b36e..76df00a 100644
--- a/src/cl_kernel.c
+++ b/src/cl_kernel.c
@@ -410,7 +410,8 @@ cl_kernel_work_group_sz(cl_kernel ker,
   for (i = 1; i < wk_dim; ++i)
     sz *= local_wk_sz[i];
 
-  if (sz > ker->program->ctx->device->max_work_group_size) {
+  if (interp_kernel_use_slm(ker->opaque) > 0 &&
+      sz > ker->program->ctx->device->max_work_group_size * 
(cl_kernel_get_simd_width(ker)/8)) {
     err = CL_INVALID_WORK_ITEM_SIZE;
     goto error;
   }
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index 3b89539..5decdfc 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -577,7 +577,7 @@ intel_gpgpu_state_init(intel_gpgpu_t *gpgpu,
   gpgpu->sampler_bitmap = ~((1 << max_sampler_n) - 1);
 
   /* URB */
-  gpgpu->urb.num_cs_entries = 64;
+  gpgpu->urb.num_cs_entries = max_threads;
   gpgpu->urb.size_cs_entry = size_cs_entry;
   gpgpu->max_threads = max_threads;
 
-- 
1.8.3.2

_______________________________________________
Beignet mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/beignet

[Beignet] [PATCH] runtime: relax max group restrications.

Reply via email to