This version looks much cleaner.
LGTM.

-----Original Message-----
From: Beignet [mailto:[email protected]] On Behalf Of 
Zhigang Gong
Sent: Tuesday, July 01, 2014 3:09 PM
To: [email protected]
Cc: Gong, Zhigang
Subject: [Beignet] [PATCH v3] runtime: fix potential curbe allocation issue.

According to the spec, different platforms have different curbe allocation
restrictions. The previous code statically set the curbe allocation size to
480, which is not correct.

This patch changes the code to always set the number of curbe entries to 64,
which is the maximum work group size, and to set a proper curbe allocation
size according to the platform's hardware limit and a relatively reasonable
limit on kernel argument usage.

v3:
When we call load_vfe_state, we already know the exact constant URB size used
by the current kernel, so we can choose the smallest valid curbe size for this
kernel. If the size exceeds the hardware limitation, we report it as a
warning here.

Signed-off-by: Zhigang Gong <[email protected]>
---
 src/cl_gt_device.h      |  2 +-
 src/intel/intel_gpgpu.c | 44 ++++++++++++++++++++++++++++++--------------
 2 files changed, 31 insertions(+), 15 deletions(-)

diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h index 63c9047..97ba7e2 
100644
--- a/src/cl_gt_device.h
+++ b/src/cl_gt_device.h
@@ -39,7 +39,7 @@
 .address_bits = 32,
 .max_mem_alloc_size = 256 * 1024 * 1024,  .image_support = CL_TRUE, 
-.max_read_image_args = 128,
+.max_read_image_args = 16,
 .max_write_image_args = 8,
 .image_max_array_size = 2048,
 .image2d_max_width = 8192,
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c index 
d403aa0..5957306 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -116,7 +116,7 @@ struct intel_gpgpu
   struct {
     uint32_t num_cs_entries;
     uint32_t size_cs_entry;  /* size of one entry in 512bit elements */
-  } urb;
+  } curb;
 
   uint32_t max_threads;      /* max threads requested by the user */
 };
@@ -275,6 +275,29 @@ uint32_t intel_gpgpu_get_scratch_index_gen75(uint32_t 
size) {
     return index;
 }
 
+static cl_int
+intel_gpgpu_get_max_curbe_size(uint32_t device_id) {
+  if (IS_BAYTRAIL_T(device_id) ||
+      IS_IVB_GT1(device_id))
+    return 992;
+  else
+    return 2016;
+}
+
+static cl_int
+intel_gpgpu_get_curbe_size(intel_gpgpu_t *gpgpu) {
+  int curbe_size = gpgpu->curb.size_cs_entry * 
+gpgpu->curb.num_cs_entries;
+  int max_curbe_size = 
+intel_gpgpu_get_max_curbe_size(gpgpu->drv->device_id);
+
+  if (curbe_size > max_curbe_size) {
+    fprintf(stderr, "warning, curbe size exceed limitation.\n");
+    return max_curbe_size;
+  } else
+    return curbe_size;
+}
+
 static void
 intel_gpgpu_load_vfe_state(intel_gpgpu_t *gpgpu)  { @@ -293,10 +316,10 @@ 
intel_gpgpu_load_vfe_state(intel_gpgpu_t *gpgpu)
     OUT_BATCH(gpgpu->batch, 0);
   }
   /* max_thread | urb entries | (reset_gateway|bypass_gate_way | gpgpu_mode) */
-  OUT_BATCH(gpgpu->batch, 0 | ((gpgpu->max_threads - 1) << 16) | (64 << 8) | 
0xc4);
+  OUT_BATCH(gpgpu->batch, 0 | ((gpgpu->max_threads - 1) << 16) | (0 << 
+ 8) | 0xc4);
   OUT_BATCH(gpgpu->batch, 0);
   /* curbe_size */
-  OUT_BATCH(gpgpu->batch, 480);
+  OUT_BATCH(gpgpu->batch, intel_gpgpu_get_curbe_size(gpgpu));
   OUT_BATCH(gpgpu->batch, 0);
   OUT_BATCH(gpgpu->batch, 0);
   OUT_BATCH(gpgpu->batch, 0);
@@ -309,14 +332,7 @@ intel_gpgpu_load_curbe_buffer(intel_gpgpu_t *gpgpu)
   BEGIN_BATCH(gpgpu->batch, 4);
   OUT_BATCH(gpgpu->batch, CMD(2,0,1) | (4 - 2));  /* length-2 */
   OUT_BATCH(gpgpu->batch, 0);                     /* mbz */
-// XXX
-#if 1
-  OUT_BATCH(gpgpu->batch,
-            gpgpu->urb.size_cs_entry*
-            gpgpu->urb.num_cs_entries*32);
-#else
-  OUT_BATCH(gpgpu->batch, 5120);
-#endif
+  OUT_BATCH(gpgpu->batch, intel_gpgpu_get_curbe_size(gpgpu) * 32);
   OUT_RELOC(gpgpu->batch, gpgpu->aux_buf.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 
gpgpu->aux_offset.curbe_offset);
   ADVANCE_BATCH(gpgpu->batch);
 }
@@ -577,8 +593,8 @@ intel_gpgpu_state_init(intel_gpgpu_t *gpgpu,
   gpgpu->sampler_bitmap = ~((1 << max_sampler_n) - 1);
 
   /* URB */
-  gpgpu->urb.num_cs_entries = max_threads;
-  gpgpu->urb.size_cs_entry = size_cs_entry;
+  gpgpu->curb.num_cs_entries = 64;
+  gpgpu->curb.size_cs_entry = size_cs_entry;
   gpgpu->max_threads = max_threads;
 
   if (gpgpu->printf_b.ibo)
@@ -616,7 +632,7 @@ intel_gpgpu_state_init(intel_gpgpu_t *gpgpu,
   //curbe must be 32 bytes aligned
   size_aux = ALIGN(size_aux, 32);
   gpgpu->aux_offset.curbe_offset = size_aux;
-  size_aux += gpgpu->urb.num_cs_entries * gpgpu->urb.size_cs_entry * 64;
+  size_aux += gpgpu->curb.num_cs_entries * gpgpu->curb.size_cs_entry * 
+ 32;
 
   //idrt must be 32 bytes aligned
   size_aux = ALIGN(size_aux, 32);
--
1.8.3.2

_______________________________________________
Beignet mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/beignet
_______________________________________________
Beignet mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/beignet

Reply via email to