ashi1 created this revision.
ashi1 added reviewers: bader, Anastasia, yaxunl.
ashi1 added a subscriber: cfe-commits.
ashi1 set the repository for this revision to rL LLVM.

ndrange_t needs to be emitted as a struct type, since it has to be allocated
on the stack when used as a local variable or as a function return value.

Repository:
  rL LLVM

https://reviews.llvm.org/D23086

Files:
  lib/CodeGen/CGOpenCLRuntime.cpp
  test/CodeGenOpenCL/cl20-device-side-enqueue.cl

Index: test/CodeGenOpenCL/cl20-device-side-enqueue.cl
===================================================================
--- test/CodeGenOpenCL/cl20-device-side-enqueue.cl
+++ test/CodeGenOpenCL/cl20-device-side-enqueue.cl
@@ -9,7 +9,7 @@
   queue_t default_queue;
   // CHECK: %flags = alloca i32
   unsigned flags = 0;
-  // CHECK: %ndrange = alloca %opencl.ndrange_t*
+  // CHECK: %ndrange = alloca %ndrange_t
   ndrange_t ndrange;
   // CHECK: %clk_event = alloca %opencl.clk_event_t*
   clk_event_t clk_event;
@@ -20,10 +20,10 @@
 
   // CHECK: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t*, %opencl.queue_t** %default_queue
   // CHECK: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
-  // CHECK: [[NDR:%[0-9]+]] = load %opencl.ndrange_t*, %opencl.ndrange_t** %ndrange
+  // CHECK: [[NDR:%[0-9]+]] = load %ndrange_t, %ndrange_t* %ndrange
   // CHECK: [[BL:%[0-9]+]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block to void ()*
   // CHECK: [[BL_I8:%[0-9]+]] = bitcast void ()* [[BL]] to i8*
-  // CHECK: call i32 @__enqueue_kernel_basic(%opencl.queue_t* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8* [[BL_I8]])
+  // CHECK: call i32 @__enqueue_kernel_basic(%opencl.queue_t* [[DEF_Q]], i32 [[FLAGS]], %ndrange_t [[NDR]], i8* [[BL_I8]])
   enqueue_kernel(default_queue, flags, ndrange,
                  ^(void) {
                    a[i] = b[i];
@@ -31,10 +31,10 @@
 
   // CHECK: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t*, %opencl.queue_t** %default_queue
   // CHECK: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
-  // CHECK: [[NDR:%[0-9]+]] = load %opencl.ndrange_t*, %opencl.ndrange_t** %ndrange
+  // CHECK: [[NDR:%[0-9]+]] = load %ndrange_t, %ndrange_t* %ndrange
   // CHECK: [[BL:%[0-9]+]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to void ()*
   // CHECK: [[BL_I8:%[0-9]+]] = bitcast void ()* [[BL]] to i8*
-  // CHECK: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i32 2, %opencl.clk_event_t** %event_wait_list, %opencl.clk_event_t** %clk_event, i8* [[BL_I8]])
+  // CHECK: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* [[DEF_Q]], i32 [[FLAGS]], %ndrange_t [[NDR]], i32 2, %opencl.clk_event_t** %event_wait_list, %opencl.clk_event_t** %clk_event, i8* [[BL_I8]])
   enqueue_kernel(default_queue, flags, ndrange, 2, &event_wait_list, &clk_event,
                  ^(void) {
                    a[i] = b[i];
@@ -42,8 +42,8 @@
 
   // CHECK: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t*, %opencl.queue_t** %default_queue
   // CHECK: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
-  // CHECK: [[NDR:%[0-9]+]] = load %opencl.ndrange_t*, %opencl.ndrange_t** %ndrange
-  // CHECK: call i32 (%opencl.queue_t*, i32, %opencl.ndrange_t*, i8*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i32 256)
+  // CHECK: [[NDR:%[0-9]+]] = load %ndrange_t, %ndrange_t* %ndrange
+  // CHECK: call i32 (%opencl.queue_t*, i32, %ndrange_t, i8*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t* [[DEF_Q]], i32 [[FLAGS]], %ndrange_t [[NDR]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i32 256)
   enqueue_kernel(default_queue, flags, ndrange,
                  ^(local void *p) {
                    return;
@@ -52,9 +52,9 @@
   char c;
   // CHECK: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t*, %opencl.queue_t** %default_queue
   // CHECK: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
-  // CHECK: [[NDR:%[0-9]+]] = load %opencl.ndrange_t*, %opencl.ndrange_t** %ndrange
+  // CHECK: [[NDR:%[0-9]+]] = load %ndrange_t, %ndrange_t* %ndrange
   // CHECK: [[SIZE:%[0-9]+]] = zext i8 {{%[0-9]+}} to i32
-  // CHECK: call i32 (%opencl.queue_t*, i32, %opencl.ndrange_t*, i8*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i32 [[SIZE]])
+  // CHECK: call i32 (%opencl.queue_t*, i32, %ndrange_t, i8*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t* [[DEF_Q]], i32 [[FLAGS]], %ndrange_t [[NDR]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i32 [[SIZE]])
   enqueue_kernel(default_queue, flags, ndrange,
                  ^(local void *p) {
                    return;
@@ -63,9 +63,9 @@
 
   // CHECK: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t*, %opencl.queue_t** %default_queue
   // CHECK: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
-  // CHECK: [[NDR:%[0-9]+]] = load %opencl.ndrange_t*, %opencl.ndrange_t** %ndrange
+  // CHECK: [[NDR:%[0-9]+]] = load %ndrange_t, %ndrange_t* %ndrange
   // CHECK: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0
-  // CHECK: call i32 (%opencl.queue_t*, i32, %opencl.ndrange_t*, i32, %opencl.clk_event_t**, %opencl.clk_event_t**, i8*, i32, ...) @__enqueue_kernel_events_vaargs(%opencl.queue_t* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i32 2, %opencl.clk_event_t** [[AD]], %opencl.clk_event_t** %clk_event, i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i32 256)
+  // CHECK: call i32 (%opencl.queue_t*, i32, %ndrange_t, i32, %opencl.clk_event_t**, %opencl.clk_event_t**, i8*, i32, ...) @__enqueue_kernel_events_vaargs(%opencl.queue_t* [[DEF_Q]], i32 [[FLAGS]], %ndrange_t [[NDR]], i32 2, %opencl.clk_event_t** [[AD]], %opencl.clk_event_t** %clk_event, i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i32 256)
   enqueue_kernel(default_queue, flags, ndrange, 2, event_wait_list2, &clk_event,
                  ^(local void *p) {
                    return;
@@ -74,10 +74,10 @@
 
   // CHECK: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t*, %opencl.queue_t** %default_queue
   // CHECK: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
-  // CHECK: [[NDR:%[0-9]+]] = load %opencl.ndrange_t*, %opencl.ndrange_t** %ndrange
+  // CHECK: [[NDR:%[0-9]+]] = load %ndrange_t, %ndrange_t* %ndrange
   // CHECK: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0
   // CHECK: [[SIZE:%[0-9]+]] = zext i8 {{%[0-9]+}} to i32
-  // CHECK: call i32 (%opencl.queue_t*, i32, %opencl.ndrange_t*, i32, %opencl.clk_event_t**, %opencl.clk_event_t**, i8*, i32, ...) @__enqueue_kernel_events_vaargs(%opencl.queue_t* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i32 2, %opencl.clk_event_t** [[AD]], %opencl.clk_event_t** %clk_event, i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i32 [[SIZE]])
+  // CHECK: call i32 (%opencl.queue_t*, i32, %ndrange_t, i32, %opencl.clk_event_t**, %opencl.clk_event_t**, i8*, i32, ...) @__enqueue_kernel_events_vaargs(%opencl.queue_t* [[DEF_Q]], i32 [[FLAGS]], %ndrange_t [[NDR]], i32 2, %opencl.clk_event_t** [[AD]], %opencl.clk_event_t** %clk_event, i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i32 [[SIZE]])
   enqueue_kernel(default_queue, flags, ndrange, 2, event_wait_list2, &clk_event,
                  ^(local void *p) {
                    return;
Index: lib/CodeGen/CGOpenCLRuntime.cpp
===================================================================
--- lib/CodeGen/CGOpenCLRuntime.cpp
+++ lib/CodeGen/CGOpenCLRuntime.cpp
@@ -25,6 +25,24 @@
 
 CGOpenCLRuntime::~CGOpenCLRuntime() {}
 
+/// \brief Create the LLVM struct type for ndrange_t per the SPIR 2.0 spec.
+static llvm::StructType*
+         getStructTyForNDRange(CodeGenModule &CGM) {
+
+  llvm::SmallVector<llvm::Type*,4> EleTypes;
+  llvm::Type* ArrEleType =
+      llvm::ArrayType::get(llvm::IntegerType::
+                         get(CGM.getLLVMContext(),
+                             CGM.PointerWidthInBits), 3); // [3 x iN], N = PointerWidthInBits
+
+  EleTypes.push_back(CGM.Int32Ty);    // work_dim
+  EleTypes.push_back(ArrEleType); // global_work_offset
+  EleTypes.push_back(ArrEleType); // global_work_size
+  EleTypes.push_back(ArrEleType); // local_work_size
+
+  return llvm::StructType::create(EleTypes, "ndrange_t");
+}
+
 void CGOpenCLRuntime::EmitWorkGroupLocalVarDecl(CodeGenFunction &CGF,
                                                 const VarDecl &D) {
   return CGF.EmitStaticVarDecl(D, llvm::GlobalValue::InternalLinkage);
@@ -59,8 +77,7 @@
     return llvm::PointerType::get(
         llvm::StructType::create(Ctx, "opencl.queue_t"), 0);
   case BuiltinType::OCLNDRange:
-    return llvm::PointerType::get(
-        llvm::StructType::create(Ctx, "opencl.ndrange_t"), 0);
+    return getStructTyForNDRange(CGM);
   case BuiltinType::OCLReserveID:
     return llvm::PointerType::get(
         llvm::StructType::create(Ctx, "opencl.reserve_id_t"), 0);
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to