Anastasia created this revision.
Anastasia added a reviewer: yaxunl.
Anastasia added a subscriber: cfe-commits.

Parameters representing number of events as well as sizes of enqueued block 
arguments in enqueue_kernel function are specified as uint type. This prevents 
passing sizeof(int) type expressions on the  64 bit architectures because they 
return size_t which is a 64 bit wide in contrast to uint that is always 32 bits.

The suggestion is to accept any integer type as soon as the compiler can prove 
that the range fits in the data width of uint for number of event or size_t for 
block argument sizes (and if not appropriate error is emitted).

The proposed enqueue_kernel signature would be:

  int enqueue_kernel (queue_t queue, kernel_enqueue_flags_t flags, const 
ndrange_t ndrange, uint num_events_in_wait_list, const clk_event_t 
*event_wait_list, clk_event_t *event_ret, void (^block)(local void *, ...), 
size_t size0, ...)

and therefore compiler should allow the call:

  enqueue_kernel(default_queue, flags, ndrange, 
sizeof(event_wait_list)/sizeof(event_wait_list[0]), event_wait_list2, 
&clk_event,
                 ^(local void *p) {
                   return;
                 },
                 sizeof(int[2]));

See tests for more examples.


https://reviews.llvm.org/D26509

Files:
  include/clang/Basic/DiagnosticSemaKinds.td
  lib/CodeGen/CGBuiltin.cpp
  lib/Sema/SemaChecking.cpp
  test/CodeGenOpenCL/cl20-device-side-enqueue.cl
  test/SemaOpenCL/cl20-device-side-enqueue.cl

Index: test/SemaOpenCL/cl20-device-side-enqueue.cl
===================================================================
--- test/SemaOpenCL/cl20-device-side-enqueue.cl
+++ test/SemaOpenCL/cl20-device-side-enqueue.cl
@@ -1,5 +1,6 @@
 // RUN: %clang_cc1 %s -cl-std=CL2.0 -verify -pedantic -fsyntax-only
-// RUN: %clang_cc1 %s -cl-std=CL2.0 -verify -pedantic -fsyntax-only -Wconversion -DWCONV
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -triple "spir-unknown-unknown" -verify -pedantic -fsyntax-only -DB32
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -triple "spir64-unknown-unknown" -verify -pedantic -fsyntax-only -Wconversion -DWCONV
 
 // Diagnostic tests for different overloads of enqueue_kernel from Table 6.13.17.1 of OpenCL 2.0 Spec.
 kernel void enqueue_kernel_tests() {
@@ -39,6 +40,10 @@
     return 0;
   });
 
+  enqueue_kernel(default_queue, flags, ndrange, vptr, &event_wait_list, &evt, ^(void) { // expected-error{{illegal call to enqueue_kernel, parameter needs to be specified as integer type}}
+    return 0;
+  });
+
   enqueue_kernel(default_queue, flags, ndrange, 1, vptr, &evt, ^(void) // expected-error{{illegal call to enqueue_kernel, expected 'clk_event_t *' argument type}}
                                                                {
                                                                  return 0;
@@ -62,16 +67,35 @@
                  ^(local void *a, local void *b) {
                    return 0;
                  },
-                 1024, 1024L); // expected-error{{local memory sizes need to be specified as uint}}
+                 1024L, 1024);
+
+  enqueue_kernel(default_queue, flags, ndrange,
+                 ^(local void *a, local void *b) {
+                   return 0;
+                 },
+                 1024, 4294967296L);
+#ifdef B32
+// expected-warning@-2{{implicit conversion from 'long' to 'unsigned int' changes value from 4294967296 to 0}}
+#endif
 
   char c;
   enqueue_kernel(default_queue, flags, ndrange,
                  ^(local void *a, local void *b) {
                    return 0;
                  },
-                 c, 1024);
+                 c, 1024L);
 #ifdef WCONV
-// expected-warning@-2{{implicit conversion changes signedness: 'char' to 'unsigned int'}}
+// expected-warning-re@-2{{implicit conversion changes signedness: 'char' to 'unsigned {{int|long}}'}}
+#endif
+#define UINT_MAX 4294967295
+
+  enqueue_kernel(default_queue, flags, ndrange,
+                 ^(local void *a, local void *b) {
+                   return 0;
+                 },
+                 sizeof(int), sizeof(int) * UINT_MAX);
+#ifdef B32
+// expected-warning@-2{{implicit conversion from 'long' to 'unsigned int' changes value from 17179869180 to 4294967292}}
 #endif
 
   typedef void (^bl_A_t)(local void *);
@@ -97,10 +121,13 @@
                  ^(local void *a, local void *b) {
                    return 0;
                  },
-                 illegal_mem_size, illegal_mem_size); // expected-error{{local memory sizes need to be specified as uint}} expected-error{{local memory sizes need to be specified as uint}}
-#ifdef WCONV
-// expected-warning@-2{{implicit conversion turns floating-point number into integer: 'float' to 'unsigned int'}} expected-warning@-2{{implicit conversion turns floating-point number into integer: 'float' to 'unsigned int'}}
-#endif
+                 illegal_mem_size, illegal_mem_size); // expected-error{{illegal call to enqueue_kernel, parameter needs to be specified as integer type}} expected-error{{illegal call to enqueue_kernel, parameter needs to be specified as integer type}}
+
+  enqueue_kernel(default_queue, flags, ndrange,
+                 ^(local void *a, local void *b) {
+                   return 0;
+                 },
+                 illegal_mem_size, 1024); // expected-error{{illegal call to enqueue_kernel, parameter needs to be specified as integer type}}
 
   // Testing the forth overload type
   enqueue_kernel(default_queue, flags, ndrange, 1, event_wait_list2, &evt,
Index: test/CodeGenOpenCL/cl20-device-side-enqueue.cl
===================================================================
--- test/CodeGenOpenCL/cl20-device-side-enqueue.cl
+++ test/CodeGenOpenCL/cl20-device-side-enqueue.cl
@@ -1,89 +1,120 @@
-// RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir-unknown-unknown" | FileCOMMON %s --COMMON-prefix=COMMON --COMMON-prefix=B32
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o -  -triple "spir64-unknown-unknown" | FileCOMMON %s --COMMON-prefix=COMMON --COMMON-prefix=B64
 
 typedef void (^bl_t)(local void *);
-
+// COMMON: define internal spir_func void @block_G_block_invoke(i8 addrspace(4)* %.block_descriptor, i8 addrspace(2)* %a)
 const bl_t block_G = (bl_t) ^ (local void *a) {};
 
 kernel void device_side_enqueue(global int *a, global int *b, int i) {
-  // CHECK: %default_queue = alloca %opencl.queue_t*
+  // COMMON: %default_queue = alloca %opencl.queue_t*
   queue_t default_queue;
-  // CHECK: %flags = alloca i32
+  // COMMON: %flags = alloca i32
   unsigned flags = 0;
-  // CHECK: %ndrange = alloca %opencl.ndrange_t*
+  // COMMON: %ndrange = alloca %opencl.ndrange_t*
   ndrange_t ndrange;
-  // CHECK: %clk_event = alloca %opencl.clk_event_t*
+  // COMMON: %clk_event = alloca %opencl.clk_event_t*
   clk_event_t clk_event;
-  // CHECK: %event_wait_list = alloca %opencl.clk_event_t*
+  // COMMON: %event_wait_list = alloca %opencl.clk_event_t*
   clk_event_t event_wait_list;
-  // CHECK: %event_wait_list2 = alloca [1 x %opencl.clk_event_t*]
+  // COMMON: %event_wait_list2 = alloca [1 x %opencl.clk_event_t*]
   clk_event_t event_wait_list2[] = {clk_event};
 
-  // CHECK: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t*, %opencl.queue_t** %default_queue
-  // CHECK: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
-  // CHECK: [[NDR:%[0-9]+]] = load %opencl.ndrange_t*, %opencl.ndrange_t** %ndrange
-  // CHECK: [[BL:%[0-9]+]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor addrspace(3)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block to void ()*
-  // CHECK: [[BL_I8:%[0-9]+]] = bitcast void ()* [[BL]] to i8*
-  // CHECK: call i32 @__enqueue_kernel_basic(%opencl.queue_t* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8* [[BL_I8]])
+  // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t*, %opencl.queue_t** %default_queue
+  // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
+  // COMMON: [[NDR:%[0-9]+]] = load %opencl.ndrange_t*, %opencl.ndrange_t** %ndrange
+  // COMMON: [[BL:%[0-9]+]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor addrspace(3)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block to void ()*
+  // COMMON: [[BL_I8:%[0-9]+]] = bitcast void ()* [[BL]] to i8*
+  // COMMON: call i32 @__enqueue_kernel_basic(%opencl.queue_t* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8* [[BL_I8]])
   enqueue_kernel(default_queue, flags, ndrange,
                  ^(void) {
                    a[i] = b[i];
                  });
 
-  // CHECK: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t*, %opencl.queue_t** %default_queue
-  // CHECK: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
-  // CHECK: [[NDR:%[0-9]+]] = load %opencl.ndrange_t*, %opencl.ndrange_t** %ndrange
-  // CHECK: [[BL:%[0-9]+]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor addrspace(3)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to void ()*
-  // CHECK: [[BL_I8:%[0-9]+]] = bitcast void ()* [[BL]] to i8*
-  // CHECK: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i32 2, %opencl.clk_event_t** %event_wait_list, %opencl.clk_event_t** %clk_event, i8* [[BL_I8]])
+  // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t*, %opencl.queue_t** %default_queue
+  // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
+  // COMMON: [[NDR:%[0-9]+]] = load %opencl.ndrange_t*, %opencl.ndrange_t** %ndrange
+  // COMMON: [[BL:%[0-9]+]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor addrspace(3)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to void ()*
+  // COMMON: [[BL_I8:%[0-9]+]] = bitcast void ()* [[BL]] to i8*
+  // COMMON: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i32 2, %opencl.clk_event_t** %event_wait_list, %opencl.clk_event_t** %clk_event, i8* [[BL_I8]])
   enqueue_kernel(default_queue, flags, ndrange, 2, &event_wait_list, &clk_event,
                  ^(void) {
                    a[i] = b[i];
                  });
 
-  // CHECK: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t*, %opencl.queue_t** %default_queue
-  // CHECK: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
-  // CHECK: [[NDR:%[0-9]+]] = load %opencl.ndrange_t*, %opencl.ndrange_t** %ndrange
-  // CHECK: call i32 (%opencl.queue_t*, i32, %opencl.ndrange_t*, i8*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(3)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i32 256)
+  // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t*, %opencl.queue_t** %default_queue
+  // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
+  // COMMON: [[NDR:%[0-9]+]] = load %opencl.ndrange_t*, %opencl.ndrange_t** %ndrange
+  // B32: call i32 (%opencl.queue_t*, i32, %opencl.ndrange_t*, i8*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(3)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i32 256)
+  // B64: call i32 (%opencl.queue_t*, i32, %opencl.ndrange_t*, i8*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(3)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i64 256)
   enqueue_kernel(default_queue, flags, ndrange,
                  ^(local void *p) {
                    return;
                  },
                  256);
   char c;
-  // CHECK: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t*, %opencl.queue_t** %default_queue
-  // CHECK: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
-  // CHECK: [[NDR:%[0-9]+]] = load %opencl.ndrange_t*, %opencl.ndrange_t** %ndrange
-  // CHECK: [[SIZE:%[0-9]+]] = zext i8 {{%[0-9]+}} to i32
-  // CHECK: call i32 (%opencl.queue_t*, i32, %opencl.ndrange_t*, i8*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(3)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i32 [[SIZE]])
+  // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t*, %opencl.queue_t** %default_queue
+  // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
+  // COMMON: [[NDR:%[0-9]+]] = load %opencl.ndrange_t*, %opencl.ndrange_t** %ndrange
+  // B32: [[SIZE:%[0-9]+]] = zext i8 {{%[0-9]+}} to i32
+  // B32: call i32 (%opencl.queue_t*, i32, %opencl.ndrange_t*, i8*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(3)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i32 [[SIZE]])
+  // B64: [[SIZE:%[0-9]+]] = zext i8 {{%[0-9]+}} to i64
+  // B64: call i32 (%opencl.queue_t*, i32, %opencl.ndrange_t*, i8*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(3)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i64 [[SIZE]])
   enqueue_kernel(default_queue, flags, ndrange,
                  ^(local void *p) {
                    return;
                  },
                  c);
 
-  // CHECK: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t*, %opencl.queue_t** %default_queue
-  // CHECK: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
-  // CHECK: [[NDR:%[0-9]+]] = load %opencl.ndrange_t*, %opencl.ndrange_t** %ndrange
-  // CHECK: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0
-  // CHECK: call i32 (%opencl.queue_t*, i32, %opencl.ndrange_t*, i32, %opencl.clk_event_t**, %opencl.clk_event_t**, i8*, i32, ...) @__enqueue_kernel_events_vaargs(%opencl.queue_t* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i32 2, %opencl.clk_event_t** [[AD]], %opencl.clk_event_t** %clk_event, i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(3)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i32 256)
+  // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t*, %opencl.queue_t** %default_queue
+  // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
+  // COMMON: [[NDR:%[0-9]+]] = load %opencl.ndrange_t*, %opencl.ndrange_t** %ndrange
+  // COMMON: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0
+  // B32: call i32 (%opencl.queue_t*, i32, %opencl.ndrange_t*, i32, %opencl.clk_event_t**, %opencl.clk_event_t**, i8*, i32, ...) @__enqueue_kernel_events_vaargs(%opencl.queue_t* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i32 2, %opencl.clk_event_t** [[AD]], %opencl.clk_event_t** %clk_event, i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(3)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i32 256)
+  // B64: call i32 (%opencl.queue_t*, i32, %opencl.ndrange_t*, i32, %opencl.clk_event_t**, %opencl.clk_event_t**, i8*, i32, ...) @__enqueue_kernel_events_vaargs(%opencl.queue_t* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i32 2, %opencl.clk_event_t** [[AD]], %opencl.clk_event_t** %clk_event, i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(3)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i64 256)
   enqueue_kernel(default_queue, flags, ndrange, 2, event_wait_list2, &clk_event,
                  ^(local void *p) {
                    return;
                  },
                  256);
 
-  // CHECK: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t*, %opencl.queue_t** %default_queue
-  // CHECK: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
-  // CHECK: [[NDR:%[0-9]+]] = load %opencl.ndrange_t*, %opencl.ndrange_t** %ndrange
-  // CHECK: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0
-  // CHECK: [[SIZE:%[0-9]+]] = zext i8 {{%[0-9]+}} to i32
-  // CHECK: call i32 (%opencl.queue_t*, i32, %opencl.ndrange_t*, i32, %opencl.clk_event_t**, %opencl.clk_event_t**, i8*, i32, ...) @__enqueue_kernel_events_vaargs(%opencl.queue_t* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i32 2, %opencl.clk_event_t** [[AD]], %opencl.clk_event_t** %clk_event, i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(3)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i32 [[SIZE]])
+  // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t*, %opencl.queue_t** %default_queue
+  // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
+  // COMMON: [[NDR:%[0-9]+]] = load %opencl.ndrange_t*, %opencl.ndrange_t** %ndrange
+  // COMMON: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0
+  // COMMON: [[SIZE:%[0-9]+]] = zext i8 {{%[0-9]+}} to i32
+  // B32: [[SIZE:%[0-9]+]] = zext i8 {{%[0-9]+}} to i32
+  // B32: call i32 (%opencl.queue_t*, i32, %opencl.ndrange_t*, i32, %opencl.clk_event_t**, %opencl.clk_event_t**, i8*, i32, ...) @__enqueue_kernel_events_vaargs(%opencl.queue_t* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i32 2, %opencl.clk_event_t** [[AD]], %opencl.clk_event_t** %clk_event, i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(3)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i32 [[SIZE]])
+  // B64: [[SIZE:%[0-9]+]] = zext i8 {{%[0-9]+}} to i64
+  // B32: call i32 (%opencl.queue_t*, i32, %opencl.ndrange_t*, i32, %opencl.clk_event_t**, %opencl.clk_event_t**, i8*, i32, ...) @__enqueue_kernel_events_vaargs(%opencl.queue_t* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i32 2, %opencl.clk_event_t** [[AD]], %opencl.clk_event_t** %clk_event, i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(3)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i64 [[SIZE]])
   enqueue_kernel(default_queue, flags, ndrange, 2, event_wait_list2, &clk_event,
                  ^(local void *p) {
                    return;
                  },
                  c);
 
+  long l;
+  // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t*, %opencl.queue_t** %default_queue
+  // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
+  // B32: [[SIZE:%[0-9]+]] = trunc i64 {{%[0-9]+}} to i32
+  // B32: call i32 (%opencl.queue_t*, i32, %opencl.ndrange_t*, i8*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(3)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i32 [[SIZE]])
+  // B64: [[SIZE:%[0-9]+]] = load i64, i64* %l
+  // B64: call i32 (%opencl.queue_t*, i32, %opencl.ndrange_t*, i8*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(3)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i64 [[SIZE]])
+  enqueue_kernel(default_queue, flags, ndrange,
+                 ^(local void *p) {
+                   return;
+                 },
+                 l);
+
+  // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t*, %opencl.queue_t** %default_queue
+  // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
+  // B32: call i32 (%opencl.queue_t*, i32, %opencl.ndrange_t*, i8*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(3)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i32 0)
+  // B64: call i32 (%opencl.queue_t*, i32, %opencl.ndrange_t*, i8*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(3)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i64 4294967296)
+  enqueue_kernel(default_queue, flags, ndrange,
+                 ^(local void *p) {
+                   return;
+                 },
+                 4294967296L);
+
   void (^const block_A)(void) = ^{
     return;
   };
@@ -91,20 +122,20 @@
     return;
   };
 
-  // CHECK: [[BL:%[0-9]+]] = load void ()*, void ()** %block_A
-  // CHECK: [[BL_I8:%[0-9]+]] = bitcast void ()* [[BL]] to i8*
-  // CHECK: call i32 @__get_kernel_work_group_size_impl(i8* [[BL_I8]])
+  // COMMON: [[BL:%[0-9]+]] = load void ()*, void ()** %block_A
+  // COMMON: [[BL_I8:%[0-9]+]] = bitcast void ()* [[BL]] to i8*
+  // COMMON: call i32 @__get_kernel_work_group_size_impl(i8* [[BL_I8]])
   unsigned size = get_kernel_work_group_size(block_A);
-  // CHECK: [[BL:%[0-9]+]] = load void (i8 addrspace(2)*)*, void (i8 addrspace(2)*)** %block_B
-  // CHECK: [[BL_I8:%[0-9]+]] = bitcast void (i8 addrspace(2)*)* [[BL]] to i8*
-  // CHECK: call i32 @__get_kernel_work_group_size_impl(i8* [[BL_I8]])
+  // COMMON: [[BL:%[0-9]+]] = load void (i8 addrspace(2)*)*, void (i8 addrspace(2)*)** %block_B
+  // COMMON: [[BL_I8:%[0-9]+]] = bitcast void (i8 addrspace(2)*)* [[BL]] to i8*
+  // COMMON: call i32 @__get_kernel_work_group_size_impl(i8* [[BL_I8]])
   size = get_kernel_work_group_size(block_B);
-  // CHECK: [[BL:%[0-9]+]] = load void ()*, void ()** %block_A
-  // CHECK: [[BL_I8:%[0-9]+]] = bitcast void ()* [[BL]] to i8*
-  // CHECK: call i32 @__get_kernel_preferred_work_group_multiple_impl(i8* [[BL_I8]])
+  // COMMON: [[BL:%[0-9]+]] = load void ()*, void ()** %block_A
+  // COMMON: [[BL_I8:%[0-9]+]] = bitcast void ()* [[BL]] to i8*
+  // COMMON: call i32 @__get_kernel_preferred_work_group_multiple_impl(i8* [[BL_I8]])
   size = get_kernel_preferred_work_group_size_multiple(block_A);
-  // CHECK: [[BL:%[0-9]+]] = load void (i8 addrspace(2)*)*, void (i8 addrspace(2)*)* addrspace(1)* @block_G
-  // CHECK: [[BL_I8:%[0-9]+]] = bitcast void (i8 addrspace(2)*)* [[BL]] to i8*
-  // CHECK: call i32 @__get_kernel_preferred_work_group_multiple_impl(i8* [[BL_I8]])
+  // COMMON: [[BL:%[0-9]+]] = load void (i8 addrspace(2)*)*, void (i8 addrspace(2)*)* addrspace(1)* @block_G
+  // COMMON: [[BL_I8:%[0-9]+]] = bitcast void (i8 addrspace(2)*)* [[BL]] to i8*
+  // COMMON: call i32 @__get_kernel_preferred_work_group_multiple_impl(i8* [[BL_I8]])
   size = get_kernel_preferred_work_group_size_multiple(block_G);
 }
Index: lib/Sema/SemaChecking.cpp
===================================================================
--- lib/Sema/SemaChecking.cpp
+++ lib/Sema/SemaChecking.cpp
@@ -315,8 +315,18 @@
   return checkOpenCLBlockArgs(S, BlockArg);
 }
 
+/// Diagnose integer type and any valid implicit convertion to it.
+static bool checkOpenCLEnqueueIntType(Sema &S, Expr *E,
+                                      const QualType &IntType);
+
 static bool checkOpenCLEnqueueLocalSizeArgs(Sema &S, CallExpr *TheCall,
-                                            unsigned Start, unsigned End);
+                                            unsigned Start, unsigned End) {
+  bool IllegalParams = false;
+  for (unsigned I = Start; I <= End; ++I)
+    IllegalParams |= checkOpenCLEnqueueIntType(S, TheCall->getArg(I),
+                                              S.Context.getSizeType());
+  return IllegalParams;
+}
 
 /// OpenCL v2.0, s6.13.17.1 - Check that sizes are provided for all
 /// 'local void*' parameter of passed block.
@@ -440,18 +450,14 @@
     if (checkOpenCLBlockArgs(S, Arg6))
       return true;
 
-    // Forth argument has to be any integer type.
-    if (!Arg3->getType()->isIntegerType()) {
-      S.Diag(TheCall->getArg(3)->getLocStart(),
-             diag::err_opencl_enqueue_kernel_expected_type)
-          << "integer";
-      return true;
-    }
+    // Fourth argument has to be any integer type that fits into 32 bits.
+    checkOpenCLEnqueueIntType(S, Arg3, S.Context.UnsignedIntTy);
+
     // check remaining common arguments.
     Expr *Arg4 = TheCall->getArg(4);
     Expr *Arg5 = TheCall->getArg(5);
 
-    // Fith argument is always passed as pointers to clk_event_t.
+    // Fifth argument is always passed as pointers to clk_event_t.
     if (!Arg4->getType()->getPointeeOrArrayElementType()->isClkEventT()) {
       S.Diag(TheCall->getArg(4)->getLocStart(),
              diag::err_opencl_enqueue_kernel_expected_type)
@@ -9332,25 +9338,19 @@
 
 } // end anonymous namespace
 
-static bool checkOpenCLEnqueueLocalSizeArgs(Sema &S, CallExpr *TheCall,
-                                            unsigned Start, unsigned End) {
-  bool IllegalParams = false;
-  for (unsigned I = Start; I <= End; ++I) {
-    QualType Ty = TheCall->getArg(I)->getType();
-    // Taking into account implicit conversions,
-    // allow any integer within 32 bits range
-    if (!Ty->isIntegerType() ||
-        S.Context.getTypeSizeInChars(Ty).getQuantity() > 4) {
-      S.Diag(TheCall->getArg(I)->getLocStart(),
-             diag::err_opencl_enqueue_kernel_invalid_local_size_type);
-      IllegalParams = true;
-    }
-    // Potentially emit standard warnings for implicit conversions if enabled
-    // using -Wconversion.
-    CheckImplicitConversion(S, TheCall->getArg(I), S.Context.UnsignedIntTy,
-                            TheCall->getArg(I)->getLocStart());
+/// Diagnose integer type and any valid implicit convertion to it.
+static bool checkOpenCLEnqueueIntType(Sema &S, Expr *E, const QualType &IntT) {
+  // Taking into account implicit conversions,
+  // allow any integer.
+  if (!E->getType()->isIntegerType()) {
+    S.Diag(E->getLocStart(),
+           diag::err_opencl_enqueue_kernel_invalid_local_size_type);
+    return true;
   }
-  return IllegalParams;
+  // Potentially emit standard warnings for implicit conversions if enabled
+  // using -Wconversion.
+  CheckImplicitConversion(S, E, IntT, E->getLocStart());
+  return false;
 }
 
 // Helper function for Sema::DiagnoseAlwaysNonNullPointer.
Index: lib/CodeGen/CGBuiltin.cpp
===================================================================
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -2517,17 +2517,11 @@
       std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy, Int8PtrTy,
                                           IntTy};
 
-      // Add the variadics.
-      for (unsigned I = 4; I < NumArgs; ++I) {
-        llvm::Value *ArgSize = EmitScalarExpr(E->getArg(I));
-        unsigned TypeSizeInBytes =
-            getContext()
-                .getTypeSizeInChars(E->getArg(I)->getType())
-                .getQuantity();
-        Args.push_back(TypeSizeInBytes < 4
-                           ? Builder.CreateZExt(ArgSize, Int32Ty)
-                           : ArgSize);
-      }
+      // Each of the following arguments specifies the size of the corresponding
+      // argument passed to the enqueued block.
+      for (unsigned I = 4/*Position of the first size arg*/; I < NumArgs; ++I)
+        Args.push_back(
+            Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy));
 
       llvm::FunctionType *FTy = llvm::FunctionType::get(
           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true);
@@ -2549,7 +2543,8 @@
       llvm::Type *EventPtrAS5Ty =
           EventTy->getPointerTo(CGM.getContext().getTargetAddressSpace(AS5));
 
-      llvm::Value *NumEvents = EmitScalarExpr(E->getArg(3));
+      llvm::Value *NumEvents =
+          Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
       llvm::Value *EventList =
           E->getArg(4)->getType()->isArrayType()
               ? EmitArrayToPointerDecay(E->getArg(4)).getPointer()
@@ -2579,17 +2574,12 @@
       ArgTys.push_back(Int32Ty);
       Name = "__enqueue_kernel_events_vaargs";
 
-      // Add the variadics.
-      for (unsigned I = 7; I < NumArgs; ++I) {
-        llvm::Value *ArgSize = EmitScalarExpr(E->getArg(I));
-        unsigned TypeSizeInBytes =
-            getContext()
-                .getTypeSizeInChars(E->getArg(I)->getType())
-                .getQuantity();
-        Args.push_back(TypeSizeInBytes < 4
-                           ? Builder.CreateZExt(ArgSize, Int32Ty)
-                           : ArgSize);
-      }
+      // Each of the following arguments specifies the size of the corresponding
+      // argument passed to the enqueued block.
+      for (unsigned I = 7/*Position of the first size arg*/; I < NumArgs; ++I)
+        Args.push_back(
+            Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy));
+
       llvm::FunctionType *FTy = llvm::FunctionType::get(
           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true);
       return RValue::get(
Index: include/clang/Basic/DiagnosticSemaKinds.td
===================================================================
--- include/clang/Basic/DiagnosticSemaKinds.td
+++ include/clang/Basic/DiagnosticSemaKinds.td
@@ -8158,7 +8158,7 @@
 def err_opencl_enqueue_kernel_local_size_args : Error<
   "mismatch in number of block parameters and local size arguments passed">;
 def err_opencl_enqueue_kernel_invalid_local_size_type : Error<
-  "local memory sizes need to be specified as uint">;
+  "illegal call to enqueue_kernel, parameter needs to be specified as integer type">;
 def err_opencl_enqueue_kernel_blocks_non_local_void_args : Error<
   "blocks used in device side enqueue are expected to have parameters of type 'local void*'">;
 def err_opencl_enqueue_kernel_blocks_no_args : Error<
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to