mibintc updated this revision to Diff 310338.
mibintc added a comment.

Thanks @Anastasia ; I modified CGBuiltin.cpp to use EmitRuntimeCall when 
creating calls to runtime functions instead of Builder.CreateCall so this is 
setting the CallingConvention on the Call node using the ABIInfo.  I changed a 
few lit tests that were failing because of this.

I have a question, the clang test CodeGenOpenCL/cl20-device-side-enqueue.cl has 
this function, device_side_enqueue_block_invoke_9 which is defined with 
spir_func CC. The call doesn't have that CC. Where is that call created?  Seems 
like the call should have spir_func CC, I want to fix that.

Likewise device_side_enqueue_block_invoke_9_kernel has spir_kernel CC.  How 
does the CC get set to spir_kernel, I don't see where that is happening

define internal spir_func void @__device_side_enqueue_block_invoke_9(i8 
addrspace(4)* %.block_descriptor, i8 addrspace(3)* %p1, i8 addrspace(3)* %p2, 
i8 addrspace(3)* %p3) #1 {
define internal spir_kernel void 
@__device_side_enqueue_block_invoke_9_kernel(i8 addrspace(4)* %0, i8 
addrspace(3)* %1, i8 addrspace(3)* %2, i8 addrspace(3)* %3) #5 {

  call void @__device_side_enqueue_block_invoke_9(i8 addrspace(4)* %0, i8 
addrspace(3)* %1, i8 addrspace(3)* %2, i8 addrspace(3)* %3)


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D92721/new/

https://reviews.llvm.org/D92721

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/CodeGen/TargetInfo.cpp
  clang/test/CodeGen/complex-math.c
  clang/test/CodeGenOpenCL/builtins.cl
  clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
  clang/test/CodeGenOpenCL/to_addr_builtin.cl
  clang/test/CodeGenOpenCLCXX/atexit.cl

Index: clang/test/CodeGenOpenCLCXX/atexit.cl
===================================================================
--- clang/test/CodeGenOpenCLCXX/atexit.cl
+++ clang/test/CodeGenOpenCLCXX/atexit.cl
@@ -5,7 +5,7 @@
 };
 S s;
 
-//CHECK-LABEL: define internal void @__cxx_global_var_init()
-//CHECK: call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S addrspace(4)*)* @_ZNU3AS41SD1Ev to void (i8*)*), i8* null, i8 addrspace(1)* @__dso_handle)
+//CHECK-LABEL: define internal spir_func void @__cxx_global_var_init()
+//CHECK: call spir_func i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S addrspace(4)*)* @_ZNU3AS41SD1Ev to void (i8*)*), i8* null, i8 addrspace(1)* @__dso_handle)
 
-//CHECK: declare i32 @__cxa_atexit(void (i8*)*, i8*, i8 addrspace(1)*)
+//CHECK: declare spir_func i32 @__cxa_atexit(void (i8*)*, i8*, i8 addrspace(1)*)
Index: clang/test/CodeGenOpenCL/to_addr_builtin.cl
===================================================================
--- clang/test/CodeGenOpenCL/to_addr_builtin.cl
+++ clang/test/CodeGenOpenCL/to_addr_builtin.cl
@@ -14,74 +14,74 @@
   generic int *gen;
 
   //CHECK: %[[ARG:.*]] = addrspacecast i32 addrspace(1)* %{{.*}} to i8 addrspace(4)*
-  //CHECK: %[[RET:.*]] = call i8 addrspace(1)* @__to_global(i8 addrspace(4)* %[[ARG]])
+  //CHECK: %[[RET:.*]] = call spir_func i8 addrspace(1)* @__to_global(i8 addrspace(4)* %[[ARG]])
   //CHECK: %{{.*}} = bitcast i8 addrspace(1)* %[[RET]] to i32 addrspace(1)*
   glob = to_global(glob);
   
   //CHECK: %[[ARG:.*]] = addrspacecast i32 addrspace(3)* %{{.*}} to i8 addrspace(4)*
-  //CHECK: %[[RET:.*]] = call i8 addrspace(1)* @__to_global(i8 addrspace(4)* %[[ARG]])
+  //CHECK: %[[RET:.*]] = call spir_func i8 addrspace(1)* @__to_global(i8 addrspace(4)* %[[ARG]])
   //CHECK: %{{.*}} = bitcast i8 addrspace(1)* %[[RET]] to i32 addrspace(1)*
   glob = to_global(loc);
  
   //CHECK: %[[ARG:.*]] = addrspacecast i32* %{{.*}} to i8 addrspace(4)*
-  //CHECK: %[[RET:.*]] = call i8 addrspace(1)* @__to_global(i8 addrspace(4)* %[[ARG]])
+  //CHECK: %[[RET:.*]] = call spir_func i8 addrspace(1)* @__to_global(i8 addrspace(4)* %[[ARG]])
   //CHECK: %{{.*}} = bitcast i8 addrspace(1)* %[[RET]] to i32 addrspace(1)*
   glob = to_global(priv);
  
   //CHECK: %[[ARG:.*]] = bitcast i32 addrspace(4)* %{{.*}} to i8 addrspace(4)*
-  //CHECK: %[[RET:.*]] = call i8 addrspace(1)* @__to_global(i8 addrspace(4)* %[[ARG]])
+  //CHECK: %[[RET:.*]] = call spir_func i8 addrspace(1)* @__to_global(i8 addrspace(4)* %[[ARG]])
   //CHECK: %{{.*}} = bitcast i8 addrspace(1)* %[[RET]] to i32 addrspace(1)*
   glob = to_global(gen);
   
   //CHECK: %[[ARG:.*]] = addrspacecast i32 addrspace(1)* %{{.*}} to i8 addrspace(4)*
-  //CHECK: %[[RET:.*]] = call i8 addrspace(3)* @__to_local(i8 addrspace(4)* %[[ARG]])
+  //CHECK: %[[RET:.*]] = call spir_func i8 addrspace(3)* @__to_local(i8 addrspace(4)* %[[ARG]])
   //CHECK: %{{.*}} = bitcast i8 addrspace(3)* %[[RET]] to i32 addrspace(3)*
   loc = to_local(glob);
 
   //CHECK: %[[ARG:.*]] = addrspacecast i32 addrspace(3)* %{{.*}} to i8 addrspace(4)*
-  //CHECK: %[[RET:.*]] = call i8 addrspace(3)* @__to_local(i8 addrspace(4)* %[[ARG]])
+  //CHECK: %[[RET:.*]] = call spir_func i8 addrspace(3)* @__to_local(i8 addrspace(4)* %[[ARG]])
   //CHECK: %{{.*}} = bitcast i8 addrspace(3)* %[[RET]] to i32 addrspace(3)*
   loc = to_local(loc);
 
   //CHECK: %[[ARG:.*]] = addrspacecast i32* %{{.*}} to i8 addrspace(4)*
-  //CHECK: %[[RET:.*]] = call i8 addrspace(3)* @__to_local(i8 addrspace(4)* %[[ARG]])
+  //CHECK: %[[RET:.*]] = call spir_func i8 addrspace(3)* @__to_local(i8 addrspace(4)* %[[ARG]])
   //CHECK: %{{.*}} = bitcast i8 addrspace(3)* %[[RET]] to i32 addrspace(3)*
   loc = to_local(priv);
 
   //CHECK: %[[ARG:.*]] = bitcast i32 addrspace(4)* %{{.*}} to i8 addrspace(4)*
-  //CHECK: %[[RET:.*]] = call i8 addrspace(3)* @__to_local(i8 addrspace(4)* %[[ARG]])
+  //CHECK: %[[RET:.*]] = call spir_func i8 addrspace(3)* @__to_local(i8 addrspace(4)* %[[ARG]])
   //CHECK: %{{.*}} = bitcast i8 addrspace(3)* %[[RET]] to i32 addrspace(3)*
   loc = to_local(gen);
 
   //CHECK: %[[ARG:.*]] = addrspacecast i32 addrspace(1)* %{{.*}} to i8 addrspace(4)*
-  //CHECK: %[[RET:.*]] = call i8* @__to_private(i8 addrspace(4)* %[[ARG]])
+  //CHECK: %[[RET:.*]] = call spir_func i8* @__to_private(i8 addrspace(4)* %[[ARG]])
   //CHECK: %{{.*}} = bitcast i8* %[[RET]] to i32*
   priv = to_private(glob);
 
   //CHECK: %[[ARG:.*]] = addrspacecast i32 addrspace(3)* %{{.*}} to i8 addrspace(4)*
-  //CHECK: %[[RET:.*]] = call i8* @__to_private(i8 addrspace(4)* %[[ARG]])
+  //CHECK: %[[RET:.*]] = call spir_func i8* @__to_private(i8 addrspace(4)* %[[ARG]])
   //CHECK: %{{.*}} = bitcast i8* %[[RET]] to i32*
   priv = to_private(loc);
 
   //CHECK: %[[ARG:.*]] = addrspacecast i32* %{{.*}} to i8 addrspace(4)*
-  //CHECK: %[[RET:.*]] = call i8* @__to_private(i8 addrspace(4)* %[[ARG]])
+  //CHECK: %[[RET:.*]] = call spir_func i8* @__to_private(i8 addrspace(4)* %[[ARG]])
   //CHECK: %{{.*}} = bitcast i8* %[[RET]] to i32*
   priv = to_private(priv);
 
   //CHECK: %[[ARG:.*]] = bitcast i32 addrspace(4)* %{{.*}} to i8 addrspace(4)*
-  //CHECK: %[[RET:.*]] = call i8* @__to_private(i8 addrspace(4)* %[[ARG]])
+  //CHECK: %[[RET:.*]] = call spir_func i8* @__to_private(i8 addrspace(4)* %[[ARG]])
   //CHECK: %{{.*}} = bitcast i8* %[[RET]] to i32*
   priv = to_private(gen);
 
   //CHECK: %[[ARG:.*]] = addrspacecast %[[A]]* %{{.*}} to i8 addrspace(4)*
-  //CHECK: %[[RET:.*]] = call i8 addrspace(1)* @__to_global(i8 addrspace(4)* %[[ARG]])
+  //CHECK: %[[RET:.*]] = call spir_func i8 addrspace(1)* @__to_global(i8 addrspace(4)* %[[ARG]])
   //CHECK: %{{.*}} = bitcast i8 addrspace(1)* %[[RET]] to %[[A]] addrspace(1)*
   PA pA;
   GA gA = to_global(pA);
 
   //CHECK-NOT: addrspacecast
   //CHECK-NOT: bitcast
-  //CHECK: call i8 addrspace(1)* @__to_global(i8 addrspace(4)* %{{.*}})
+  //CHECK: call spir_func i8 addrspace(1)* @__to_global(i8 addrspace(4)* %{{.*}})
   //CHECK-NOT: addrspacecast
   //CHECK-NOT: bitcast
   generic void *gen_v;
Index: clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
===================================================================
--- clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
+++ clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
@@ -80,7 +80,7 @@
   // B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block to %struct.__opencl_block_literal_generic*
   // B64: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 }>* %block to %struct.__opencl_block_literal_generic*
   // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[BL]] to i8 addrspace(4)*
-  // COMMON-LABEL: call i32 @__enqueue_kernel_basic(
+  // COMMON-LABEL: call spir_func i32 @__enqueue_kernel_basic(
   // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval(%struct.ndrange_t) [[NDR]]{{([0-9]+)?}},
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK1:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* [[BL_I8]])
@@ -97,7 +97,7 @@
   // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL2:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke
   // COMMON: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block4 to %struct.__opencl_block_literal_generic*
   // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[BL]] to i8 addrspace(4)*
-  // COMMON-LABEL: call i32 @__enqueue_kernel_basic_events
+  // COMMON-LABEL: call spir_func i32 @__enqueue_kernel_basic_events
   // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]],  %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]],
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK2:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* [[BL_I8]])
@@ -106,7 +106,7 @@
                    a[i] = b[i];
                  });
 
-  // COMMON-LABEL: call i32 @__enqueue_kernel_basic_events
+  // COMMON-LABEL: call spir_func i32 @__enqueue_kernel_basic_events
   // COMMON-SAME: (%opencl.queue_t{{.*}}* {{%[0-9]+}}, i32 {{%[0-9]+}}, %struct.ndrange_t* {{.*}}, i32 1, %opencl.clk_event_t{{.*}}* addrspace(4)* null, %opencl.clk_event_t{{.*}}* addrspace(4)* null,
   enqueue_kernel(default_queue, flags, ndrange, 1, 0, 0,
                  ^(void) {
@@ -119,13 +119,13 @@
   // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]* %[[BLOCK_SIZES1]] to i8*
   // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]])
   // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x i64]* %[[BLOCK_SIZES1]], i64 0, i64 0
-  // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_varargs(
+  // CHECK-LIFETIMES-LABEL: call spir_func i32 @__enqueue_kernel_varargs(
   // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]])
   // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]* %[[BLOCK_SIZES1]], i32 0, i32 0
   // B32: store i32 256, i32* %[[TMP]], align 4
   // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]* %[[BLOCK_SIZES1]], i32 0, i32 0
   // B64: store i64 256, i64* %[[TMP]], align 8
-  // COMMON-LABEL: call i32 @__enqueue_kernel_varargs(
+  // COMMON-LABEL: call spir_func i32 @__enqueue_kernel_varargs(
   // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}},
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK1:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG1]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
@@ -144,13 +144,13 @@
   // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]* %[[BLOCK_SIZES2]] to i8*
   // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]])
   // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x i64]* %[[BLOCK_SIZES2]], i64 0, i64 0
-  // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_varargs(
+  // CHECK-LIFETIMES-LABEL: call spir_func i32 @__enqueue_kernel_varargs(
   // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]])
   // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]* %[[BLOCK_SIZES2]], i32 0, i32 0
   // B32: store i32 %{{.*}}, i32* %[[TMP]], align 4
   // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]* %[[BLOCK_SIZES2]], i32 0, i32 0
   // B64: store i64 %{{.*}}, i64* %[[TMP]], align 8
-  // COMMON-LABEL: call i32 @__enqueue_kernel_varargs(
+  // COMMON-LABEL: call spir_func i32 @__enqueue_kernel_varargs(
   // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}},
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK2:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG2]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
@@ -171,13 +171,13 @@
   // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]* %[[BLOCK_SIZES3]] to i8*
   // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]])
   // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x i64]* %[[BLOCK_SIZES3]], i64 0, i64 0
-  // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_events_varargs(
+  // CHECK-LIFETIMES-LABEL: call spir_func i32 @__enqueue_kernel_events_varargs(
   // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]])
   // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]* %[[BLOCK_SIZES3]], i32 0, i32 0
   // B32: store i32 256, i32* %[[TMP]], align 4
   // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]* %[[BLOCK_SIZES3]], i32 0, i32 0
   // B64: store i64 256, i64* %[[TMP]], align 8
-  // COMMON-LABEL: call i32 @__enqueue_kernel_events_varargs
+  // COMMON-LABEL: call spir_func i32 @__enqueue_kernel_events_varargs
   // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]],  %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}} [[WAIT_EVNT]], %opencl.clk_event_t{{.*}} [[EVNT]],
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK3:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG3]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
@@ -198,13 +198,13 @@
   // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]* %[[BLOCK_SIZES4]] to i8*
   // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]])
   // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x i64]* %[[BLOCK_SIZES4]], i64 0, i64 0
-  // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_events_varargs(
+  // CHECK-LIFETIMES-LABEL: call spir_func i32 @__enqueue_kernel_events_varargs(
   // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]])
   // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]* %[[BLOCK_SIZES4]], i32 0, i32 0
   // B32: store i32 %{{.*}}, i32* %[[TMP]], align 4
   // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]* %[[BLOCK_SIZES4]], i32 0, i32 0
   // B64: store i64 %{{.*}}, i64* %[[TMP]], align 8
-  // COMMON-LABEL: call i32 @__enqueue_kernel_events_varargs
+  // COMMON-LABEL: call spir_func i32 @__enqueue_kernel_events_varargs
   // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]],  %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]],
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK4:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG4]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
@@ -223,13 +223,13 @@
   // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]* %[[BLOCK_SIZES5]] to i8*
   // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]])
   // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x i64]* %[[BLOCK_SIZES5]], i64 0, i64 0
-  // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_varargs(
+  // CHECK-LIFETIMES-LABEL: call spir_func i32 @__enqueue_kernel_varargs(
   // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]])
   // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]* %[[BLOCK_SIZES5]], i32 0, i32 0
   // B32: store i32 %{{.*}}, i32* %[[TMP]], align 4
   // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]* %[[BLOCK_SIZES5]], i32 0, i32 0
   // B64: store i64 %{{.*}}, i64* %[[TMP]], align 8
-  // COMMON-LABEL: call i32 @__enqueue_kernel_varargs
+  // COMMON-LABEL: call spir_func i32 @__enqueue_kernel_varargs
   // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}},
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK5:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG5]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
@@ -247,7 +247,7 @@
   // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [3 x i64]* %[[BLOCK_SIZES6]] to i8*
   // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 24, i8* nonnull [[LIFETIME_PTR]])
   // CHECK-LIFETIMES-NEXT: getelementptr inbounds [3 x i64], [3 x i64]* %[[BLOCK_SIZES6]], i64 0, i64 0
-  // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_varargs(
+  // CHECK-LIFETIMES-LABEL: call spir_func i32 @__enqueue_kernel_varargs(
   // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 24, i8* nonnull [[LIFETIME_PTR]])
   // B32: %[[TMP:.*]] = getelementptr [3 x i32], [3 x i32]* %[[BLOCK_SIZES6]], i32 0, i32 0
   // B32: store i32 1, i32* %[[TMP]], align 4
@@ -261,7 +261,7 @@
   // B64: store i64 2, i64* %[[BLOCK_SIZES62]], align 8
   // B64: %[[BLOCK_SIZES63:.*]] = getelementptr [3 x i64], [3 x i64]* %[[BLOCK_SIZES6]], i32 0, i32 2
   // B64: store i64 4, i64* %[[BLOCK_SIZES63]], align 8
-  // COMMON-LABEL: call i32 @__enqueue_kernel_varargs
+  // COMMON-LABEL: call spir_func i32 @__enqueue_kernel_varargs
   // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}},
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK6:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG6]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3,
@@ -279,13 +279,13 @@
   // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]* %[[BLOCK_SIZES7]] to i8*
   // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]])
   // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x i64]* %[[BLOCK_SIZES7]], i64 0, i64 0
-  // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_varargs(
+  // CHECK-LIFETIMES-LABEL: call spir_func i32 @__enqueue_kernel_varargs(
   // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]])
   // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]* %[[BLOCK_SIZES7]], i32 0, i32 0
   // B32: store i32 0, i32* %[[TMP]], align 4
   // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]* %[[BLOCK_SIZES7]], i32 0, i32 0
   // B64: store i64 4294967296, i64* %[[TMP]], align 8
-  // COMMON-LABEL: call i32 @__enqueue_kernel_varargs
+  // COMMON-LABEL: call spir_func i32 @__enqueue_kernel_varargs
   // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}},
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK7:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG7]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
@@ -318,14 +318,14 @@
   // Emits global block literal [[BLG8]] and block kernel [[INVGK8]]. [[INVGK8]] calls [[INVG8]].
   // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
   // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
-  // COMMON-LABEL: call i32 @__enqueue_kernel_basic(
+  // COMMON-LABEL: call spir_func i32 @__enqueue_kernel_basic(
   // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval(%struct.ndrange_t) [[NDR]]{{([0-9]+)?}},
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK8:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
   enqueue_kernel(default_queue, flags, ndrange, block_A);
 
   // Uses block kernel [[INVGK8]] and global block literal [[BLG8]].
-  // COMMON: call i32 @__get_kernel_work_group_size_impl(
+  // COMMON: call spir_func i32 @__get_kernel_work_group_size_impl(
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK8]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
   unsigned size = get_kernel_work_group_size(block_A);
@@ -352,7 +352,7 @@
   // COOMON-SAME: to i8 addrspace(4)*), i8 addrspace(3)* null)
   b2(0);
   // Uses global block literal [[BL_GLOBAL]] and block kernel [[INV_G_K]]. [[INV_G_K]] calls [[INV_G]].
-  // COMMON: call i32 @__get_kernel_preferred_work_group_size_multiple_impl(
+  // COMMON: call spir_func i32 @__get_kernel_preferred_work_group_size_multiple_impl(
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INV_G_K:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*) to i8 addrspace(4)*))
   size = get_kernel_preferred_work_group_size_multiple(b2);
@@ -365,38 +365,38 @@
   // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
   // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
   // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* {{.*}} to i8 addrspace(4)*
-  // COMMON-LABEL: call i32 @__enqueue_kernel_basic(
+  // COMMON-LABEL: call spir_func i32 @__enqueue_kernel_basic(
   // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval(%struct.ndrange_t) [[NDR]]{{([0-9]+)?}},
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK3:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* [[BL_I8]])
   enqueue_kernel(default_queue, flags, ndrange, block_C);
 
   // Emits global block literal [[BLG9]] and block kernel [[INVGK9]]. [[INVGK9]] calls [[INV9]].
-  // COMMON: call i32 @__get_kernel_work_group_size_impl(
+  // COMMON: call spir_func i32 @__get_kernel_work_group_size_impl(
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK9:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG9]] to i8 addrspace(1)*) to i8 addrspace(4)*))
   size = get_kernel_work_group_size(block_B);
 
   // Uses global block literal [[BLG8]] and block kernel [[INVGK8]]. Make sure no redundant block literal ind invoke functions are emitted.
-  // COMMON: call i32 @__get_kernel_preferred_work_group_size_multiple_impl(
+  // COMMON: call spir_func i32 @__get_kernel_preferred_work_group_size_multiple_impl(
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK8]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
   size = get_kernel_preferred_work_group_size_multiple(block_A);
 
   // Uses global block literal [[BL_GLOBAL]] and block kernel [[INV_G_K]]. [[INV_G_K]] calls [[INV_G]].
-  // COMMON: call i32 @__get_kernel_preferred_work_group_size_multiple_impl(
+  // COMMON: call spir_func i32 @__get_kernel_preferred_work_group_size_multiple_impl(
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INV_G_K:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*) to i8 addrspace(4)*))
   size = get_kernel_preferred_work_group_size_multiple(block_G);
 
   // Emits global block literal [[BLG10]] and block kernel [[INVGK10]].
-  // COMMON: call i32 @__get_kernel_max_sub_group_size_for_ndrange_impl(%struct.ndrange_t* {{[^,]+}},
+  // COMMON: call spir_func i32 @__get_kernel_max_sub_group_size_for_ndrange_impl(%struct.ndrange_t* {{[^,]+}},
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK10:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG10]] to i8 addrspace(1)*) to i8 addrspace(4)*))
   size = get_kernel_max_sub_group_size_for_ndrange(ndrange, ^(){});
 
   // Emits global block literal [[BLG11]] and block kernel [[INVGK11]].
-  // COMMON: call i32 @__get_kernel_sub_group_count_for_ndrange_impl(%struct.ndrange_t* {{[^,]+}},
+  // COMMON: call spir_func i32 @__get_kernel_sub_group_count_for_ndrange_impl(%struct.ndrange_t* {{[^,]+}},
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK11:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG11]] to i8 addrspace(1)*) to i8 addrspace(4)*))
   size = get_kernel_sub_group_count_for_ndrange(ndrange, ^(){});
Index: clang/test/CodeGenOpenCL/builtins.cl
===================================================================
--- clang/test/CodeGenOpenCL/builtins.cl
+++ clang/test/CodeGenOpenCL/builtins.cl
@@ -5,19 +5,19 @@
 
     if (enqueue_kernel(default_queue, flags, ndrange, ^(void) {}))
         (void)0;
-    // CHECK: [[P:%[0-9]+]] = call i32 @__enqueue_kernel
+    // CHECK: [[P:%[0-9]+]] = call spir_func i32 @__enqueue_kernel
     // CHECK-NEXT: [[Q:%[a-z0-9]+]] = icmp ne i32 [[P]], 0
     // CHECK-NEXT: br i1 [[Q]]
 
     if (get_kernel_work_group_size(^(void) {}))
         (void)0;
-    // CHECK: [[P:%[0-9]+]] = call i32 @__get_kernel_work_group_size
+    // CHECK: [[P:%[0-9]+]] = call spir_func i32 @__get_kernel_work_group_size
     // CHECK-NEXT: [[Q:%[a-z0-9]+]] = icmp ne i32 [[P]], 0
     // CHECK-NEXT: br i1 [[Q]]
 
     if (get_kernel_preferred_work_group_size_multiple(^(void) {}))
         (void)0;
-    // CHECK: [[P:%[0-9]+]] = call i32 @__get_kernel_preferred_work_group_size_multiple_impl
+    // CHECK: [[P:%[0-9]+]] = call spir_func i32 @__get_kernel_preferred_work_group_size_multiple_impl
     // CHECK-NEXT: [[Q:%[a-z0-9]+]] = icmp ne i32 [[P]], 0
     // CHECK-NEXT: br i1 [[Q]]
 }
@@ -27,32 +27,32 @@
 
     if (read_pipe(r, ptr))
         (void)0;
-    // CHECK: [[R:%[0-9]+]] = call i32 @__read_pipe_2
+    // CHECK: [[R:%[0-9]+]] = call spir_func i32 @__read_pipe_2
     // CHECK-NEXT: icmp ne i32 [[R]], 0
 
     if (write_pipe(w, ptr))
         (void)0;
-    // CHECK: [[R:%[0-9]+]] = call i32 @__write_pipe_2
+    // CHECK: [[R:%[0-9]+]] = call spir_func i32 @__write_pipe_2
     // CHECK-NEXT: icmp ne i32 [[R]], 0
 
     if (get_pipe_num_packets(r))
         (void)0;
-    // CHECK: [[R:%[0-9]+]] = call i32 @__get_pipe_num_packets_ro
+    // CHECK: [[R:%[0-9]+]] = call spir_func i32 @__get_pipe_num_packets_ro
     // CHECK-NEXT: icmp ne i32 [[R]], 0
 
     if (get_pipe_num_packets(w))
         (void)0;
-    // CHECK: [[R:%[0-9]+]] = call i32 @__get_pipe_num_packets_wo
+    // CHECK: [[R:%[0-9]+]] = call spir_func i32 @__get_pipe_num_packets_wo
     // CHECK-NEXT: icmp ne i32 [[R]], 0
 
     if (get_pipe_max_packets(r))
         (void)0;
-    // CHECK: [[R:%[0-9]+]] = call i32 @__get_pipe_max_packets_ro
+    // CHECK: [[R:%[0-9]+]] = call spir_func i32 @__get_pipe_max_packets_ro
     // CHECK-NEXT: icmp ne i32 [[R]], 0
 
     if (get_pipe_max_packets(w))
         (void)0;
-    // CHECK: [[R:%[0-9]+]] = call i32 @__get_pipe_max_packets_wo
+    // CHECK: [[R:%[0-9]+]] = call spir_func i32 @__get_pipe_max_packets_wo
     // CHECK-NEXT: icmp ne i32 [[R]], 0
 }
 
@@ -61,21 +61,21 @@
 
     if (to_global(ptr))
         (void)0;
-    // CHECK:       [[P:%[0-9]+]] = call [[GLOBAL_VOID:i8 addrspace\(1\)\*]] @__to_global([[GENERIC_VOID:i8 addrspace\(4\)\*]] {{%[0-9]+}})
+    // CHECK:       [[P:%[0-9]+]] = call spir_func [[GLOBAL_VOID:i8 addrspace\(1\)\*]] @__to_global([[GENERIC_VOID:i8 addrspace\(4\)\*]] {{%[0-9]+}})
     // CHECK-NEXT:  [[Q:%[0-9]+]] = bitcast [[GLOBAL_VOID]] [[P]] to [[GLOBAL_i64:i64 addrspace\(1\)\*]]
     // CHECK-NEXT:  [[BOOL:%[a-z0-9]+]] = icmp ne [[GLOBAL_i64]] [[Q]], null
     // CHECK-NEXT:  br i1 [[BOOL]]
 
     if (to_local(ptr))
         (void)0;
-    // CHECK:       [[P:%[0-9]+]] = call [[LOCAL_VOID:i8 addrspace\(3\)\*]] @__to_local([[GENERIC_VOID]] {{%[0-9]+}})
+    // CHECK:       [[P:%[0-9]+]] = call spir_func [[LOCAL_VOID:i8 addrspace\(3\)\*]] @__to_local([[GENERIC_VOID]] {{%[0-9]+}})
     // CHECK-NEXT:  [[Q:%[0-9]+]] = bitcast [[LOCAL_VOID]] [[P]] to [[LOCAL_i64:i64 addrspace\(3\)\*]]
     // CHECK-NEXT:  [[BOOL:%[a-z0-9]+]] = icmp ne [[LOCAL_i64]] [[Q]], null
     // CHECK-NEXT:  br i1 [[BOOL]]
 
     if (to_private(ptr))
         (void)0;
-    // CHECK:       [[P:%[0-9]+]] = call [[PRIVATE_VOID:i8\*]] @__to_private([[GENERIC_VOID]] {{%[0-9]+}})
+    // CHECK:       [[P:%[0-9]+]] = call spir_func [[PRIVATE_VOID:i8\*]] @__to_private([[GENERIC_VOID]] {{%[0-9]+}})
     // CHECK-NEXT:  [[Q:%[0-9]+]] = bitcast [[PRIVATE_VOID]] [[P]] to [[PRIVATE_i64:i64\*]]
     // CHECK-NEXT:  [[BOOL:%[a-z0-9]+]] = icmp ne [[PRIVATE_i64]] [[Q]], null
     // CHECK-NEXT:  br i1 [[BOOL]]
Index: clang/test/CodeGen/complex-math.c
===================================================================
--- clang/test/CodeGen/complex-math.c
+++ clang/test/CodeGen/complex-math.c
@@ -6,6 +6,7 @@
 // RUN: %clang_cc1 %s -O0 -fno-experimental-new-pass-manager -emit-llvm -triple armv7-none-linux-gnueabihf -o - | FileCheck %s --check-prefix=ARMHF
 // RUN: %clang_cc1 %s -O0 -fno-experimental-new-pass-manager -emit-llvm -triple thumbv7k-apple-watchos2.0 -o - -target-abi aapcs16 | FileCheck %s --check-prefix=ARM7K
 // RUN: %clang_cc1 %s -O0 -fno-experimental-new-pass-manager -emit-llvm -triple aarch64-unknown-unknown -ffast-math -ffp-contract=fast -o - | FileCheck %s --check-prefix=AARCH64-FASTMATH
+// RUN: %clang_cc1 %s -O0 -fno-experimental-new-pass-manager -emit-llvm -triple spir -o - | FileCheck %s --check-prefix=SPIR
 
 float _Complex add_float_rr(float a, float b) {
   // X86-LABEL: @add_float_rr(
@@ -107,6 +108,7 @@
   // X86: fcmp uno float %[[RI]]
   // X86: call {{.*}} @__mulsc3(
   // X86: ret
+  // SPIR: call spir_func {{.*}} @__mulsc3(
   return a * b;
 }
 
@@ -131,6 +133,8 @@
   // X86: call {{.*}} @__divsc3(
   // X86: ret
 
+  // SPIR: call spir_func {{.*}} @__divsc3(
+
   // a / b = (A+iB) / (C+iD) = ((AC+BD)/(CC+DD)) + i((BC-AD)/(CC+DD))
   // AARCH64-FASTMATH-LABEL: @div_float_rc(float %a, [2 x float] %b.coerce)
   // A = a
@@ -158,6 +162,8 @@
   // X86: call {{.*}} @__divsc3(
   // X86: ret
 
+  // SPIR: call spir_func {{.*}} @__divsc3(
+
   // a / b = (A+iB) / (C+iD) = ((AC+BD)/(CC+DD)) + i((BC-AD)/(CC+DD))
   // AARCH64-FASTMATH-LABEL: @div_float_cc([2 x float] %a.coerce, [2 x float] %b.coerce)
   //
@@ -278,6 +284,8 @@
   // X86: fcmp uno double %[[RI]]
   // X86: call {{.*}} @__muldc3(
   // X86: ret
+
+  // SPIR: call spir_func {{.*}} @__muldc3(
   return a * b;
 }
 
@@ -302,6 +310,8 @@
   // X86: call {{.*}} @__divdc3(
   // X86: ret
 
+  // SPIR: call spir_func {{.*}} @__divdc3(
+
   // a / b = (A+iB) / (C+iD) = ((AC+BD)/(CC+DD)) + i((BC-AD)/(CC+DD))
   // AARCH64-FASTMATH-LABEL: @div_double_rc(double %a, [2 x double] %b.coerce)
   // A = a
@@ -329,6 +339,8 @@
   // X86: call {{.*}} @__divdc3(
   // X86: ret
 
+  // SPIR: call spir_func {{.*}} @__divdc3(
+
   // a / b = (A+iB) / (C+iD) = ((AC+BD)/(CC+DD)) + i((BC-AD)/(CC+DD))
   // AARCH64-FASTMATH-LABEL: @div_double_cc([2 x double] %a.coerce, [2 x double] %b.coerce)
   //
@@ -463,6 +475,7 @@
   // PPC: fcmp uno ppc_fp128 %[[RI]]
   // PPC: call {{.*}} @__multc3(
   // PPC: ret
+  // SPIR: call spir_func {{.*}} @__muldc3(
   return a * b;
 }
 
@@ -490,6 +503,7 @@
   // PPC-NOT: fdiv
   // PPC: call {{.*}} @__divtc3(
   // PPC: ret
+  // SPIR: call spir_func {{.*}} @__divdc3(
 
   // a / b = (A+iB) / (C+iD) = ((AC+BD)/(CC+DD)) + i((BC-AD)/(CC+DD))
   // AARCH64-FASTMATH-LABEL: @div_long_double_rc(fp128 %a, [2 x fp128] %b.coerce)
@@ -521,6 +535,7 @@
   // PPC-NOT: fdiv
   // PPC: call {{.*}} @__divtc3(
   // PPC: ret
+  // SPIR: call spir_func {{.*}} @__divdc3(
 
   // a / b = (A+iB) / (C+iD) = ((AC+BD)/(CC+DD)) + i((BC-AD)/(CC+DD))
   // AARCH64-FASTMATH-LABEL: @div_long_double_cc([2 x fp128] %a.coerce, [2 x fp128] %b.coerce)
@@ -603,6 +618,8 @@
   // ARM-LABEL: @foo(
   // ARM: call void @__muldc3
 
+  // SPIR: call spir_func void @__muldc3
+
   // ARMHF-LABEL: @foo(
   // ARMHF: call { double, double } @__muldc3
 
Index: clang/lib/CodeGen/TargetInfo.cpp
===================================================================
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -9979,15 +9979,28 @@
 // SPIR ABI Implementation
 //===----------------------------------------------------------------------===//
 
+namespace {
+class SPIRABIInfo : public DefaultABIInfo {
+public:
+  SPIRABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) { setCCs(); }
+
+private:
+  void setCCs();
+};
+} // end anonymous namespace
 namespace {
 class SPIRTargetCodeGenInfo : public TargetCodeGenInfo {
 public:
   SPIRTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
-      : TargetCodeGenInfo(std::make_unique<DefaultABIInfo>(CGT)) {}
+      : TargetCodeGenInfo(std::make_unique<SPIRABIInfo>(CGT)) {}
   unsigned getOpenCLKernelCallingConv() const override;
 };
 
 } // End anonymous namespace.
+void SPIRABIInfo::setCCs() {
+  assert(getRuntimeCC() == llvm::CallingConv::C);
+  RuntimeCC = llvm::CallingConv::SPIR_FUNC;
+}
 
 namespace clang {
 namespace CodeGen {
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -4312,9 +4312,8 @@
       llvm::FunctionType *FTy = llvm::FunctionType::get(
           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
       Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
-      return RValue::get(
-          Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
-                             {Arg0, BCast, PacketSize, PacketAlign}));
+      return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
+                                      {Arg0, BCast, PacketSize, PacketAlign}));
     } else {
       assert(4 == E->getNumArgs() &&
              "Illegal number of parameters to pipe function");
@@ -4332,7 +4331,7 @@
       // it to i32.
       if (Arg2->getType() != Int32Ty)
         Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
-      return RValue::get(Builder.CreateCall(
+      return RValue::get(EmitRuntimeCall(
           CGM.CreateRuntimeFunction(FTy, Name),
           {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
     }
@@ -4376,7 +4375,7 @@
     if (Arg1->getType() != Int32Ty)
       Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
     return RValue::get(
-        Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
+        EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
                            {Arg0, Arg1, PacketSize, PacketAlign}));
   }
   // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
@@ -4414,7 +4413,7 @@
                                 llvm::ArrayRef<llvm::Type *>(ArgTys), false);
 
     return RValue::get(
-        Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
+        EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
                            {Arg0, Arg1, PacketSize, PacketAlign}));
   }
   // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
@@ -4438,7 +4437,7 @@
     llvm::FunctionType *FTy = llvm::FunctionType::get(
         Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
 
-    return RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
+    return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
                                           {Arg0, PacketSize, PacketAlign}));
   }
 
@@ -4461,7 +4460,7 @@
       NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
     auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
     auto NewCall =
-        Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
+        EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
     return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
       ConvertType(E->getType())));
   }
@@ -4504,7 +4503,7 @@
           llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);
 
       auto RTCall =
-          Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
+          EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
                              {Queue, Flags, Range, Kernel, Block});
       RTCall->setAttributes(ByValAttrSet);
       return RValue::get(RTCall);
@@ -4564,7 +4563,7 @@
 
       llvm::FunctionType *FTy = llvm::FunctionType::get(Int32Ty, ArgTys, false);
       auto Call = RValue::get(
-          Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), Args));
+          EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Args));
       if (TmpSize)
         EmitLifetimeEnd(TmpSize, TmpPtr);
       return Call;
@@ -4622,7 +4621,7 @@
         llvm::FunctionType *FTy = llvm::FunctionType::get(
             Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
         return RValue::get(
-            Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
+            EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
                                llvm::ArrayRef<llvm::Value *>(Args)));
       }
       // Has event info and variadics
@@ -4639,7 +4638,7 @@
       llvm::FunctionType *FTy = llvm::FunctionType::get(
           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
       auto Call =
-          RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
+          RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
                                          llvm::ArrayRef<llvm::Value *>(Args)));
       if (TmpSize)
         EmitLifetimeEnd(TmpSize, TmpPtr);
@@ -4656,7 +4655,7 @@
         CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
     Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
     Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
-    return RValue::get(Builder.CreateCall(
+    return RValue::get(EmitRuntimeCall(
         CGM.CreateRuntimeFunction(
             llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
                                     false),
@@ -4670,7 +4669,7 @@
         CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
     Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
     Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
-    return RValue::get(Builder.CreateCall(
+    return RValue::get(EmitRuntimeCall(
         CGM.CreateRuntimeFunction(
             llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
                                     false),
@@ -4691,7 +4690,7 @@
         BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
             ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
             : "__get_kernel_sub_group_count_for_ndrange_impl";
-    return RValue::get(Builder.CreateCall(
+    return RValue::get(EmitRuntimeCall(
         CGM.CreateRuntimeFunction(
             llvm::FunctionType::get(
                 IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy},
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to