This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG18834dca2d78: [OpenCL] Mark kernel arguments as ABI aligned (authored by nikic).
Herald added subscribers: cfe-commits, ldrumm.
Herald added a project: clang.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D118894/new/

https://reviews.llvm.org/D118894

Files:
  clang/lib/CodeGen/CGCall.cpp
  clang/test/CodeGenOpenCL/amdgpu-call-kernel.cl
  clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
  clang/test/CodeGenOpenCL/kernel-param-alignment.cl
  clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
  clang/test/CodeGenOpenCL/spir-calling-conv.cl

Index: clang/test/CodeGenOpenCL/spir-calling-conv.cl
===================================================================
--- clang/test/CodeGenOpenCL/spir-calling-conv.cl
+++ clang/test/CodeGenOpenCL/spir-calling-conv.cl
@@ -5,14 +5,14 @@
 kernel void bar(global int *A);
 
 kernel void foo(global int *A)
-// CHECK: define{{.*}} spir_kernel void @foo(i32 addrspace(1)* noundef %A)
+// CHECK: define{{.*}} spir_kernel void @foo(i32 addrspace(1)* noundef align 4 %A)
 {
   int id = get_dummy_id(0);
   // CHECK: %{{[a-z0-9_]+}} = tail call spir_func i32 @get_dummy_id(i32 noundef 0)
   A[id] = id;
   bar(A);
-  // CHECK: tail call spir_kernel void @bar(i32 addrspace(1)* noundef %A)
+  // CHECK: tail call spir_kernel void @bar(i32 addrspace(1)* noundef align 4 %A)
 }
 
 // CHECK: declare spir_func i32 @get_dummy_id(i32 noundef)
-// CHECK: declare spir_kernel void @bar(i32 addrspace(1)* noundef)
+// CHECK: declare spir_kernel void @bar(i32 addrspace(1)* noundef align 4)
Index: clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
===================================================================
--- clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
+++ clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
@@ -28,7 +28,7 @@
 // CHECK: spir_kernel
 // AMDGCN: define{{.*}} amdgpu_kernel void @test_single
 // CHECK: struct.int_single* nocapture {{.*}} byval(%struct.int_single)
-// CHECK: i32* nocapture noundef writeonly %output
+// CHECK: i32* nocapture noundef writeonly align 4 %output
  output[0] = input.a;
 }
 
@@ -36,7 +36,7 @@
 // CHECK: spir_kernel
 // AMDGCN: define{{.*}} amdgpu_kernel void @test_pair
 // CHECK: struct.int_pair* nocapture {{.*}} byval(%struct.int_pair)
-// CHECK: i32* nocapture noundef writeonly %output
+// CHECK: i32* nocapture noundef writeonly align 4 %output
  output[0] = (int)input.a;
  output[1] = (int)input.b;
 }
@@ -45,7 +45,7 @@
 // CHECK: spir_kernel
 // AMDGCN: define{{.*}} amdgpu_kernel void @test_kernel
 // CHECK: struct.test_struct* nocapture {{.*}} byval(%struct.test_struct)
-// CHECK: i32* nocapture noundef writeonly %output
+// CHECK: i32* nocapture noundef writeonly align 4 %output
  output[0] = input.elementA;
  output[1] = input.elementB;
  output[2] = (int)input.elementC;
Index: clang/test/CodeGenOpenCL/kernel-param-alignment.cl
===================================================================
--- /dev/null
+++ clang/test/CodeGenOpenCL/kernel-param-alignment.cl
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 %s -cl-std=CL1.2 -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s
+
+// Test that pointer arguments to kernels are assumed to be ABI aligned.
+
+struct __attribute__((packed, aligned(1))) packed {
+  int i32;
+};
+
+typedef __attribute__((ext_vector_type(4))) int int4;
+typedef __attribute__((ext_vector_type(2))) float float2;
+
+kernel void test(
+    global int *i32,
+    global long *i64,
+    global int4 *v4i32,
+    global float2 *v2f32,
+    global void *v,
+    global struct packed *p) {
+// CHECK-LABEL: spir_kernel void @test(
+// CHECK-SAME: i32* nocapture noundef align 4 %i32,
+// CHECK-SAME: i64* nocapture noundef align 8 %i64,
+// CHECK-SAME: <4 x i32>* nocapture noundef align 16 %v4i32,
+// CHECK-SAME: <2 x float>* nocapture noundef align 8 %v2f32,
+// CHECK-SAME: i8* nocapture noundef %v,
+// CHECK-SAME: %struct.packed* nocapture noundef align 1 %p)
+}
Index: clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
===================================================================
--- clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
+++ clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
@@ -34,7 +34,7 @@
   out[id] = id;
 }
 
-// COMMON-LABEL: define{{.*}} spir_kernel void @device_side_enqueue(i32 addrspace(1)* %{{.*}}, i32 addrspace(1)* %b, i32 %i)
+// COMMON-LABEL: define{{.*}} spir_kernel void @device_side_enqueue(i32 addrspace(1)* align 4 %{{.*}}, i32 addrspace(1)* align 4 %b, i32 %i)
 kernel void device_side_enqueue(global int *a, global int *b, int i) {
   // COMMON: %default_queue = alloca %opencl.queue_t*
   queue_t default_queue;
Index: clang/test/CodeGenOpenCL/amdgpu-call-kernel.cl
===================================================================
--- clang/test/CodeGenOpenCL/amdgpu-call-kernel.cl
+++ clang/test/CodeGenOpenCL/amdgpu-call-kernel.cl
@@ -1,6 +1,6 @@
 // REQUIRES: amdgpu-registered-target
 // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
-// CHECK: define{{.*}} amdgpu_kernel void @test_call_kernel(i32 addrspace(1)* nocapture noundef writeonly %out)
+// CHECK: define{{.*}} amdgpu_kernel void @test_call_kernel(i32 addrspace(1)* nocapture noundef writeonly align 4 %out)
 // CHECK: store i32 4, i32 addrspace(1)* %out, align 4
 
 kernel void test_kernel(global int *out)
Index: clang/lib/CodeGen/CGCall.cpp
===================================================================
--- clang/lib/CodeGen/CGCall.cpp
+++ clang/lib/CodeGen/CGCall.cpp
@@ -2485,6 +2485,20 @@
       }
     }
 
+    // From OpenCL spec v3.0.10 section 6.3.5 Alignment of Types:
+    // > For arguments to a __kernel function declared to be a pointer to a
+    // > data type, the OpenCL compiler can assume that the pointee is always
+    // > appropriately aligned as required by the data type.
+    if (TargetDecl && TargetDecl->hasAttr<OpenCLKernelAttr>() &&
+        ParamType->isPointerType()) {
+      QualType PTy = ParamType->getPointeeType();
+      if (!PTy->isIncompleteType() && PTy->isConstantSizeType()) {
+        llvm::Align Alignment =
+            getNaturalPointeeTypeAlignment(ParamType).getAsAlign();
+        Attrs.addAlignmentAttr(Alignment);
+      }
+    }
+
     switch (FI.getExtParameterInfo(ArgNo).getABI()) {
     case ParameterABI::Ordinary:
       break;
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to