llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang

Author: Vikash Gupta (vg0204)

<details>
<summary>Changes</summary>

__builtin_alloca was returning a flat pointer with no address space when
compiled with OpenCL 1.2 or below, but worked fine with OpenCL 2.0 and above.
This is because the latter uses the concept of a generic address space,
which supports casts to other address spaces (i.e. to the private address
space, which is used for stack allocation).

So, in the case of OpenCL 1.2 and below, __builtin_alloca is supposed to return
a pointer to the private address space, eliminating the need for a cast, which
is not supported there. Thus, it requires redefinition of the builtin function
with a return pointer type that points to the appropriate address space.

---
Full diff: https://github.com/llvm/llvm-project/pull/95750.diff


3 Files Affected:

- (modified) clang/lib/Sema/SemaExpr.cpp (+20-3) 
- (added) clang/test/CodeGenOpenCL/builtins-alloca.cl (+86) 
- (modified) clang/test/CodeGenOpenCL/memcpy.cl () 


``````````diff
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 99a8704298314..e12c2a9209706 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -6231,7 +6231,10 @@ bool Sema::CheckArgsForPlaceholders(MultiExprArg args) {
 ///                  it does not contain any pointer arguments without
 ///                  an address space qualifer.  Otherwise the rewritten
 ///                  FunctionDecl is returned.
-/// TODO: Handle pointer return types.
+///
+/// Pointer return type with no explicit address space is assigned the
+/// default address space where pointer points to based on the language
+/// option used to compile it.
 static FunctionDecl *rewriteBuiltinFunctionDecl(Sema *Sema, ASTContext 
&Context,
                                                 FunctionDecl *FDecl,
                                                 MultiExprArg ArgExprs) {
@@ -6275,13 +6278,27 @@ static FunctionDecl *rewriteBuiltinFunctionDecl(Sema 
*Sema, ASTContext &Context,
     OverloadParams.push_back(Context.getPointerType(PointeeType));
   }
 
+  QualType ReturnTy = FT->getReturnType();
+  QualType OverloadReturnTy = ReturnTy;
+  if (ReturnTy->isPointerType() &&
+      !ReturnTy->getPointeeType().hasAddressSpace()) {
+    if (Sema->getLangOpts().OpenCL) {
+      NeedsNewDecl = true;
+
+      QualType ReturnPtTy = ReturnTy->getPointeeType();
+      LangAS defClAS = Context.getDefaultOpenCLPointeeAddrSpace();
+      ReturnPtTy = Context.getAddrSpaceQualType(ReturnPtTy, defClAS);
+      OverloadReturnTy = Context.getPointerType(ReturnPtTy);
+    }
+  }
+
   if (!NeedsNewDecl)
     return nullptr;
 
   FunctionProtoType::ExtProtoInfo EPI;
   EPI.Variadic = FT->isVariadic();
-  QualType OverloadTy = Context.getFunctionType(FT->getReturnType(),
-                                                OverloadParams, EPI);
+  QualType OverloadTy =
+      Context.getFunctionType(OverloadReturnTy, OverloadParams, EPI);
   DeclContext *Parent = FDecl->getParent();
   FunctionDecl *OverloadDecl = FunctionDecl::Create(
       Context, Parent, FDecl->getLocation(), FDecl->getLocation(),
diff --git a/clang/test/CodeGenOpenCL/builtins-alloca.cl 
b/clang/test/CodeGenOpenCL/builtins-alloca.cl
new file mode 100644
index 0000000000000..74a86955f2e4f
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-alloca.cl
@@ -0,0 +1,86 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
+// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL1.2 -emit-llvm 
-o - | FileCheck --check-prefix=OPENCL12 %s
+// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL2.0 -emit-llvm 
-o - | FileCheck --check-prefix=OPENCL20 %s
+// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL3.0 -emit-llvm 
-o - | FileCheck --check-prefix=OPENCL30 %s
+// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL3.0 
-cl-ext=+__opencl_c_generic_address_space -emit-llvm -o - | FileCheck 
--check-prefix=OPENCL30-EXT %s
+
+// OPENCL12-LABEL: define dso_local ptr addrspace(5) @test1(
+// OPENCL12-SAME: ) #[[ATTR0:[0-9]+]] {
+// OPENCL12-NEXT:  [[ENTRY:.*:]]
+// OPENCL12-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, 
addrspace(5)
+// OPENCL12-NEXT:    [[TMP0:%.*]] = alloca i8, i64 128, align 8, addrspace(5)
+// OPENCL12-NEXT:    store ptr addrspace(5) [[TMP0]], ptr addrspace(5) 
[[ALLOC_PTR]], align 4
+// OPENCL12-NEXT:    [[TMP1:%.*]] = load ptr addrspace(5), ptr addrspace(5) 
[[ALLOC_PTR]], align 4
+// OPENCL12-NEXT:    ret ptr addrspace(5) [[TMP1]]
+//
+// OPENCL20-LABEL: define dso_local ptr @test1(
+// OPENCL20-SAME: ) #[[ATTR0:[0-9]+]] {
+// OPENCL20-NEXT:  [[ENTRY:.*:]]
+// OPENCL20-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OPENCL20-NEXT:    [[TMP0:%.*]] = alloca i8, i64 128, align 8, addrspace(5)
+// OPENCL20-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(5) [[TMP0]] to 
ptr
+// OPENCL20-NEXT:    store ptr [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 
8
+// OPENCL20-NEXT:    [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[ALLOC_PTR]], 
align 8
+// OPENCL20-NEXT:    ret ptr [[TMP2]]
+//
+// OPENCL30-LABEL: define dso_local ptr addrspace(5) @test1(
+// OPENCL30-SAME: ) #[[ATTR0:[0-9]+]] {
+// OPENCL30-NEXT:  [[ENTRY:.*:]]
+// OPENCL30-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, 
addrspace(5)
+// OPENCL30-NEXT:    [[TMP0:%.*]] = alloca i8, i64 128, align 8, addrspace(5)
+// OPENCL30-NEXT:    store ptr addrspace(5) [[TMP0]], ptr addrspace(5) 
[[ALLOC_PTR]], align 4
+// OPENCL30-NEXT:    [[TMP1:%.*]] = load ptr addrspace(5), ptr addrspace(5) 
[[ALLOC_PTR]], align 4
+// OPENCL30-NEXT:    ret ptr addrspace(5) [[TMP1]]
+//
+// OPENCL30-EXT-LABEL: define dso_local ptr @test1(
+// OPENCL30-EXT-SAME: ) #[[ATTR0:[0-9]+]] {
+// OPENCL30-EXT-NEXT:  [[ENTRY:.*:]]
+// OPENCL30-EXT-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OPENCL30-EXT-NEXT:    [[TMP0:%.*]] = alloca i8, i64 128, align 8, 
addrspace(5)
+// OPENCL30-EXT-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(5) 
[[TMP0]] to ptr
+// OPENCL30-EXT-NEXT:    store ptr [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], 
align 8
+// OPENCL30-EXT-NEXT:    [[TMP2:%.*]] = load ptr, ptr addrspace(5) 
[[ALLOC_PTR]], align 8
+// OPENCL30-EXT-NEXT:    ret ptr [[TMP2]]
+//
+float* test1() {
+    float* alloc_ptr = (float*)__builtin_alloca(32 * sizeof(int));
+    return alloc_ptr;
+}
+
+// OPENCL12-LABEL: define dso_local void @test2(
+// OPENCL12-SAME: ) #[[ATTR0]] {
+// OPENCL12-NEXT:  [[ENTRY:.*:]]
+// OPENCL12-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, 
addrspace(5)
+// OPENCL12-NEXT:    [[TMP0:%.*]] = alloca i8, i64 28, align 8, addrspace(5)
+// OPENCL12-NEXT:    store ptr addrspace(5) [[TMP0]], ptr addrspace(5) 
[[ALLOC_PTR]], align 4
+// OPENCL12-NEXT:    ret void
+//
+// OPENCL20-LABEL: define dso_local void @test2(
+// OPENCL20-SAME: ) #[[ATTR0]] {
+// OPENCL20-NEXT:  [[ENTRY:.*:]]
+// OPENCL20-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OPENCL20-NEXT:    [[TMP0:%.*]] = alloca i8, i64 28, align 8, addrspace(5)
+// OPENCL20-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(5) [[TMP0]] to 
ptr
+// OPENCL20-NEXT:    store ptr [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 
8
+// OPENCL20-NEXT:    ret void
+//
+// OPENCL30-LABEL: define dso_local void @test2(
+// OPENCL30-SAME: ) #[[ATTR0]] {
+// OPENCL30-NEXT:  [[ENTRY:.*:]]
+// OPENCL30-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, 
addrspace(5)
+// OPENCL30-NEXT:    [[TMP0:%.*]] = alloca i8, i64 28, align 8, addrspace(5)
+// OPENCL30-NEXT:    store ptr addrspace(5) [[TMP0]], ptr addrspace(5) 
[[ALLOC_PTR]], align 4
+// OPENCL30-NEXT:    ret void
+//
+// OPENCL30-EXT-LABEL: define dso_local void @test2(
+// OPENCL30-EXT-SAME: ) #[[ATTR0]] {
+// OPENCL30-EXT-NEXT:  [[ENTRY:.*:]]
+// OPENCL30-EXT-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OPENCL30-EXT-NEXT:    [[TMP0:%.*]] = alloca i8, i64 28, align 8, 
addrspace(5)
+// OPENCL30-EXT-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(5) 
[[TMP0]] to ptr
+// OPENCL30-EXT-NEXT:    store ptr [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], 
align 8
+// OPENCL30-EXT-NEXT:    ret void
+//
+void test2() {
+    void *alloc_ptr = __builtin_alloca(28);
+}
diff --git a/clang/test/CodeGenOpenCL/memcpy.cl 
b/clang/test/CodeGenOpenCL/memcpy.cl
old mode 100644
new mode 100755

``````````

</details>


https://github.com/llvm/llvm-project/pull/95750
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to