yaxunl updated this revision to Diff 341257.
yaxunl added a comment.

cast return value to default address space since it is expected. also fix debug 
info


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D101389/new/

https://reviews.llvm.org/D101389

Files:
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/CodeGen/CGDecl.cpp
  clang/lib/CodeGen/CodeGenFunction.cpp
  clang/test/CodeGenCUDA/amdgpu-sret.cu

Index: clang/test/CodeGenCUDA/amdgpu-sret.cu
===================================================================
--- /dev/null
+++ clang/test/CodeGenCUDA/amdgpu-sret.cu
@@ -0,0 +1,101 @@
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx906 \
+// RUN:   -aux-triple x86_64-unknown-linux-gnu -fcuda-is-device \
+// RUN:   -emit-llvm -o - -x hip %s -debug-info-kind=limited \
+// RUN:   | FileCheck %s
+
+// Check no assertion with debug info.
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx906 \
+// RUN:   -aux-triple x86_64-unknown-linux-gnu -fcuda-is-device \
+// RUN:   -S -o %t.s -x hip %s \
+// RUN:   -debug-info-kind=limited
+
+#include "Inputs/cuda.h"
+ 
+struct A {
+  int x[100];
+  __device__ A();
+};
+
+struct B {
+  int x[100];
+};
+
+__device__ B b;
+
+__device__ void callee(A *a);
+
+// CHECK-LABEL: @_Z5func1v(
+// CHECK-SAME: %struct.A addrspace(5)* noalias sret(%struct.A) align 4 %[[RET:.*]])
+// CHECK: %x = alloca [100 x i32], align 16, addrspace(5)
+// CHECK: %x.ascast = addrspacecast [100 x i32] addrspace(5)* %x to [100 x i32]*
+// CHECK: %p = alloca %struct.A*, align 8, addrspace(5)
+// CHECK: %p.ascast = addrspacecast %struct.A* addrspace(5)* %p to %struct.A**
+// CHECK: %[[RET_CAST:.*]] = addrspacecast %struct.A addrspace(5)* %[[RET]] to %struct.A*
+// CHECK: call void @llvm.dbg.declare(metadata %struct.A addrspace(5)* %[[RET]]
+// CHECK: call void @_ZN1AC1Ev(%struct.A* nonnull dereferenceable(400) %[[RET_CAST]])
+// CHECK: call void @llvm.dbg.declare(metadata [100 x i32] addrspace(5)* %x
+// CHECK: call void @_Z6calleeP1A(%struct.A* %[[RET_CAST]])
+// CHECK: %[[RET_CAST2:.*]] = bitcast %struct.A* %[[RET_CAST]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %[[RET_CAST2]], i8* align 16 %{{.*}}, i64 400, i1 false)
+// CHECK: call void @llvm.dbg.declare(metadata %struct.A* addrspace(5)* %p
+// CHECK: store %struct.A* %[[RET_CAST]], %struct.A** %p.ascast
+__device__ A func1() {
+  A a;
+  int x[100];
+  callee(&a);
+  __builtin_memcpy(&a, x, 400);
+  A *p = &a;
+  return a;
+}
+
+// CHECK-LABEL: @_Z6func1av(%struct.B addrspace(5)* noalias sret(%struct.B) align 4 
+__device__ B func1a() {
+  B b;
+  return b;
+}
+
+// Check returning the return value again.
+
+// CHECK-LABEL: @_Z5func2v(
+// CHECK-SAME: %struct.A addrspace(5)* noalias sret(%struct.A) align 4 %[[RET:.*]])
+// CHECK: %[[CAST1:.*]] = addrspacecast %struct.A addrspace(5)* %[[RET]] to %struct.A*
+// CHECK: %[[CAST2:.*]] = addrspacecast %struct.A* %[[CAST1]] to %struct.A addrspace(5)*
+// CHECK: call void @_Z5func1v(%struct.A addrspace(5)* sret(%struct.A) align 4 %[[CAST2]])
+__device__ A func2() {
+  A a = func1();
+  return a;
+}
+
+// Check assigning the return value to a global variable.
+
+// CHECK-LABEL: @_Z5func3v(
+// CHECK: %[[RET:.*]] = alloca %struct.B, align 4, addrspace(5)
+// CHECK: %[[CAST1:.*]] = addrspacecast %struct.B addrspace(5)* %[[RET]] to %struct.B*
+// CHECK: %[[CAST2:.*]] = addrspacecast %struct.B* %[[CAST1]] to %struct.B addrspace(5)*
+// CHECK: call void @_Z6func1av(%struct.B addrspace(5)* sret(%struct.B) align 4 %[[CAST2]]
+// CHECK: %[[CAST3:.*]] = bitcast %struct.B* %[[CAST1]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64{{.*}}@b{{.*}}%[[CAST3]]
+__device__ void func3() {
+  b = func1a();
+}
+
+// Check assigning the return value to a temporary variable.
+
+// CHECK-LABEL: @_Z5func4v(
+// CHECK: %[[TMP:.*]] = alloca %struct.A, align 4, addrspace(5)
+// CHECK: %[[TMP_CAST1:.*]] = addrspacecast %struct.A addrspace(5)* %[[TMP]] to %struct.A*
+// CHECK: %[[RET:.*]] = alloca %struct.A, align 4, addrspace(5)
+// CHECK: %[[RET_CAST1:.*]] = addrspacecast %struct.A addrspace(5)* %[[RET]] to %struct.A*
+// CHECK: call void @_ZN1AC1Ev(%struct.A* nonnull dereferenceable(400) %[[TMP_CAST1]])
+// CHECK: %[[RET_CAST2:.*]] = addrspacecast %struct.A* %[[RET_CAST1]] to %struct.A addrspace(5)*
+// CHECK: call void @_Z5func1v(%struct.A addrspace(5)* sret(%struct.A) align 4 %[[RET_CAST2]]
+// CHECK: %[[TMP_CAST2:.*]] = bitcast %struct.A* %[[TMP_CAST1]] to i8*
+// CHECK: %[[RET_CAST3:.*]] = bitcast %struct.A* %[[RET_CAST1]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64{{.*}}%[[TMP_CAST2]]{{.*}}%[[RET_CAST3]]
+__device__ void func4() {
+  A a;
+  a = func1();
+}
Index: clang/lib/CodeGen/CodeGenFunction.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenFunction.cpp
+++ clang/lib/CodeGen/CodeGenFunction.cpp
@@ -1084,6 +1084,22 @@
         RetTy->isObjCRetainableType())
       AutoreleaseResult = true;
   }
+  // Alloca address space may be different than default address space. Return
+  // value is expected to be in default address space. This is no-op if they
+  // are the same.
+  if (ReturnValue.isValid()) {
+    ReturnValue =
+        Address(getTargetHooks().performAddrSpaceCast(
+                    *this, ReturnValue.getPointer(), LangAS::Default,
+                    getASTAllocaAddressSpace(),
+                    ReturnValue.getPointer()
+                        ->getType()
+                        ->getPointerElementType()
+                        ->getPointerTo(getContext().getTargetAddressSpace(
+                            LangAS::Default)),
+                    /*non-null*/ true),
+                ReturnValue.getAlignment());
+  }
 
   EmitStartEHSpec(CurCodeDecl);
 
Index: clang/lib/CodeGen/CGDecl.cpp
===================================================================
--- clang/lib/CodeGen/CGDecl.cpp
+++ clang/lib/CodeGen/CGDecl.cpp
@@ -1604,8 +1604,11 @@
     if (UsePointerValue)
       DebugAddr = ReturnValuePointer;
 
-    (void)DI->EmitDeclareOfAutoVariable(&D, DebugAddr.getPointer(), Builder,
-                                        UsePointerValue);
+    // Local variables are casted to default address space if the alloca address
+    // space is different. Need to strip casts to get the real variables.
+    (void)DI->EmitDeclareOfAutoVariable(
+        &D, DebugAddr.getPointer()->stripPointerCasts(), Builder,
+        UsePointerValue);
   }
 
   if (D.hasAttr<AnnotateAttr>() && HaveInsertPoint())
Index: clang/lib/CodeGen/CGCall.cpp
===================================================================
--- clang/lib/CodeGen/CGCall.cpp
+++ clang/lib/CodeGen/CGCall.cpp
@@ -1624,7 +1624,8 @@
   if (IRFunctionArgs.hasSRetArg()) {
     QualType Ret = FI.getReturnType();
     llvm::Type *Ty = ConvertType(Ret);
-    unsigned AddressSpace = Context.getTargetAddressSpace(Ret);
+    unsigned AddressSpace =
+        Context.getTargetAddressSpace(CGM.getASTAllocaAddressSpace());
     ArgTypes[IRFunctionArgs.getSRetArgNo()] =
         llvm::PointerType::get(Ty, AddressSpace);
   }
@@ -4671,7 +4672,17 @@
       }
     }
     if (IRFunctionArgs.hasSRetArg()) {
-      IRCallArgs[IRFunctionArgs.getSRetArgNo()] = SRetPtr.getPointer();
+      IRCallArgs[IRFunctionArgs.getSRetArgNo()] =
+          getTargetHooks().performAddrSpaceCast(
+              *this, SRetPtr.getPointer(), LangAS::Default,
+              getASTAllocaAddressSpace(),
+              SRetPtr.getPointer()
+                  ->getType()
+                  ->getPointerElementType()
+                  ->getPointerTo(getContext().getTargetAddressSpace(
+                      getASTAllocaAddressSpace())),
+              /*non-null*/ true);
+
     } else if (RetAI.isInAlloca()) {
       Address Addr =
           Builder.CreateStructGEP(ArgMemory, RetAI.getInAllocaFieldIndex());
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to