yaxunl updated this revision to Diff 106030.
yaxunl marked an inline comment as done.
yaxunl edited the summary of this revision.
yaxunl added a comment.

Always put indirect arg in alloca addr space.


https://reviews.llvm.org/D34367

Files:
  lib/CodeGen/CGCall.cpp
  lib/CodeGen/CGDecl.cpp
  test/CodeGenCXX/amdgcn-func-arg.cpp

Index: test/CodeGenCXX/amdgcn-func-arg.cpp
===================================================================
--- /dev/null
+++ test/CodeGenCXX/amdgcn-func-arg.cpp
@@ -0,0 +1,94 @@
+// RUN: %clang_cc1 -O0 -triple amdgcn---amdgiz -emit-llvm %s -o - | FileCheck %s
+
+class A {
+public:
+  int x;
+  A():x(0) {}
+  ~A() {}
+};
+
+class B {
+int x;
+};
+
+A g_a;
+B g_b;
+
+void func_with_ref_arg(A &a);
+void func_with_ref_arg(B &b);
+
+// CHECK-LABEL: define void @_Z22func_with_indirect_arg1A(%class.A addrspace(5)* %a)
+// CHECK:  %p = alloca %class.A*, align 8, addrspace(5)
+// CHECK:  %[[r0:.+]] = addrspacecast %class.A addrspace(5)* %a to %class.A*
+// CHECK:  %[[r1:.+]] = addrspacecast %class.A* addrspace(5)* %p to %class.A**
+// CHECK:  store %class.A* %[[r0]], %class.A** %[[r1]], align 8
+void func_with_indirect_arg(A a) {
+  A *p = &a;
+}
+
+// CHECK-LABEL: define void @_Z22test_indirect_arg_autov()
+// CHECK:  %a = alloca %class.A, align 4, addrspace(5)
+// CHECK:  %agg.tmp = alloca %class.A, align 4, addrspace(5)
+// CHECK:  %[[r0:.+]] = addrspacecast %class.A addrspace(5)* %a to %class.A*
+// CHECK:  call void @_ZN1AC1Ev(%class.A* %[[r0]])
+// CHECK:  %[[r1:.+]] = addrspacecast %class.A addrspace(5)* %agg.tmp to %class.A*
+// CHECK:  call void @llvm.memcpy.p0i8.p0i8.i64
+// CHECK:  %[[r4:.+]] = addrspacecast %class.A* %[[r1]] to %class.A addrspace(5)*
+// CHECK:  call void @_Z22func_with_indirect_arg1A(%class.A addrspace(5)* %[[r4]])
+// CHECK:  call void @_ZN1AD1Ev(%class.A* %[[r1]])
+// CHECK:  call void @_Z17func_with_ref_argR1A(%class.A* dereferenceable(4) %[[r0]])
+// CHECK:  call void @_ZN1AD1Ev(%class.A* %[[r0]])
+void test_indirect_arg_auto() {
+  A a;
+  func_with_indirect_arg(a);
+  func_with_ref_arg(a);
+}
+
+// CHECK: define void @_Z24test_indirect_arg_globalv()
+// CHECK:  %agg.tmp = alloca %class.A, align 4, addrspace(5)
+// CHECK:  %[[r0:.+]] = addrspacecast %class.A addrspace(5)* %agg.tmp to %class.A*
+// CHECK:  call void @llvm.memcpy.p0i8.p0i8.i64
+// CHECK:  %[[r2:.+]] = addrspacecast %class.A* %[[r0]] to %class.A addrspace(5)*
+// CHECK:  call void @_Z22func_with_indirect_arg1A(%class.A addrspace(5)* %[[r2]])
+// CHECK:  call void @_ZN1AD1Ev(%class.A* %[[r0]])
+// CHECK:  call void @_Z17func_with_ref_argR1A(%class.A* dereferenceable(4) addrspacecast (%class.A addrspace(1)* @g_a to %class.A*))
+void test_indirect_arg_global() {
+  func_with_indirect_arg(g_a);
+  func_with_ref_arg(g_a);
+}
+
+// CHECK-LABEL: define void @_Z19func_with_byval_arg1B(%class.B addrspace(5)* byval align 4 %b)
+// CHECK:  %p = alloca %class.B*, align 8, addrspace(5)
+// CHECK:  %[[r0:.+]] = addrspacecast %class.B addrspace(5)* %b to %class.B*
+// CHECK:  %[[r1:.+]] = addrspacecast %class.B* addrspace(5)* %p to %class.B**
+// CHECK:  store %class.B* %[[r0]], %class.B** %[[r1]], align 8
+void func_with_byval_arg(B b) {
+  B *p = &b;
+}
+
+// CHECK-LABEL: define void @_Z19test_byval_arg_autov()
+// CHECK:  %b = alloca %class.B, align 4, addrspace(5)
+// CHECK:  %agg.tmp = alloca %class.B, align 4, addrspace(5)
+// CHECK:  %[[r0:.+]] = addrspacecast %class.B addrspace(5)* %b to %class.B*
+// CHECK:  %[[r1:.+]] = addrspacecast %class.B addrspace(5)* %agg.tmp to %class.B*
+// CHECK:  call void @llvm.memcpy.p0i8.p0i8.i64
+// CHECK:  %[[r4:.+]] = addrspacecast %class.B* %[[r1]] to %class.B addrspace(5)*
+// CHECK:  call void @_Z19func_with_byval_arg1B(%class.B addrspace(5)* byval align 4 %[[r4]])
+// CHECK:  call void @_Z17func_with_ref_argR1B(%class.B* dereferenceable(4) %[[r0]])
+void test_byval_arg_auto() {
+  B b;
+  func_with_byval_arg(b);
+  func_with_ref_arg(b);
+}
+
+// CHECK-LABEL: define void @_Z21test_byval_arg_globalv()
+// CHECK:  %agg.tmp = alloca %class.B, align 4, addrspace(5)
+// CHECK:  %[[r0:.+]] = addrspacecast %class.B addrspace(5)* %agg.tmp to %class.B*
+// CHECK:  call void @llvm.memcpy.p0i8.p0i8.i64
+// CHECK:  %[[r2:.+]] = addrspacecast %class.B* %[[r0]] to %class.B addrspace(5)*
+// CHECK:  call void @_Z19func_with_byval_arg1B(%class.B addrspace(5)* byval align 4 %[[r2]])
+// CHECK:  call void @_Z17func_with_ref_argR1B(%class.B* dereferenceable(4) addrspacecast (%class.B addrspace(1)* @g_b to %class.B*))
+void test_byval_arg_global() {
+  func_with_byval_arg(g_b);
+  func_with_ref_arg(g_b);
+}
Index: lib/CodeGen/CGDecl.cpp
===================================================================
--- lib/CodeGen/CGDecl.cpp
+++ lib/CodeGen/CGDecl.cpp
@@ -1819,6 +1819,19 @@
     llvm::Type *IRTy = ConvertTypeForMem(Ty)->getPointerTo(AS);
     if (DeclPtr.getType() != IRTy)
       DeclPtr = Builder.CreateBitCast(DeclPtr, IRTy, D.getName());
+    // Indirect argument is in alloca address space, which may be different
+    // from the default address space.
+    auto AllocaAS = CGM.getASTAllocaAddressSpace();
+    auto *V = DeclPtr.getPointer();
+    auto SrcAS = V->getType()->getPointerAddressSpace();
+    auto DestAS = getContext().getTargetAddressSpace(LangAS::Default);
+    if (SrcAS != DestAS) {
+      assert(SrcAS == CGM.getDataLayout().getAllocaAddrSpace());
+      auto *T = V->getType()->getPointerElementType()->getPointerTo(DestAS);
+      DeclPtr = Address(getTargetHooks().performAddrSpaceCast(
+                            *this, V, AllocaAS, LangAS::Default, T, true),
+                        DeclPtr.getAlignment());
+    }
 
     // Push a destructor cleanup for this parameter if the ABI requires it.
     // Don't push a cleanup in a thunk for a method that will also emit a
Index: lib/CodeGen/CGCall.cpp
===================================================================
--- lib/CodeGen/CGCall.cpp
+++ lib/CodeGen/CGCall.cpp
@@ -1599,8 +1599,8 @@
       assert(NumIRArgs == 1);
       // indirect arguments are always on the stack, which is alloca addr space.
       llvm::Type *LTy = ConvertTypeForMem(it->type);
-      ArgTypes[FirstIRArg] = LTy->getPointerTo(
-          CGM.getDataLayout().getAllocaAddrSpace());
+      ArgTypes[FirstIRArg] =
+          LTy->getPointerTo(CGM.getDataLayout().getAllocaAddrSpace());
       break;
     }
 
@@ -3818,12 +3818,18 @@
     }
 
     case ABIArgInfo::Indirect: {
+      auto CastToAllocaAddrSpace = [&](llvm::Value *V) {
+        auto *T = V->getType()->getPointerElementType()->getPointerTo(
+            CGM.getDataLayout().getAllocaAddrSpace());
+        return getTargetHooks().performAddrSpaceCast(
+            *this, V, LangAS::Default, CGM.getASTAllocaAddressSpace(), T, true);
+      };
       assert(NumIRArgs == 1);
       if (RV.isScalar() || RV.isComplex()) {
         // Make a temporary alloca to pass the argument.
         Address Addr = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(),
-                                     "indirect-arg-temp", false);
-        IRCallArgs[FirstIRArg] = Addr.getPointer();
+                                     "indirect-arg-temp");
+        IRCallArgs[FirstIRArg] = CastToAllocaAddrSpace(Addr.getPointer());
 
         LValue argLV = MakeAddrLValue(Addr, I->Ty);
         EmitInitStoreOfNonAggregate(*this, RV, argLV);
@@ -3835,29 +3841,32 @@
         // 2. If the argument is byval, RV is not sufficiently aligned, and
         //    we cannot force it to be sufficiently aligned.
         // 3. If the argument is byval, but RV is located in an address space
-        //    different than that of the argument (0).
+        //    different than that of the argument (alloca address space).
         Address Addr = RV.getAggregateAddress();
         CharUnits Align = ArgInfo.getIndirectAlign();
         const llvm::DataLayout *TD = &CGM.getDataLayout();
-        const unsigned RVAddrSpace = Addr.getType()->getAddressSpace();
+        const unsigned RVAddrSpace = Addr.getPointer()
+                                         ->stripPointerCasts()
+                                         ->getType()
+                                         ->getPointerAddressSpace();
         const unsigned ArgAddrSpace =
             (FirstIRArg < IRFuncTy->getNumParams()
                  ? IRFuncTy->getParamType(FirstIRArg)->getPointerAddressSpace()
-                 : 0);
+                 : TD->getAllocaAddrSpace());
         if ((!ArgInfo.getIndirectByVal() && I->NeedsCopy) ||
             (ArgInfo.getIndirectByVal() && Addr.getAlignment() < Align &&
              llvm::getOrEnforceKnownAlignment(Addr.getPointer(),
                                               Align.getQuantity(), *TD)
                < Align.getQuantity()) ||
             (ArgInfo.getIndirectByVal() && (RVAddrSpace != ArgAddrSpace))) {
           // Create an aligned temporary, and copy to it.
           Address AI = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(),
-                                     "byval-temp", false);
-          IRCallArgs[FirstIRArg] = AI.getPointer();
+                                     "byval-temp");
+          IRCallArgs[FirstIRArg] = CastToAllocaAddrSpace(AI.getPointer());
           EmitAggregateCopy(AI, Addr, I->Ty, RV.isVolatileQualified());
         } else {
           // Skip the extra memcpy call.
-          IRCallArgs[FirstIRArg] = Addr.getPointer();
+          IRCallArgs[FirstIRArg] = CastToAllocaAddrSpace(Addr.getPointer());
         }
       }
       break;
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to