yaxunl updated this revision to Diff 81309.
yaxunl added a comment.

Cast alloca to default address space.


https://reviews.llvm.org/D27627

Files:
  include/clang/AST/ASTContext.h
  include/clang/Basic/TargetInfo.h
  lib/AST/ASTContext.cpp
  lib/Basic/Targets.cpp
  lib/CodeGen/CGCall.cpp
  lib/CodeGen/CGDecl.cpp
  lib/CodeGen/CGExpr.cpp
  lib/CodeGen/CodeGenFunction.h
  test/CodeGen/default-addr-space.cpp

Index: test/CodeGen/default-addr-space.cpp
===================================================================
--- /dev/null
+++ test/CodeGen/default-addr-space.cpp
@@ -0,0 +1,33 @@
+// RUN: %clang_cc1 -O0 -std=c++11 -emit-llvm -o - -triple amdgcn-- %s | FileCheck %s
+
+// CHECK: %struct.ATy = type { i32 addrspace(4)* }
+struct ATy {
+  int *p;
+};
+
+// CHECK-LABEL: @_Z1fPi(i32 addrspace(4)* %a)
+void f(int* a) {
+  // CHECK: %[[a_addr_0:.*]] = alloca i32 addrspace(4)*
+  // CHECK: %[[a_addr:.*]] = addrspacecast i32 addrspace(4)** %[[a_addr_0]] to i32 addrspace(4)* addrspace(4)*
+  // CHECK: %[[b0:.*]] = alloca i32
+  // CHECK: %[[b:.*]] = addrspacecast i32* %[[b0]] to i32 addrspace(4)*
+  // CHECK: %[[A0:.*]] = alloca %struct.ATy, align 4
+  // CHECK: %[[A:.*]] = addrspacecast %struct.ATy* %[[A0]] to %struct.ATy addrspace(4)*
+
+  // CHECK:  store i32 addrspace(4)* %a, i32 addrspace(4)* addrspace(4)* %[[a_addr]]
+
+  // CHECK:  store i32 1, i32 addrspace(4)* %[[b]]
+  int b = 1;
+
+  // CHECK: %[[p:.*]] = getelementptr inbounds %struct.ATy, %struct.ATy addrspace(4)* %[[A]], i32 0, i32 0
+  // CHECK: store i32 addrspace(4)* %[[b]], i32 addrspace(4)* addrspace(4)* %[[p]], align 4
+  ATy A{&b};
+
+  // CHECK: %[[r0:.*]] = load i32, i32 addrspace(4)* %[[b]]
+  // CHECK: %[[r1:.*]] = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(4)* %[[a_addr]]
+  // CHECK: store i32 %[[r0]], i32 addrspace(4)* %[[r1]]
+  *a = b;
+
+  // CHECK: store i32 addrspace(4)* %[[b]], i32 addrspace(4)* addrspace(4)* %[[a_addr]], align 4
+  a = &b;
+}
Index: lib/CodeGen/CodeGenFunction.h
===================================================================
--- lib/CodeGen/CodeGenFunction.h
+++ lib/CodeGen/CodeGenFunction.h
@@ -344,7 +344,7 @@
   };
 
   /// i32s containing the indexes of the cleanup destinations.
-  llvm::AllocaInst *NormalCleanupDest;
+  llvm::Instruction *NormalCleanupDest;
 
   unsigned NextCleanupDestIndex;
 
@@ -359,8 +359,8 @@
   llvm::Value *ExceptionSlot;
 
   /// The selector slot.  Under the MandatoryCleanup model, all landing pads
-  /// write the current selector value into this alloca.
-  llvm::AllocaInst *EHSelectorSlot;
+  /// write the current selector value into this instruction.
+  llvm::Instruction *EHSelectorSlot;
 
   /// A stack of exception code slots. Entering an __except block pushes a slot
   /// on the stack and leaving pops one. The __exception_code() intrinsic loads
@@ -395,11 +395,11 @@
 
     /// An i1 variable indicating whether or not the @finally is
     /// running for an exception.
-    llvm::AllocaInst *ForEHVar;
+    llvm::Instruction *ForEHVar;
 
     /// An i8* variable into which the exception pointer to rethrow
     /// has been saved.
-    llvm::AllocaInst *SavedExnVar;
+    llvm::Instruction *SavedExnVar;
 
   public:
     void enter(CodeGenFunction &CGF, const Stmt *Finally,
@@ -1788,14 +1788,23 @@
                             AlignmentSource *Source = nullptr);
   LValue EmitLoadOfPointerLValue(Address Ptr, const PointerType *PtrTy);
 
+  /// Create an alloca instruction. If the default address space is not 0,
+  /// insert an addrspacecast instruction that casts the alloca to the
+  /// default address space, and return the cast instead of the alloca.
+  llvm::Instruction *CreateAlloca(llvm::Type *Ty, const Twine &Name = "tmp",
+                                  llvm::Instruction *InsertPos = nullptr);
   /// CreateTempAlloca - This creates a alloca and inserts it into the entry
   /// block. The caller is responsible for setting an appropriate alignment on
-  /// the alloca.
-  llvm::AllocaInst *CreateTempAlloca(llvm::Type *Ty,
-                                     const Twine &Name = "tmp");
+  /// the alloca. If the default address space is not 0, insert addrspacecast.
+  llvm::Instruction *CreateTempAlloca(llvm::Type *Ty,
+                                      const Twine &Name = "tmp");
   Address CreateTempAlloca(llvm::Type *Ty, CharUnits align,
                            const Twine &Name = "tmp");
 
+  /// Get the alloca instruction operand of an addrspacecast instruction.
+  /// If \p Inst is itself an alloca instruction, returns \p Inst.
+  llvm::AllocaInst *getAddrSpaceCastedAlloca(llvm::Instruction *Inst) const;
+
   /// CreateDefaultAlignedTempAlloca - This creates an alloca with the
   /// default ABI alignment of the given LLVM type.
   ///
Index: lib/CodeGen/CGExpr.cpp
===================================================================
--- lib/CodeGen/CGExpr.cpp
+++ lib/CodeGen/CGExpr.cpp
@@ -59,16 +59,37 @@
 /// block.
 Address CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align,
                                           const Twine &Name) {
-  auto Alloca = CreateTempAlloca(Ty, Name);
+  auto CastedAlloca = CreateTempAlloca(Ty, Name);
+  auto *Alloca = getAddrSpaceCastedAlloca(CastedAlloca);
   Alloca->setAlignment(Align.getQuantity());
-  return Address(Alloca, Align);
+  return Address(CastedAlloca, Align);
 }
 
 /// CreateTempAlloca - This creates a alloca and inserts it into the entry
 /// block.
-llvm::AllocaInst *CodeGenFunction::CreateTempAlloca(llvm::Type *Ty,
-                                                    const Twine &Name) {
-  return new llvm::AllocaInst(Ty, nullptr, Name, AllocaInsertPt);
+llvm::Instruction *CodeGenFunction::CreateTempAlloca(llvm::Type *Ty,
+                                                     const Twine &Name) {
+  return CreateAlloca(Ty, Name, AllocaInsertPt);
+}
+
+llvm::Instruction *CodeGenFunction::CreateAlloca(llvm::Type *Ty,
+                                                 const Twine &Name,
+                                                 llvm::Instruction *InsertPos) {
+  llvm::Instruction *V = new llvm::AllocaInst(Ty, nullptr, Name, InsertPos);
+  auto DefaultAddr = getTarget().getDefaultTargetAddressSpace(getLangOpts());
+  if (DefaultAddr != 0) {
+    auto *DestTy = llvm::PointerType::get(V->getType()->getPointerElementType(),
+                                          DefaultAddr);
+    V = new llvm::AddrSpaceCastInst(V, DestTy, "", InsertPos);
+  }
+  return V;
+}
+
+llvm::AllocaInst *
+CodeGenFunction::getAddrSpaceCastedAlloca(llvm::Instruction *V) const {
+  if (auto *Cast = dyn_cast<llvm::AddrSpaceCastInst>(V))
+    return cast<llvm::AllocaInst>(Cast->getOperand(0));
+  return cast<llvm::AllocaInst>(V);
 }
 
 /// CreateDefaultAlignTempAlloca - This creates an alloca with the
Index: lib/CodeGen/CGDecl.cpp
===================================================================
--- lib/CodeGen/CGDecl.cpp
+++ lib/CodeGen/CGDecl.cpp
@@ -1060,7 +1060,15 @@
     llvm::AllocaInst *vla = Builder.CreateAlloca(llvmTy, elementCount, "vla");
     vla->setAlignment(alignment.getQuantity());
 
-    address = Address(vla, alignment);
+    llvm::Value *V = vla;
+    auto DefaultAddr = getTarget().getDefaultTargetAddressSpace(getLangOpts());
+    if (DefaultAddr != 0) {
+      auto *DestTy =
+          llvm::PointerType::get(vla->getType()->getElementType(), DefaultAddr);
+      V = Builder.CreateAddrSpaceCast(vla, DestTy);
+    }
+
+    address = Address(V, alignment);
   }
 
   setAddrOfLocalVar(&D, address);
Index: lib/CodeGen/CGCall.cpp
===================================================================
--- lib/CodeGen/CGCall.cpp
+++ lib/CodeGen/CGCall.cpp
@@ -3567,18 +3567,19 @@
   if (llvm::StructType *ArgStruct = CallInfo.getArgStruct()) {
     ArgMemoryLayout = CGM.getDataLayout().getStructLayout(ArgStruct);
     llvm::Instruction *IP = CallArgs.getStackBase();
-    llvm::AllocaInst *AI;
+    llvm::Instruction *CastedAI;
     if (IP) {
       IP = IP->getNextNode();
-      AI = new llvm::AllocaInst(ArgStruct, "argmem", IP);
+      CastedAI = CreateAlloca(ArgStruct, "argmem", IP);
     } else {
-      AI = CreateTempAlloca(ArgStruct, "argmem");
+      CastedAI = CreateTempAlloca(ArgStruct, "argmem");
     }
     auto Align = CallInfo.getArgStructAlignment();
+    auto *AI = getAddrSpaceCastedAlloca(CastedAI);
     AI->setAlignment(Align.getQuantity());
     AI->setUsedWithInAlloca(true);
     assert(AI->isUsedWithInAlloca() && !AI->isStaticAlloca());
-    ArgMemory = Address(AI, Align);
+    ArgMemory = Address(CastedAI, Align);
   }
 
   // Helper function to drill into the inalloca allocation.
Index: lib/Basic/Targets.cpp
===================================================================
--- lib/Basic/Targets.cpp
+++ lib/Basic/Targets.cpp
@@ -1978,16 +1978,6 @@
   }
 };
 
-static const unsigned AMDGPUAddrSpaceMap[] = {
-  1,    // opencl_global
-  3,    // opencl_local
-  2,    // opencl_constant
-  4,    // opencl_generic
-  1,    // cuda_device
-  2,    // cuda_constant
-  3     // cuda_shared
-};
-
 // If you edit the description strings, make sure you update
 // getPointerWidthV().
 
@@ -2001,9 +1991,18 @@
   "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
 
 class AMDGPUTargetInfo final : public TargetInfo {
+  static const unsigned AddrSpaceMap_[7];
   static const Builtin::Info BuiltinInfo[];
   static const char * const GCCRegNames[];
 
+  enum AddrSpaceKind {
+    AS_Private = 0,
+    AS_Global = 1,
+    AS_Constant = 2,
+    AS_Local = 3,
+    AS_Generic = 4
+  };
+
   /// \brief The GPU profiles supported by the AMDGPU target.
   enum GPUKind {
     GK_NONE,
@@ -2046,7 +2045,7 @@
     resetDataLayout(getTriple().getArch() == llvm::Triple::amdgcn ?
                     DataLayoutStringSI : DataLayoutStringR600);
 
-    AddrSpaceMap = &AMDGPUAddrSpaceMap;
+    AddrSpaceMap = &AddrSpaceMap_;
     UseAddrSpaceMapMangling = true;
   }
 
@@ -2232,6 +2231,15 @@
     }
   }
 
+  unsigned
+  getDefaultTargetAddressSpace(const LangOptions &Opts) const override {
+    // OpenCL sets address space explicitly in AST. The default case (type
+    // qualifier containing no address space) represents private address space.
+    if (Opts.OpenCL)
+      return AS_Private;
+    return AS_Generic;
+  }
+
   LangAS::ID getOpenCLImageAddrSpace() const override {
     return LangAS::opencl_constant;
   }
@@ -2254,6 +2262,16 @@
   }
 };
 
+const unsigned AMDGPUTargetInfo::AddrSpaceMap_[] = {
+    AS_Global,   // opencl_global
+    AS_Local,    // opencl_local
+    AS_Constant, // opencl_constant
+    AS_Generic,  // opencl_generic
+    AS_Global,   // cuda_device
+    AS_Constant, // cuda_constant
+    AS_Local     // cuda_shared
+};
+
 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
 #define BUILTIN(ID, TYPE, ATTRS)                \
   { #ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr },
Index: lib/AST/ASTContext.cpp
===================================================================
--- lib/AST/ASTContext.cpp
+++ lib/AST/ASTContext.cpp
@@ -9436,6 +9436,12 @@
   return getTargetInfo().getNullPointerValue(AS);
 }
 
+unsigned ASTContext::getTargetAddressSpace(Qualifiers Q) const {
+  return Q.hasAddressSpace()
+             ? getTargetAddressSpace(Q.getAddressSpace())
+             : getTargetInfo().getDefaultTargetAddressSpace(LangOpts);
+}
+
 // Explicitly instantiate this in case a Redeclarable<T> is used from a TU that
 // doesn't include ASTContext.h
 template
Index: include/clang/Basic/TargetInfo.h
===================================================================
--- include/clang/Basic/TargetInfo.h
+++ include/clang/Basic/TargetInfo.h
@@ -945,6 +945,10 @@
     return *AddrSpaceMap;
   }
 
+  virtual unsigned getDefaultTargetAddressSpace(const LangOptions &Opt) const {
+    return 0;
+  }
+
   /// \brief Retrieve the name of the platform as it is used in the
   /// availability attribute.
   StringRef getPlatformName() const { return PlatformName; }
Index: include/clang/AST/ASTContext.h
===================================================================
--- include/clang/AST/ASTContext.h
+++ include/clang/AST/ASTContext.h
@@ -2284,9 +2284,7 @@
     return getTargetAddressSpace(T.getQualifiers());
   }
 
-  unsigned getTargetAddressSpace(Qualifiers Q) const {
-    return getTargetAddressSpace(Q.getAddressSpace());
-  }
+  unsigned getTargetAddressSpace(Qualifiers Q) const;
 
   unsigned getTargetAddressSpace(unsigned AS) const {
     if (AS < LangAS::Offset || AS >= LangAS::Offset + LangAS::Count)
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to