ychen updated this revision to Diff 344602.
ychen added a comment.

- Rebase on updated D102145 <https://reviews.llvm.org/D102145> (use 
`llvm.coro.raw.frame.ptr.addr` during allocation)


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D97915/new/

https://reviews.llvm.org/D97915

Files:
  clang/include/clang/AST/StmtCXX.h
  clang/lib/AST/StmtCXX.cpp
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/CodeGen/CGCoroutine.cpp
  clang/lib/CodeGen/CodeGenFunction.h
  clang/lib/Sema/SemaCoroutine.cpp
  clang/lib/Sema/TreeTransform.h
  clang/test/CodeGenCoroutines/coro-alloc.cpp
  clang/test/CodeGenCoroutines/coro-cleanup.cpp
  clang/test/CodeGenCoroutines/coro-gro.cpp

Index: clang/test/CodeGenCoroutines/coro-gro.cpp
===================================================================
--- clang/test/CodeGenCoroutines/coro-gro.cpp
+++ clang/test/CodeGenCoroutines/coro-gro.cpp
@@ -68,6 +68,7 @@
   // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJiEE12promise_typeD1Ev(
   // CHECK: %[[Mem:.+]] = call i8* @llvm.coro.free(
   // CHECK: call void @_ZdlPv(i8* %[[Mem]])
+  // CHECK: call void @_ZdlPv(i8* %{{.*}})
 
   // Initialize retval from Gro and destroy Gro
 
Index: clang/test/CodeGenCoroutines/coro-cleanup.cpp
===================================================================
--- clang/test/CodeGenCoroutines/coro-cleanup.cpp
+++ clang/test/CodeGenCoroutines/coro-cleanup.cpp
@@ -78,12 +78,46 @@
 
   // CHECK: [[Cleanup]]:
   // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJvEE12promise_typeD1Ev(
-  // CHECK: %[[Mem0:.+]] = call i8* @llvm.coro.free(
-  // CHECK: call void @_ZdlPv(i8* %[[Mem0]]
+  // CHECK: %[[MEM0:.+]] = call i8* @llvm.coro.free(
+  // CHECK: br i1 %{{.*}}, label %[[CheckAlignBB:.+]], label %[[Afterwards:.+]]
+
+  // CHECK: [[CheckAlignBB]]:
+  // CHECK: %[[ALIGN:.+]] = call i64 @llvm.coro.align.i64()
+  // CHECK: %[[CMP:.+]] = icmp ugt i64 %[[ALIGN]],
+  // CHECK: br i1 %[[CMP]], label %[[AlignedFreeBB:.+]], label %[[FreeBB:.+]]
+
+  // CHECK: [[FreeBB]]:
+  // CHECK: call void @_ZdlPv(i8* %[[MEM0]]
+  // CHECK: br label %[[Afterwards]]
+
+  // CHECK: [[AlignedFreeBB]]:
+  // CHECK-NEXT: %[[OFFSET:.+]] = call i32 @llvm.coro.raw.frame.ptr.offset.i32()
+  // CHECK-NEXT: %[[ADDR:.+]] = getelementptr inbounds i8, i8* %[[MEM0]], i32 %[[OFFSET]]
+  // CHECK-NEXT: %[[ADDR2:.+]] = bitcast i8* %[[ADDR]] to i8**
+  // CHECK-NEXT: %[[MEM:.+]] = load i8*, i8** %[[ADDR2]], align 8
+  // CHECK-NEXT: call void @_ZdlPv(i8* %[[MEM]])
+  // CHECK-NEXT: br label %[[Afterwards]]
 
   // CHECK: [[Dealloc]]:
-  // CHECK:   %[[Mem:.+]] = call i8* @llvm.coro.free(
-  // CHECK:   call void @_ZdlPv(i8* %[[Mem]])
+  // CHECK: %[[MEM0:.+]] = call i8* @llvm.coro.free(
+  // CHECK: br i1 %{{.*}}, label %[[CheckAlignBB:.+]], label %[[Afterwards:.+]]
+
+  // CHECK: [[CheckAlignBB]]:
+  // CHECK: %[[ALIGN:.+]] = call i64 @llvm.coro.align.i64()
+  // CHECK: %[[CMP:.+]] = icmp ugt i64 %[[ALIGN]],
+  // CHECK: br i1 %[[CMP]], label %[[AlignedFreeBB:.+]], label %[[FreeBB:.+]]
+
+  // CHECK: [[FreeBB]]:
+  // CHECK: call void @_ZdlPv(i8* %[[MEM0]]
+  // CHECK: br label %[[Afterwards]]
+
+  // CHECK: [[AlignedFreeBB]]:
+  // CHECK-NEXT: %[[OFFSET:.+]] = call i32 @llvm.coro.raw.frame.ptr.offset.i32()
+  // CHECK-NEXT: %[[ADDR:.+]] = getelementptr inbounds i8, i8* %[[MEM0]], i32 %[[OFFSET]]
+  // CHECK-NEXT: %[[ADDR2:.+]] = bitcast i8* %[[ADDR]] to i8**
+  // CHECK-NEXT: %[[MEM:.+]] = load i8*, i8** %[[ADDR2]], align 8
+  // CHECK-NEXT: call void @_ZdlPv(i8* %[[MEM]])
+  // CHECK-NEXT: br label %[[Afterwards]]
 
   co_return;
 }
Index: clang/test/CodeGenCoroutines/coro-alloc.cpp
===================================================================
--- clang/test/CodeGenCoroutines/coro-alloc.cpp
+++ clang/test/CodeGenCoroutines/coro-alloc.cpp
@@ -57,24 +57,55 @@
 extern "C" void f0(global_new_delete_tag) {
   // CHECK: %[[ID:.+]] = call token @llvm.coro.id(i32 16
   // CHECK: %[[NeedAlloc:.+]] = call i1 @llvm.coro.alloc(token %[[ID]])
-  // CHECK: br i1 %[[NeedAlloc]], label %[[AllocBB:.+]], label %[[InitBB:.+]]
+  // CHECK: br i1 %[[NeedAlloc]], label %[[CheckAlignBB:.+]], label %[[InitBB:.+]]
+
+  // CHECK: [[CheckAlignBB]]:
+  // CHECK: %[[ALIGN:.+]] = call i64 @llvm.coro.align.i64()
+  // CHECK: %[[CMP:.+]] = icmp ugt i64 %[[ALIGN]], 16
+  // CHECK: br i1 %[[CMP]], label %[[AlignAllocBB:.+]], label %[[AllocBB:.+]]
 
   // CHECK: [[AllocBB]]:
+  // CHECK-NEXT: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64()
+  // CHECK-NEXT: %[[MEM:.+]] = call noalias nonnull i8* @_Znwm(i64 %[[SIZE]])
+  // CHECK-NEXT: br label %[[InitBB]]
+
+  // CHECK: [[AlignAllocBB]]:
   // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64()
-  // CHECK: %[[MEM:.+]] = call noalias nonnull i8* @_Znwm(i64 %[[SIZE]])
+  // CHECK: %[[ALIGN:.+]] = call i64 @llvm.coro.align.i64()
+  // CHECK: %[[PAD:.+]] = sub nsw i64 %[[ALIGN]], 16
+  // CHECK: %[[NEWSIZE:.+]] = add i64 %[[SIZE]], %[[PAD]]
+  // CHECK: %[[MEM2:.+]] = call noalias nonnull i8* @_Znwm(i64 %[[NEWSIZE]])
+  // CHECK: %[[ALIGN2:.+]] = call i64 @llvm.coro.align.i64()
+  // CHECK: %[[ALIGNED:.+]] = getelementptr inbounds i8, i8* %[[MEM2]],
+  // CHECK: call void @llvm.assume(i1 true) [ "align"(i8* %[[ALIGNED]], i64 %[[ALIGN2]]) ]
+  // CHECK: %[[ADDR:.+]] = call i8** @llvm.coro.raw.frame.ptr.addr()
+  // CHECK: store i8* %[[MEM2]], i8** %[[ADDR]], align 8
   // CHECK: br label %[[InitBB]]
 
   // CHECK: [[InitBB]]:
-  // CHECK: %[[PHI:.+]] = phi i8* [ null, %{{.+}} ], [ %call, %[[AllocBB]] ]
+  // CHECK: %[[PHI:.+]] = phi i8* [ null, %{{.+}} ], [ %[[MEM]], %[[AllocBB]] ], [ %[[ALIGNED]], %[[AlignAllocBB]] ]
   // CHECK: %[[FRAME:.+]] = call i8* @llvm.coro.begin(token %[[ID]], i8* %[[PHI]])
 
   // CHECK: %[[MEM:.+]] = call i8* @llvm.coro.free(token %[[ID]], i8* %[[FRAME]])
   // CHECK: %[[NeedDealloc:.+]] = icmp ne i8* %[[MEM]], null
-  // CHECK: br i1 %[[NeedDealloc]], label %[[FreeBB:.+]], label %[[Afterwards:.+]]
+  // CHECK: br i1 %[[NeedDealloc]], label %[[CheckAlignBB:.+]], label %[[Afterwards:.+]]
+
+  // CHECK: [[CheckAlignBB]]:
+  // CHECK: %[[ALIGN:.+]] = call i64 @llvm.coro.align.i64()
+  // CHECK: %[[CMP:.+]] = icmp ugt i64 %[[ALIGN]], 16
+  // CHECK: br i1 %[[CMP]], label %[[AlignedFreeBB:.+]], label %[[FreeBB:.+]]
 
   // CHECK: [[FreeBB]]:
-  // CHECK: call void @_ZdlPv(i8* %[[MEM]])
-  // CHECK: br label %[[Afterwards]]
+  // CHECK-NEXT: call void @_ZdlPv(i8* %[[MEM]])
+  // CHECK-NEXT: br label %[[Afterwards]]
+
+  // CHECK: [[AlignedFreeBB]]:
+  // CHECK-NEXT: %[[OFFSET:.+]] = call i32 @llvm.coro.raw.frame.ptr.offset.i32()
+  // CHECK-NEXT: %[[ADDR:.+]] = getelementptr inbounds i8, i8* %[[MEM]], i32 %[[OFFSET]]
+  // CHECK-NEXT: %[[ADDR2:.+]] = bitcast i8* %[[ADDR]] to i8**
+  // CHECK-NEXT: %[[MEM:.+]] = load i8*, i8** %[[ADDR2]], align 8
+  // CHECK-NEXT: call void @_ZdlPv(i8* %[[MEM]])
+  // CHECK-NEXT: br label %[[Afterwards]]
 
   // CHECK: [[Afterwards]]:
   // CHECK: ret void
@@ -157,6 +188,7 @@
 // CHECK-LABEL: f1b(
 extern "C" void f1b(promise_matching_global_placement_new_tag, dummy *) {
   // CHECK: call noalias nonnull i8* @_Znwm(i64
+  // CHECK-NOT: call noalias nonnull i8* @_ZnwmSt11align_val_t(i64
   co_return;
 }
 
@@ -182,6 +214,7 @@
   // CHECK: %[[FRAME:.+]] = call i8* @llvm.coro.begin(
   // CHECK: %[[MEM:.+]] = call i8* @llvm.coro.free(token %[[ID]], i8* %[[FRAME]])
   // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJv18promise_delete_tagEE12promise_typedlEPv(i8* %[[MEM]])
+  // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJv18promise_delete_tagEE12promise_typedlEPv(i8*
   co_return;
 }
 
@@ -229,16 +262,41 @@
   // CHECK: %[[RetVal:.+]] = alloca i32
   // CHECK: %[[Gro:.+]] = alloca i32
   // CHECK: %[[ID:.+]] = call token @llvm.coro.id(i32 16
+  // CHECK: br i1 %{{.*}}, label %[[CheckAlignBB:.+]], label %[[OKBB:.+]]
+
+  // CHECK: [[CheckAlignBB]]:
+  // CHECK: %[[ALIGN:.+]] = call i64 @llvm.coro.align.i64()
+  // CHECK: %[[CMP:.+]] = icmp ugt i64 %[[ALIGN]], 16
+  // CHECK: br i1 %[[CMP]], label %[[AlignAllocBB:.+]], label %[[AllocBB:.+]]
+
+  // CHECK: [[AllocBB]]:
   // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64()
   // CHECK: %[[MEM:.+]] = call noalias i8* @_ZnwmRKSt9nothrow_t(i64 %[[SIZE]], %"struct.std::nothrow_t"* nonnull align 1 dereferenceable(1) @_ZStL7nothrow)
   // CHECK: %[[OK:.+]] = icmp ne i8* %[[MEM]], null
-  // CHECK: br i1 %[[OK]], label %[[OKBB:.+]], label %[[ERRBB:.+]]
+  // CHECK: br i1 %[[OK]], label %[[OKBB]], label %[[ERRBB:.+]]
 
   // CHECK: [[ERRBB]]:
   // CHECK:   %[[FailRet:.+]] = call i32 @_ZNSt12experimental16coroutine_traitsIJi28promise_on_alloc_failure_tagEE12promise_type39get_return_object_on_allocation_failureEv(
   // CHECK:   store i32 %[[FailRet]], i32* %[[RetVal]]
   // CHECK:   br label %[[RetBB:.+]]
 
+  // CHECK: [[AlignAllocBB]]:
+  // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64()
+  // CHECK: %[[ALIGN:.+]] = call i64 @llvm.coro.align.i64()
+  // CHECK: %[[PAD:.+]] = sub nsw i64 %[[ALIGN]], 16
+  // CHECK: %[[NEWSIZE:.+]] = add i64 %[[SIZE]], %[[PAD]]
+  // CHECK: %[[MEM2:.+]] = call noalias i8* @_ZnwmRKSt9nothrow_t(i64 %[[NEWSIZE]], %"struct.std::nothrow_t"* nonnull align 1 dereferenceable(1) @_ZStL7nothrow)
+  // CHECK: %[[OK:.+]] = icmp ne i8* %[[MEM2]], null
+  // CHECK: br i1 %[[OK]], label %[[AlignAllocBB2:.+]], label %[[ERRBB]]
+
+  // CHECK: [[AlignAllocBB2]]:
+  // CHECK: %[[ALIGN2:.+]] = call i64 @llvm.coro.align.i64()
+  // CHECK: %[[ALIGNED:.+]] = getelementptr inbounds i8, i8* %[[MEM2]],
+  // CHECK: call void @llvm.assume(i1 true) [ "align"(i8* %[[ALIGNED]], i64 %[[ALIGN2]]) ]
+  // CHECK: %[[ADDR:.+]] = call i8** @llvm.coro.raw.frame.ptr.addr()
+  // CHECK: store i8* %[[MEM2]], i8** %[[ADDR]], align 8
+  // CHECK: br label %[[OKBB]]
+
   // CHECK: [[OKBB]]:
   // CHECK:   %[[OkRet:.+]] = call i32 @_ZNSt12experimental16coroutine_traitsIJi28promise_on_alloc_failure_tagEE12promise_type17get_return_objectEv(
   // CHECK:   store i32 %[[OkRet]], i32* %[[Gro]]
Index: clang/lib/Sema/TreeTransform.h
===================================================================
--- clang/lib/Sema/TreeTransform.h
+++ clang/lib/Sema/TreeTransform.h
@@ -7817,6 +7817,22 @@
       return StmtError();
     Builder.Deallocate = DeallocRes.get();
 
+    // Transform the aligned allocation and deallocation calls, not the
+    // unaligned pair again.
+    assert(S->getAlignedAllocate() && S->getAlignedDeallocate() &&
+           "aligned allocation and deallocation calls must already be built");
+    ExprResult AlignedAllocRes =
+        getDerived().TransformExpr(S->getAlignedAllocate());
+    if (AlignedAllocRes.isInvalid())
+      return StmtError();
+    Builder.AlignedAllocate = AlignedAllocRes.get();
+
+    ExprResult AlignedDeallocRes =
+        getDerived().TransformExpr(S->getAlignedDeallocate());
+    if (AlignedDeallocRes.isInvalid())
+      return StmtError();
+    Builder.AlignedDeallocate = AlignedDeallocRes.get();
+
     assert(S->getResultDecl() && "ResultDecl must already be built");
     StmtResult ResultDecl = getDerived().TransformStmt(S->getResultDecl());
     if (ResultDecl.isInvalid())
Index: clang/lib/Sema/SemaCoroutine.cpp
===================================================================
--- clang/lib/Sema/SemaCoroutine.cpp
+++ clang/lib/Sema/SemaCoroutine.cpp
@@ -1423,7 +1423,9 @@
     return false;
 
   this->Allocate = NewExpr.get();
+  this->AlignedAllocate = this->Allocate;
   this->Deallocate = DeleteExpr.get();
+  this->AlignedDeallocate = this->Deallocate;
 
   return true;
 }
Index: clang/lib/CodeGen/CodeGenFunction.h
===================================================================
--- clang/lib/CodeGen/CodeGenFunction.h
+++ clang/lib/CodeGen/CodeGenFunction.h
@@ -1917,6 +1917,8 @@
   void EmitOpenCLKernelMetadata(const FunctionDecl *FD,
                                 llvm::Function *Fn);
 
+  llvm::Value *EmitBuiltinAlignTo(void *Args, const Expr *E, bool AlignUp);
+
 public:
   CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext=false);
   ~CodeGenFunction();
@@ -4060,6 +4062,8 @@
   RValue EmitBuiltinIsAligned(const CallExpr *E);
   /// Emit IR for __builtin_align_up/__builtin_align_down.
   RValue EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp);
+  llvm::Value *EmitBuiltinAlignTo(llvm::Value *Src, llvm::Value *Align,
+                                  const Expr *E, bool AlignUp);
 
   llvm::Function *generateBuiltinOSLogHelperFunction(
       const analyze_os_log::OSLogBufferLayout &Layout,
Index: clang/lib/CodeGen/CGCoroutine.cpp
===================================================================
--- clang/lib/CodeGen/CGCoroutine.cpp
+++ clang/lib/CodeGen/CGCoroutine.cpp
@@ -12,9 +12,12 @@
 
 #include "CGCleanup.h"
 #include "CodeGenFunction.h"
-#include "llvm/ADT/ScopeExit.h"
 #include "clang/AST/StmtCXX.h"
 #include "clang/AST/StmtVisitor.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include <cstdint>
 
 using namespace clang;
 using namespace CodeGen;
@@ -75,6 +78,7 @@
   // Stores the last emitted coro.free for the deallocate expressions, we use it
   // to wrap dealloc code with if(auto mem = coro.free) dealloc(mem).
   llvm::CallInst *LastCoroFree = nullptr;
+  bool LastCoroFreeUsedForDealloc = false;
 
   // If coro.id came from the builtin, remember the expression to give better
   // diagnostic. If CoroIdExpr is nullptr, the coro.id was created by
@@ -412,12 +416,105 @@
     }
   }
 };
+
+/// Increase the size operand of \p CI by `llvm.coro.align` minus the target's
+/// default new alignment, emitting the arithmetic right before \p CI. The size
+/// is operand 0 when \p IsAlloc is true and operand 1 otherwise.
+void overAllocateFrame(CodeGenFunction &CGF, llvm::CallInst *CI, bool IsAlloc) {
+  unsigned CoroSizeIdx = IsAlloc ? 0 : 1;
+  CGBuilderTy &Builder = CGF.Builder;
+  auto OrigIP = Builder.saveIP();
+  Builder.SetInsertPoint(CI);
+  llvm::Function *CoroAlign =
+      CGF.CGM.getIntrinsic(llvm::Intrinsic::coro_align, CGF.SizeTy);
+  const auto &TI = CGF.CGM.getContext().getTargetInfo();
+  unsigned AlignOfNew = TI.getNewAlign() / TI.getCharWidth();
+  auto *AlignCall = Builder.CreateCall(CoroAlign);
+  auto *AlignOfNewInt = llvm::ConstantInt::get(CGF.SizeTy, AlignOfNew);
+  auto *Diff = Builder.CreateNSWSub(AlignCall, AlignOfNewInt);
+  auto *NewCoroSize = Builder.CreateAdd(CI->getArgOperand(CoroSizeIdx), Diff);
+  CI->setArgOperand(CoroSizeIdx, NewCoroSize);
+  Builder.restoreIP(OrigIP);
+}
+
+/// Return true if any argument of \p MemCall is an `llvm.coro.align` call.
+bool hasAlignArg(llvm::CallInst *MemCall) {
+  return llvm::any_of(MemCall->args(), [](llvm::Use &Arg) {
+    auto *FrameAlign = dyn_cast<llvm::IntrinsicInst>(&Arg);
+    return FrameAlign &&
+           FrameAlign->getIntrinsicID() == llvm::Intrinsic::coro_align;
+  });
+}
+
+/// Retarget the deallocation call that uses \p CoroFree in the current insert
+/// block: operand 0 becomes the pointer loaded from the frame at
+/// `llvm.coro.raw.frame.ptr.offset`, and an `llvm.coro.size` size operand is
+/// enlarged via overAllocateFrame. Leaves the builder at \p AlignedFreeBB.
+void emitDynamicAlignedDealloc(CodeGenFunction &CGF,
+                               llvm::BasicBlock *AlignedFreeBB,
+                               llvm::CallInst *CoroFree) {
+  llvm::CallInst *Dealloc = nullptr;
+  for (llvm::User *U : CoroFree->users()) {
+    if (auto *CI = dyn_cast<llvm::CallInst>(U))
+      if (CI->getParent() == CGF.Builder.GetInsertBlock())
+        Dealloc = CI;
+  }
+  assert(Dealloc && "no deallocation call uses coro.free in this block");
+  assert(Dealloc->getNumArgOperands() >= 1 &&
+         "deallocation call must take the pointer to free");
+
+  CGF.Builder.SetInsertPoint(AlignedFreeBB->getFirstNonPHI());
+
+  // Replace `coro.free` argument with the address from coroutine frame.
+
+  llvm::Function *RawFramePtrOffsetIntrin = CGF.CGM.getIntrinsic(
+      llvm::Intrinsic::coro_raw_frame_ptr_offset, CGF.Int32Ty);
+  auto *RawFramePtrOffset = CGF.Builder.CreateCall(RawFramePtrOffsetIntrin);
+  auto *FramePtrAddrStart =
+      CGF.Builder.CreateInBoundsGEP(CoroFree, {RawFramePtrOffset});
+  auto *FramePtrAddr = CGF.Builder.CreatePointerCast(
+      FramePtrAddrStart, CGF.Int8PtrTy->getPointerTo());
+  auto *FramePtr =
+      CGF.Builder.CreateLoad({FramePtrAddr, CGF.getPointerAlign()});
+  Dealloc->setArgOperand(0, FramePtr);
+
+  // Match size_t argument with the one used during allocation.
+
+  if (Dealloc->getNumArgOperands() > 1) {
+    // Size may only be the second argument of allocator call.
+    if (auto *CoroSize =
+            dyn_cast<llvm::IntrinsicInst>(Dealloc->getArgOperand(1)))
+      if (CoroSize->getIntrinsicID() == llvm::Intrinsic::coro_size)
+        overAllocateFrame(CGF, Dealloc, /*IsAlloc*/ false);
+  }
+
+  CGF.Builder.SetInsertPoint(AlignedFreeBB);
+}
+
+/// Emit \p CheckAlignBB, which branches to \p AlignBB if `llvm.coro.align` is
+/// greater than the target's default new alignment, else to \p NonAlignBB.
+void emitCheckAlignBasicBlock(CodeGenFunction &CGF,
+                              llvm::BasicBlock *CheckAlignBB,
+                              llvm::BasicBlock *AlignBB,
+                              llvm::BasicBlock *NonAlignBB) {
+  CGF.EmitBlock(CheckAlignBB);
+
+  auto &Builder = CGF.Builder;
+  auto &TI = CGF.CGM.getContext().getTargetInfo();
+  unsigned NewAlign = TI.getNewAlign() / TI.getCharWidth();
+  auto *CoroAlign = Builder.CreateCall(
+      CGF.CGM.getIntrinsic(llvm::Intrinsic::coro_align, CGF.SizeTy));
+  auto *AlignOfNew = llvm::ConstantInt::get(CGF.SizeTy, NewAlign);
+  auto *Cmp =
+      Builder.CreateICmp(llvm::CmpInst::ICMP_UGT, CoroAlign, AlignOfNew);
+  Builder.CreateCondBr(Cmp, AlignBB, NonAlignBB);
 }
 
-namespace {
 // Make sure to call coro.delete on scope exit.
 struct CallCoroDelete final : public EHScopeStack::Cleanup {
   Stmt *Deallocate;
+  Stmt *AlignedDeallocate;
+  bool DynamicAlignedDealloc;
 
   // Emit "if (coro.free(CoroId, CoroBegin)) Deallocate;"
 
@@ -432,21 +518,36 @@
     // call.
     BasicBlock *SaveInsertBlock = CGF.Builder.GetInsertBlock();
 
+    auto *CheckAlignBB = CGF.createBasicBlock("coro.free.check.align");
+    auto *AlignedFreeBB = CGF.createBasicBlock("coro.free.align");
     auto *FreeBB = CGF.createBasicBlock("coro.free");
+    auto *AfterFreeBB = CGF.createBasicBlock("after.coro.free");
+
+    emitCheckAlignBasicBlock(CGF, CheckAlignBB, AlignedFreeBB, FreeBB);
+
     CGF.EmitBlock(FreeBB);
     CGF.EmitStmt(Deallocate);
-
-    auto *AfterFreeBB = CGF.createBasicBlock("after.coro.free");
-    CGF.EmitBlock(AfterFreeBB);
+    CGF.Builder.CreateBr(AfterFreeBB);
 
     // We should have captured coro.free from the emission of deallocate.
     auto *CoroFree = CGF.CurCoro.Data->LastCoroFree;
     if (!CoroFree) {
       CGF.CGM.Error(Deallocate->getBeginLoc(),
                     "Deallocation expressoin does not refer to coro.free");
       return;
     }
 
+    // Let the aligned deallocation reuse the captured coro.free. Set the flag
+    // only after the null check so the error path cannot leave it stuck on.
+    CGF.CurCoro.Data->LastCoroFreeUsedForDealloc = true;
+    CGF.EmitBlock(AlignedFreeBB);
+    CGF.EmitStmt(AlignedDeallocate);
+    CGF.CurCoro.Data->LastCoroFreeUsedForDealloc = false;
+    if (DynamicAlignedDealloc)
+      emitDynamicAlignedDealloc(CGF, AlignedFreeBB, CoroFree);
+
+    CGF.EmitBlock(AfterFreeBB);
+
     // Get back to the block we were originally and move coro.free there.
     auto *InsertPt = SaveInsertBlock->getTerminator();
     CoroFree->moveBefore(InsertPt);
@@ -455,15 +554,18 @@
     // Add if (auto *mem = coro.free) Deallocate;
     auto *NullPtr = llvm::ConstantPointerNull::get(CGF.Int8PtrTy);
     auto *Cond = CGF.Builder.CreateICmpNE(CoroFree, NullPtr);
-    CGF.Builder.CreateCondBr(Cond, FreeBB, AfterFreeBB);
+    CGF.Builder.CreateCondBr(Cond, CheckAlignBB, AfterFreeBB);
 
     // No longer need old terminator.
     InsertPt->eraseFromParent();
     CGF.Builder.SetInsertPoint(AfterFreeBB);
   }
-  explicit CallCoroDelete(Stmt *DeallocStmt) : Deallocate(DeallocStmt) {}
+  explicit CallCoroDelete(Stmt *DeallocStmt, Stmt *AlignedDeallocStmt,
+                          bool DynamicAlignedDealloc)
+      : Deallocate(DeallocStmt), AlignedDeallocate(AlignedDeallocStmt),
+        DynamicAlignedDealloc(DynamicAlignedDealloc) {}
 };
-}
+} // namespace
 
 namespace {
 struct GetReturnObjectManager {
@@ -547,9 +649,13 @@
 
   auto *EntryBB = Builder.GetInsertBlock();
   auto *AllocBB = createBasicBlock("coro.alloc");
+  auto *AlignAllocBB = createBasicBlock("coro.alloc.align");
+  auto *CheckAlignBB = createBasicBlock("coro.alloc.check.align");
   auto *InitBB = createBasicBlock("coro.init");
   auto *FinalBB = createBasicBlock("coro.final");
   auto *RetBB = createBasicBlock("coro.ret");
+  llvm::BasicBlock *RetOnFailureBB = nullptr;
+  llvm::BasicBlock *AlignAllocBB2 = nullptr;
 
   auto *CoroId = Builder.CreateCall(
       CGM.getIntrinsic(llvm::Intrinsic::coro_id),
@@ -564,7 +670,9 @@
   auto *CoroAlloc = Builder.CreateCall(
       CGM.getIntrinsic(llvm::Intrinsic::coro_alloc), {CoroId});
 
-  Builder.CreateCondBr(CoroAlloc, AllocBB, InitBB);
+  Builder.CreateCondBr(CoroAlloc, CheckAlignBB, InitBB);
+
+  emitCheckAlignBasicBlock(*this, CheckAlignBB, AlignAllocBB, AllocBB);
 
   EmitBlock(AllocBB);
   auto *AllocateCall = EmitScalarExpr(S.getAllocate());
@@ -572,10 +680,9 @@
 
   // Handle allocation failure if 'ReturnStmtOnAllocFailure' was provided.
   if (auto *RetOnAllocFailure = S.getReturnStmtOnAllocFailure()) {
-    auto *RetOnFailureBB = createBasicBlock("coro.ret.on.failure");
+    RetOnFailureBB = createBasicBlock("coro.ret.on.failure");
 
     // See if allocation was successful.
-    auto *NullPtr = llvm::ConstantPointerNull::get(Int8PtrTy);
     auto *Cond = Builder.CreateICmpNE(AllocateCall, NullPtr);
     Builder.CreateCondBr(Cond, InitBB, RetOnFailureBB);
 
@@ -587,12 +694,48 @@
     Builder.CreateBr(InitBB);
   }
 
+  EmitBlock(AlignAllocBB);
+
+  auto *AlignedAllocateCall = EmitScalarExpr(S.getAlignedAllocate());
+  bool HasAlignArg = hasAlignArg(cast<llvm::CallInst>(AlignedAllocateCall));
+
+  if (!HasAlignArg)
+    overAllocateFrame(*this, cast<llvm::CallInst>(AlignedAllocateCall),
+                      /*IsAlloc*/ true);
+
+  if (auto *RetOnAllocFailure = S.getReturnStmtOnAllocFailure()) {
+    auto *Cond = Builder.CreateICmpNE(AlignedAllocateCall, NullPtr);
+    if (HasAlignArg) {
+      Builder.CreateCondBr(Cond, InitBB, RetOnFailureBB);
+    } else {
+      AlignAllocBB2 = createBasicBlock("coro.alloc.align2");
+      Builder.CreateCondBr(Cond, AlignAllocBB2, RetOnFailureBB);
+      EmitBlock(AlignAllocBB2);
+    }
+  }
+
+  if (!HasAlignArg) {
+    auto *CoroAlign = Builder.CreateCall(
+        CGM.getIntrinsic(llvm::Intrinsic::coro_align, SizeTy));
+    auto *AlignedUpAddr = EmitBuiltinAlignTo(AlignedAllocateCall, CoroAlign,
+                                             S.getAlignedAllocate(), true);
+    auto *RawFramePtrAddrIntrin =
+        CGM.getIntrinsic(llvm::Intrinsic::coro_raw_frame_ptr_addr);
+    auto *RawFramePtrAddr = Builder.CreateCall(RawFramePtrAddrIntrin);
+    Builder.CreateStore(AlignedAllocateCall,
+                        {RawFramePtrAddr, getPointerAlign()});
+    AlignedAllocateCall = AlignedUpAddr;
+  }
+
   EmitBlock(InitBB);
 
   // Pass the result of the allocation to coro.begin.
-  auto *Phi = Builder.CreatePHI(VoidPtrTy, 2);
+  auto *Phi = Builder.CreatePHI(VoidPtrTy, 3);
   Phi->addIncoming(NullPtr, EntryBB);
   Phi->addIncoming(AllocateCall, AllocOrInvokeContBB);
+  Phi->addIncoming(AlignedAllocateCall,
+                   AlignAllocBB2 ? AlignAllocBB2 : AlignAllocBB);
+
   auto *CoroBegin = Builder.CreateCall(
       CGM.getIntrinsic(llvm::Intrinsic::coro_begin), {CoroId, Phi});
   CurCoro.Data->CoroBegin = CoroBegin;
@@ -605,7 +748,8 @@
     CGDebugInfo *DI = getDebugInfo();
     ParamReferenceReplacerRAII ParamReplacer(LocalDeclMap);
     CodeGenFunction::RunCleanupsScope ResumeScope(*this);
-    EHStack.pushCleanup<CallCoroDelete>(NormalAndEHCleanup, S.getDeallocate());
+    EHStack.pushCleanup<CallCoroDelete>(NormalAndEHCleanup, S.getDeallocate(),
+                                        S.getAlignedDeallocate(), !HasAlignArg);
 
     // Create mapping between parameters and copy-params for coroutine function.
     auto ParamMoves = S.getParamMoves();
@@ -729,6 +873,10 @@
   case llvm::Intrinsic::coro_alloc:
   case llvm::Intrinsic::coro_begin:
   case llvm::Intrinsic::coro_free: {
+    // Make deallocation and aligned deallocation share one `coro.free`.
+    if (CurCoro.Data && CurCoro.Data->LastCoroFreeUsedForDealloc)
+      return RValue::get(CurCoro.Data->LastCoroFree);
+
     if (CurCoro.Data && CurCoro.Data->CoroId) {
       Args.push_back(CurCoro.Data->CoroId);
       break;
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -16944,6 +16944,23 @@
     auto *One = llvm::ConstantInt::get(IntType, 1);
     Mask = CGF.Builder.CreateSub(Alignment, One, "mask");
   }
+
+  BuiltinAlignArgs(llvm::Value *SrcV, llvm::Value *Align,
+                   CodeGenFunction &CGF) {
+    Src = SrcV;
+    SrcType = Src->getType();
+    if (SrcType->isPointerTy()) {
+      IntType = IntegerType::get(
+          CGF.getLLVMContext(),
+          CGF.CGM.getDataLayout().getIndexTypeSizeInBits(SrcType));
+    } else {
+      assert(SrcType->isIntegerTy());
+      IntType = cast<llvm::IntegerType>(SrcType);
+    }
+    Alignment = Align;
+    auto *One = llvm::ConstantInt::get(IntType, 1);
+    Mask = CGF.Builder.CreateSub(Alignment, One, "mask");
+  }
 };
 } // namespace
 
@@ -16959,12 +16976,10 @@
       llvm::Constant::getNullValue(Args.IntType), "is_aligned"));
 }
 
-/// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up.
-/// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the
-/// llvm.ptrmask instrinsic (with a GEP before in the align_up case).
-/// TODO: actually use ptrmask once most optimization passes know about it.
-RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) {
-  BuiltinAlignArgs Args(E, *this);
+llvm::Value *CodeGenFunction::EmitBuiltinAlignTo(void *ArgsPtr, const Expr *E,
+                                                 bool AlignUp) {
+  assert(ArgsPtr);
+  const BuiltinAlignArgs &Args = *static_cast<BuiltinAlignArgs *>(ArgsPtr);
   llvm::Value *SrcAddr = Args.Src;
   if (Args.Src->getType()->isPointerTy())
     SrcAddr = Builder.CreatePtrToInt(Args.Src, Args.IntType, "intptr");
@@ -17003,7 +17018,23 @@
     emitAlignmentAssumption(Result, E, E->getExprLoc(), Args.Alignment);
   }
   assert(Result->getType() == Args.SrcType);
-  return RValue::get(Result);
+  return Result;
+}
+
+/// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up.
+/// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the
+/// llvm.ptrmask intrinsic (with a GEP before in the align_up case).
+/// TODO: actually use ptrmask once most optimization passes know about it.
+RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) {
+  BuiltinAlignArgs Args(E, *this);
+  return RValue::get(EmitBuiltinAlignTo(&Args, E, AlignUp));
+}
+
+llvm::Value *CodeGenFunction::EmitBuiltinAlignTo(llvm::Value *Src,
+                                                 llvm::Value *Align,
+                                                 const Expr *E, bool AlignUp) {
+  BuiltinAlignArgs Args(Src, Align, *this);
+  return EmitBuiltinAlignTo(&Args, E, AlignUp);
 }
 
 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
Index: clang/lib/AST/StmtCXX.cpp
===================================================================
--- clang/lib/AST/StmtCXX.cpp
+++ clang/lib/AST/StmtCXX.cpp
@@ -117,6 +117,8 @@
   SubStmts[CoroutineBodyStmt::OnFallthrough] = Args.OnFallthrough;
   SubStmts[CoroutineBodyStmt::Allocate] = Args.Allocate;
   SubStmts[CoroutineBodyStmt::Deallocate] = Args.Deallocate;
+  SubStmts[CoroutineBodyStmt::AlignedAllocate] = Args.AlignedAllocate;
+  SubStmts[CoroutineBodyStmt::AlignedDeallocate] = Args.AlignedDeallocate;
   SubStmts[CoroutineBodyStmt::ReturnValue] = Args.ReturnValue;
   SubStmts[CoroutineBodyStmt::ResultDecl] = Args.ResultDecl;
   SubStmts[CoroutineBodyStmt::ReturnStmt] = Args.ReturnStmt;
Index: clang/include/clang/AST/StmtCXX.h
===================================================================
--- clang/include/clang/AST/StmtCXX.h
+++ clang/include/clang/AST/StmtCXX.h
@@ -318,17 +318,19 @@
     : public Stmt,
       private llvm::TrailingObjects<CoroutineBodyStmt, Stmt *> {
   enum SubStmt {
-    Body,          ///< The body of the coroutine.
-    Promise,       ///< The promise statement.
-    InitSuspend,   ///< The initial suspend statement, run before the body.
-    FinalSuspend,  ///< The final suspend statement, run after the body.
-    OnException,   ///< Handler for exceptions thrown in the body.
-    OnFallthrough, ///< Handler for control flow falling off the body.
-    Allocate,      ///< Coroutine frame memory allocation.
-    Deallocate,    ///< Coroutine frame memory deallocation.
-    ReturnValue,   ///< Return value for thunk function: p.get_return_object().
-    ResultDecl,    ///< Declaration holding the result of get_return_object.
-    ReturnStmt,    ///< Return statement for the thunk function.
+    Body,              ///< The body of the coroutine.
+    Promise,           ///< The promise statement.
+    InitSuspend,       ///< The initial suspend statement, run before the body.
+    FinalSuspend,      ///< The final suspend statement, run after the body.
+    OnException,       ///< Handler for exceptions thrown in the body.
+    OnFallthrough,     ///< Handler for control flow falling off the body.
+    Allocate,          ///< Coroutine frame memory allocation.
+    Deallocate,        ///< Coroutine frame memory deallocation.
+    AlignedAllocate,   ///< Coroutine frame memory aligned allocation.
+    AlignedDeallocate, ///< Coroutine frame memory aligned deallocation.
+    ReturnValue, ///< Return value for thunk function: p.get_return_object().
+    ResultDecl,  ///< Declaration holding the result of get_return_object.
+    ReturnStmt,  ///< Return statement for the thunk function.
     ReturnStmtOnAllocFailure, ///< Return statement if allocation failed.
     FirstParamMove ///< First offset for move construction of parameter copies.
   };
@@ -353,6 +355,8 @@
     Stmt *OnFallthrough = nullptr;
     Expr *Allocate = nullptr;
     Expr *Deallocate = nullptr;
+    Expr *AlignedAllocate = nullptr;
+    Expr *AlignedDeallocate = nullptr;
     Expr *ReturnValue = nullptr;
     Stmt *ResultDecl = nullptr;
     Stmt *ReturnStmt = nullptr;
@@ -406,6 +410,12 @@
   Expr *getDeallocate() const {
     return cast_or_null<Expr>(getStoredStmts()[SubStmt::Deallocate]);
   }
+  Expr *getAlignedAllocate() const {
+    return cast_or_null<Expr>(getStoredStmts()[SubStmt::AlignedAllocate]);
+  }
+  Expr *getAlignedDeallocate() const {
+    return cast_or_null<Expr>(getStoredStmts()[SubStmt::AlignedDeallocate]);
+  }
   Expr *getReturnValueInit() const {
     return cast<Expr>(getStoredStmts()[SubStmt::ReturnValue]);
   }
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to