ychen updated this revision to Diff 339906.
ychen added a comment.

fix typo.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100739/new/

https://reviews.llvm.org/D100739

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGenCoroutines/coro-alloc.cpp
  clang/test/CodeGenCoroutines/coro-builtins.c
  clang/test/CodeGenCoroutines/coro-gro.cpp
  llvm/docs/Coroutines.rst
  llvm/include/llvm/IR/Intrinsics.td
  llvm/lib/Transforms/Coroutines/CoroFrame.cpp
  llvm/lib/Transforms/Coroutines/CoroInstr.h
  llvm/lib/Transforms/Coroutines/CoroInternal.h
  llvm/lib/Transforms/Coroutines/CoroSplit.cpp
  llvm/lib/Transforms/Coroutines/Coroutines.cpp
  llvm/test/Transforms/Coroutines/coro-overalign.ll

Index: llvm/test/Transforms/Coroutines/coro-overalign.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/Coroutines/coro-overalign.ll
@@ -0,0 +1,81 @@
+; Check that we emit extra code to handle an overaligned frame.
+; RUN: opt < %s -coro-split -S | FileCheck %s
+; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+
+%PackedStruct = type <{ i64 }>
+
+declare void @consume(%PackedStruct*)
+
+define i8* @f() "coroutine.presplit"="1" {
+entry:
+  %data = alloca %PackedStruct, align 32
+  %id = call token @llvm.coro.id(i32 16, i8* null, i8* null, i8* null)
+  %size = call i32 @llvm.coro.size.aligned.i32()
+  %alloc = call i8* @malloc(i32 %size)
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
+  call void @consume(%PackedStruct* %data)
+  %0 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %0, label %suspend [i8 0, label %resume
+                                i8 1, label %cleanup]
+resume:
+  call void @consume(%PackedStruct* %data)
+  br label %cleanup
+
+cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %suspend
+suspend:
+  call i1 @llvm.coro.end(i8* %hdl, i1 0)
+  ret i8* %hdl
+}
+
+; See if the frame pointer was inserted.
+; CHECK-LABEL: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i8*, i1, [7 x i8], %PackedStruct }
+
+; See if we over-allocate, adjust frame ptr start address and use an alloca to
+; save the raw frame pointer.
+; CHECK-LABEL: @f(
+;CHECK:  %alloc.frame.ptr = alloca i8*, align 8
+;CHECK:  %id = call token @llvm.coro.id(i32 16, i8* null, i8* null, i8* bitcast ([3 x void (%f.Frame*)*]* @f.resumers to i8*))
+;CHECK:  %alloc = call i8* @malloc(i32 56)
+;CHECK:  store i8* %alloc, i8** %alloc.frame.ptr, align 8
+;CHECK:  %intptr = ptrtoint i8* %alloc to i64
+;CHECK:  %over_boundary = add i64 %intptr, 31
+;CHECK:  %aligned_intptr = and i64 %over_boundary, -32
+;CHECK:  %diff = sub i64 %aligned_intptr, %intptr
+;CHECK:  %aligned_result = getelementptr i8, i8* %alloc, i64 %diff
+;CHECK:  call void @llvm.assume(i1 true) [ "align"(i8* %aligned_result, i64 32) ]
+;CHECK:  %hdl = call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %aligned_result)
+
+; See if we emit correct deallocation code.
+
+; CHECK-LABEL: @f.resume(
+; CHECK:      %0 = getelementptr %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2
+; CHECK-NEXT: %raw.frame.ptr = load i8*, i8** %0, align 8
+; CHECK-NEXT: call void @free(i8* %raw.frame.ptr)
+; CHECK-NEXT: ret void
+
+; CHECK-LABEL: @f.destroy(
+; CHECK:      %0 = getelementptr %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2
+; CHECK-NEXT: %raw.frame.ptr = load i8*, i8** %0, align 8
+; CHECK-NEXT: call void @free(i8* %raw.frame.ptr)
+; CHECK-NEXT: ret void
+
+; CHECK-LABEL: @f.cleanup(
+; CHECK:      call void @free(i8* null)
+; CHECK-NEXT: ret void
+
+declare i8* @llvm.coro.free(token, i8*)
+declare i32 @llvm.coro.size.aligned.i32()
+declare i8  @llvm.coro.suspend(token, i1)
+declare void @llvm.coro.resume(i8*)
+declare void @llvm.coro.destroy(i8*)
+
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
+declare i1 @llvm.coro.alloc(token)
+declare i8* @llvm.coro.begin(token, i8*)
+declare i1 @llvm.coro.end(i8*, i1)
+
+declare noalias i8* @malloc(i32)
+declare void @free(i8*)
Index: llvm/lib/Transforms/Coroutines/Coroutines.cpp
===================================================================
--- llvm/lib/Transforms/Coroutines/Coroutines.cpp
+++ llvm/lib/Transforms/Coroutines/Coroutines.cpp
@@ -22,6 +22,7 @@
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
@@ -171,7 +172,7 @@
 
 // Replace all coro.frees associated with the provided CoroId either with 'null'
 // if Elide is true and with its frame parameter otherwise.
-void coro::replaceCoroFree(CoroIdInst *CoroId, bool Elide) {
+void coro::replaceCoroFree(CoroIdInst *CoroId, bool Elide, Shape *Shape) {
   SmallVector<CoroFreeInst *, 4> CoroFrees;
   for (User *U : CoroId->users())
     if (auto CF = dyn_cast<CoroFreeInst>(U))
@@ -180,9 +181,25 @@
   if (CoroFrees.empty())
     return;
 
-  Value *Replacement =
-      Elide ? ConstantPointerNull::get(Type::getInt8PtrTy(CoroId->getContext()))
-            : CoroFrees.front()->getFrame();
+  LLVMContext &Ctx = CoroId->getContext();
+  PointerType *Int8PtrTy = Type::getInt8PtrTy(Ctx);
+  Value *Replacement = Elide ? ConstantPointerNull::get(Int8PtrTy)
+                             : CoroFrees.front()->getFrame();
+
+  if (!Elide && Shape && Shape->SwitchLowering.FramePtrField) {
+    unsigned FramePtrField = *Shape->SwitchLowering.FramePtrField;
+    for (CoroFreeInst *CF : CoroFrees) {
+      IRBuilder<> Builder(CF);
+      Value *FramePtr =
+          Builder.CreateBitCast(Replacement, Shape->FrameTy->getPointerTo());
+      Value *GEP = Builder.CreateConstGEP2_32(Shape->FrameTy, FramePtr, 0,
+                                              FramePtrField);
+      Value *LI = Builder.CreateLoad(Int8PtrTy, GEP, "raw.frame.ptr");
+      CF->replaceAllUsesWith(LI);
+      CF->eraseFromParent();
+    }
+    return;
+  }
 
   for (CoroFreeInst *CF : CoroFrees) {
     CF->replaceAllUsesWith(Replacement);
@@ -268,6 +285,9 @@
       case Intrinsic::coro_size:
         CoroSizes.push_back(cast<CoroSizeInst>(II));
         break;
+      case Intrinsic::coro_size_aligned:
+        CoroSizeAligneds.push_back(cast<CoroSizeAlignedInst>(II));
+        break;
       case Intrinsic::coro_frame:
         CoroFrames.push_back(cast<CoroFrameInst>(II));
         break;
@@ -375,6 +395,7 @@
     this->SwitchLowering.ResumeSwitch = nullptr;
     this->SwitchLowering.PromiseAlloca = SwitchId->getPromise();
     this->SwitchLowering.ResumeEntryBlock = nullptr;
+    this->SwitchLowering.FramePtrField = None;
 
     for (auto AnySuspend : CoroSuspends) {
       auto Suspend = dyn_cast<CoroSuspendInst>(AnySuspend);
Index: llvm/lib/Transforms/Coroutines/CoroSplit.cpp
===================================================================
--- llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -962,7 +962,8 @@
   // to suppress deallocation code.
   if (Shape.ABI == coro::ABI::Switch)
     coro::replaceCoroFree(cast<CoroIdInst>(VMap[Shape.CoroBegin->getId()]),
-                          /*Elide=*/ FKind == CoroCloner::Kind::SwitchCleanup);
+                          /*Elide=*/FKind == CoroCloner::Kind::SwitchCleanup,
+                          &Shape);
 }
 
 // Create a resume clone by cloning the body of the original function, setting
@@ -1001,19 +1002,35 @@
   if (Shape.ABI == coro::ABI::Async)
     updateAsyncFuncPointerContextSize(Shape);
 
-  if (Shape.CoroSizes.empty())
-    return;
+  if (!Shape.CoroSizes.empty()) {
+    // In the same function all coro.sizes should have the same result type.
+    auto *SizeIntrin = Shape.CoroSizes.back();
+    Module *M = SizeIntrin->getModule();
+    const DataLayout &DL = M->getDataLayout();
+    auto Size = DL.getTypeAllocSize(Shape.FrameTy);
+    auto *SizeConstant = ConstantInt::get(SizeIntrin->getType(), Size);
+
+    for (CoroSizeInst *CS : Shape.CoroSizes) {
+      CS->replaceAllUsesWith(SizeConstant);
+      CS->eraseFromParent();
+    }
+  }
+
+  if (!Shape.CoroSizeAligneds.empty()) {
+    auto *SizeIntrin = Shape.CoroSizeAligneds.back();
+    Module *M = SizeIntrin->getModule();
+    const DataLayout &DL = M->getDataLayout();
+    auto Size = DL.getTypeAllocSize(Shape.FrameTy);
 
-  // In the same function all coro.sizes should have the same result type.
-  auto *SizeIntrin = Shape.CoroSizes.back();
-  Module *M = SizeIntrin->getModule();
-  const DataLayout &DL = M->getDataLayout();
-  auto Size = DL.getTypeAllocSize(Shape.FrameTy);
-  auto *SizeConstant = ConstantInt::get(SizeIntrin->getType(), Size);
+    uint64_t FrameAlign = Shape.FrameAlign.value();
+    uint64_t NewAlign = Shape.getSwitchCoroId()->getAlignment();
+    uint64_t Extra = FrameAlign > NewAlign ? FrameAlign - NewAlign : 0;
+    auto *SizeConstant = ConstantInt::get(SizeIntrin->getType(), Size + Extra);
 
-  for (CoroSizeInst *CS : Shape.CoroSizes) {
-    CS->replaceAllUsesWith(SizeConstant);
-    CS->eraseFromParent();
+    for (CoroSizeAlignedInst *CS : Shape.CoroSizeAligneds) {
+      CS->replaceAllUsesWith(SizeConstant);
+      CS->eraseFromParent();
+    }
   }
 }
 
@@ -1250,7 +1267,7 @@
   switch (Shape.ABI) {
   case coro::ABI::Switch: {
     auto SwitchId = cast<CoroIdInst>(CoroId);
-    coro::replaceCoroFree(SwitchId, /*Elide=*/AllocInst != nullptr);
+    coro::replaceCoroFree(SwitchId, /*Elide=*/AllocInst != nullptr, &Shape);
     if (AllocInst) {
       IRBuilder<> Builder(AllocInst);
       auto *Frame = Builder.CreateAlloca(Shape.FrameTy);
Index: llvm/lib/Transforms/Coroutines/CoroInternal.h
===================================================================
--- llvm/lib/Transforms/Coroutines/CoroInternal.h
+++ llvm/lib/Transforms/Coroutines/CoroInternal.h
@@ -44,10 +44,11 @@
 #define CORO_DEVIRT_TRIGGER_FN "coro.devirt.trigger"
 
 namespace coro {
+struct Shape;
 
 bool declaresIntrinsics(const Module &M,
                         const std::initializer_list<StringRef>);
-void replaceCoroFree(CoroIdInst *CoroId, bool Elide);
+void replaceCoroFree(CoroIdInst *CoroId, bool Elide, Shape *Shape = nullptr);
 void updateCallGraph(Function &Caller, ArrayRef<Function *> Funcs,
                      CallGraph &CG, CallGraphSCC &SCC);
 /// Recover a dbg.declare prepared by the frontend and emit an alloca
@@ -99,6 +100,7 @@
   CoroBeginInst *CoroBegin;
   SmallVector<AnyCoroEndInst *, 4> CoroEnds;
   SmallVector<CoroSizeInst *, 2> CoroSizes;
+  SmallVector<CoroSizeAlignedInst *, 2> CoroSizeAligneds;
   SmallVector<AnyCoroSuspendInst *, 4> CoroSuspends;
   SmallVector<CallInst*, 2> SwiftErrorOps;
 
@@ -132,6 +134,7 @@
     AllocaInst *PromiseAlloca;
     BasicBlock *ResumeEntryBlock;
     unsigned IndexField;
+    Optional<unsigned> FramePtrField;
     bool HasFinalSuspend;
   };
 
@@ -268,7 +271,6 @@
   /// \param CG - if non-null, will be updated for the new call
   void emitDealloc(IRBuilder<> &Builder, Value *Ptr, CallGraph *CG) const;
 
-  Shape() = default;
   explicit Shape(Function &F, bool ReuseFrameSlot = false)
       : ReuseFrameSlot(ReuseFrameSlot) {
     buildFrom(F);
Index: llvm/lib/Transforms/Coroutines/CoroInstr.h
===================================================================
--- llvm/lib/Transforms/Coroutines/CoroInstr.h
+++ llvm/lib/Transforms/Coroutines/CoroInstr.h
@@ -121,6 +121,10 @@
                : cast<AllocaInst>(Arg->stripPointerCasts());
   }
 
+  unsigned getAlignment() const {
+    return cast<ConstantInt>(getArgOperand(AlignArg))->getZExtValue();
+  }
+
   void clearPromise() {
     Value *Arg = getArgOperand(PromiseArg);
     setArgOperand(PromiseArg,
@@ -599,6 +603,18 @@
   }
 };
 
+/// This represents the llvm.coro.size.aligned instruction.
+class LLVM_LIBRARY_VISIBILITY CoroSizeAlignedInst : public IntrinsicInst {
+public:
+  // Methods to support type inquiry through isa, cast, and dyn_cast:
+  static bool classof(const IntrinsicInst *I) {
+    return I->getIntrinsicID() == Intrinsic::coro_size_aligned;
+  }
+  static bool classof(const Value *V) {
+    return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+  }
+};
+
 class LLVM_LIBRARY_VISIBILITY AnyCoroEndInst : public IntrinsicInst {
   enum { FrameArg, UnwindArg };
 
Index: llvm/lib/Transforms/Coroutines/CoroFrame.cpp
===================================================================
--- llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -14,17 +14,21 @@
 // the value into the coroutine frame.
 //===----------------------------------------------------------------------===//
 
+#include "CoroInstr.h"
 #include "CoroInternal.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/Analysis/PtrUseVisitor.h"
 #include "llvm/Analysis/StackLifetime.h"
 #include "llvm/Config/llvm-config.h"
+#include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CFG.h"
+#include "llvm/IR/Constants.h"
 #include "llvm/IR/DIBuilder.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instruction.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/MathExtras.h"
@@ -486,6 +490,8 @@
     return StructAlign;
   }
 
+  SmallVector<Field, 8> &getFields() { return Fields; }
+
   FieldIDType getLayoutFieldIndex(FieldIDType Id) const {
     assert(IsFinished && "not yet finished!");
     return Fields[Id].LayoutFieldIndex;
@@ -710,6 +716,54 @@
   IsFinished = true;
 }
 
+// Adapted from CodeGenFunction::EmitBuiltinAlignTo.
+static Value *emitAlignUpTo(IRBuilder<> &Builder, Value *Src, uint64_t Align) {
+  const DataLayout &DL = Builder.GetInsertBlock()->getModule()->getDataLayout();
+
+  auto *SrcType = cast<PointerType>(Src->getType());
+  IntegerType *IntType = IntegerType::get(Builder.getContext(),
+                                          DL.getIndexTypeSizeInBits(SrcType));
+  Value *Alignment = ConstantInt::get(IntType, Align);
+  auto *One = ConstantInt::get(IntType, 1);
+  Value *Mask = Builder.CreateSub(Alignment, One, "mask");
+  Value *SrcAddr = Builder.CreatePtrToInt(Src, IntType, "intptr");
+
+  // When aligning up we have to first add the mask to ensure we go over the
+  // next alignment value and then align down to the next valid multiple.
+  // By adding the mask, we ensure that align_up on an already aligned
+  // value will not change the value.
+  Value *SrcForMask = Builder.CreateAdd(SrcAddr, Mask, "over_boundary");
+
+  // Invert the mask to only clear the lower bits.
+  Value *InvertedMask = Builder.CreateNot(Mask, "inverted_mask");
+  Value *Result = Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result");
+
+  Result->setName("aligned_intptr");
+  Value *Difference = Builder.CreateSub(Result, SrcAddr, "diff");
+  // The result points into the same underlying allocation as Src. Note that a
+  // plain (non-inbounds) GEP is emitted below to form the aligned pointer.
+
+  PointerType *DestType = Builder.getInt8PtrTy();
+  if (unsigned AddrSpace = SrcType->getAddressSpace())
+    DestType = Type::getInt8PtrTy(Builder.getContext(), AddrSpace);
+
+  Value *Base = Src;
+  if (SrcType != DestType)
+    Base = Builder.CreateBitCast(Src, DestType);
+
+  // The out-of-bounds case cannot happen.
+  Result = Builder.CreateGEP(Base, Difference, "aligned_result");
+  Result = Builder.CreatePointerCast(Result, SrcType);
+
+  Type *IntPtrTy = Builder.getIntPtrTy(DL);
+  if (Alignment->getType() != IntPtrTy)
+    Alignment =
+        Builder.CreateIntCast(Alignment, IntPtrTy, false, "casted.align");
+  (void)Builder.CreateAlignmentAssumption(DL, Result, Alignment);
+  assert(Result->getType() == SrcType);
+  return Result;
+}
+
 // Build a struct that will keep state for an active coroutine.
 //   struct f.frame {
 //     ResumeFnTy ResumeFnAddr;
@@ -764,21 +818,60 @@
   // Because multiple allocas may own the same field slot,
   // we add allocas to field here.
   B.addFieldForAllocas(F, FrameData, Shape);
-  // Add PromiseAlloca to Allocas list so that
-  // 1. updateLayoutIndex could update its index after
-  // `performOptimizedStructLayout`
-  // 2. it is processed in insertSpills.
-  if (Shape.ABI == coro::ABI::Switch && PromiseAlloca)
-    // We assume that the promise alloca won't be modified before
-    // CoroBegin and no alias will be create before CoroBegin.
-    FrameData.Allocas.emplace_back(
-        PromiseAlloca, DenseMap<Instruction *, llvm::Optional<APInt>>{}, false);
+
   // Create an entry for every spilled value.
   for (auto &S : FrameData.Spills) {
     FieldIDType Id = B.addField(S.first->getType(), None);
     FrameData.setFieldIndex(S.first, Id);
   }
 
+  Optional<FieldIDType> FramePtrField = None;
+  if (Shape.ABI == coro::ABI::Switch) {
+    // Add PromiseAlloca to Allocas list so that
+    // 1. updateLayoutIndex could update its index after
+    // `performOptimizedStructLayout`
+    // 2. it is processed in insertSpills.
+    if (PromiseAlloca)
+      // We assume that the promise alloca won't be modified before
+      // CoroBegin and no alias will be created before CoroBegin.
+      FrameData.Allocas.emplace_back(
+          PromiseAlloca, DenseMap<Instruction *, llvm::Optional<APInt>>{},
+          false);
+
+    Align FrameAlign =
+        std::max_element(
+            B.getFields().begin(), B.getFields().end(),
+            [](auto &F1, auto &F2) { return F1.Alignment < F2.Alignment; })
+            ->Alignment;
+
+    // Check for over-alignment.
+    if (!Shape.CoroSizeAligneds.empty() &&
+        FrameAlign > Shape.getSwitchCoroId()->getAlignment()) {
+      BasicBlock &Entry = F.getEntryBlock();
+      IRBuilder<> Builder(&Entry, Entry.getFirstInsertionPt());
+
+      // Save raw frame pointer to alloca
+      Value *Mem = Shape.CoroBegin->getMem();
+      AllocaInst *FramePtrAddr =
+          Builder.CreateAlloca(Mem->getType(), nullptr, "alloc.frame.ptr");
+      Builder.SetInsertPoint(Shape.CoroBegin);
+      Value *MockMem = Builder.CreatePointerCast(FramePtrAddr, Mem->getType());
+      Builder.CreateStore(MockMem, FramePtrAddr);
+
+      // Adjust frame pointer value.
+      Value *NewMem = emitAlignUpTo(Builder, MockMem, FrameAlign.value());
+      Mem->replaceAllUsesWith(NewMem);
+      MockMem->replaceAllUsesWith(Mem);
+      cast<Instruction>(MockMem)->eraseFromParent();
+
+      // Add alloca to frame.
+      FramePtrField = B.addFieldForAlloca(FramePtrAddr);
+      FrameData.setFieldIndex(FramePtrAddr, *FramePtrField);
+      FrameData.Allocas.emplace_back(
+          FramePtrAddr, DenseMap<Instruction *, llvm::Optional<APInt>>{}, true);
+    }
+  }
+
   B.finish(FrameTy);
   FrameData.updateLayoutIndex(B);
   Shape.FrameAlign = B.getStructAlign();
@@ -790,6 +883,10 @@
     Shape.SwitchLowering.IndexField =
         B.getLayoutFieldIndex(*SwitchIndexFieldId);
 
+    if (FramePtrField)
+      Shape.SwitchLowering.FramePtrField =
+          B.getLayoutFieldIndex(*FramePtrField);
+
     // Also round the frame size up to a multiple of its alignment, as is
     // generally expected in C/C++.
     Shape.FrameSize = alignTo(Shape.FrameSize, Shape.FrameAlign);
Index: llvm/include/llvm/IR/Intrinsics.td
===================================================================
--- llvm/include/llvm/IR/Intrinsics.td
+++ llvm/include/llvm/IR/Intrinsics.td
@@ -1237,6 +1237,7 @@
 def int_coro_frame : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
 def int_coro_noop : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
 def int_coro_size : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>;
+def int_coro_size_aligned : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>;
 
 def int_coro_save : Intrinsic<[llvm_token_ty], [llvm_ptr_ty], []>;
 def int_coro_suspend : Intrinsic<[llvm_i8_ty], [llvm_token_ty, llvm_i1_ty], []>;
Index: llvm/docs/Coroutines.rst
===================================================================
--- llvm/docs/Coroutines.rst
+++ llvm/docs/Coroutines.rst
@@ -948,6 +948,35 @@
 The `coro.size` intrinsic is lowered to a constant representing the size of
 the coroutine frame. 
 
+.. _coro.size.aligned:
+
+'llvm.coro.size.aligned' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+::
+
+    declare i32 @llvm.coro.size.aligned.i32()
+    declare i64 @llvm.coro.size.aligned.i64()
+
+Overview:
+"""""""""
+
+The '``llvm.coro.size.aligned``' intrinsic returns the number of bytes that
+must be requested from a memory allocator to store a `coroutine frame`_. It is
+always greater than or equal to the result of '``llvm.coro.size``'.
+
+Arguments:
+""""""""""
+
+None
+
+Semantics:
+""""""""""
+
+Using this intrinsic indicates to LLVM that it should handle an overaligned
+`coroutine frame`_ by requesting more memory than is needed to store the frame,
+so that the frame's memory alignment requirement can be satisfied. This is only
+supported for switched-resume coroutines.
+
 .. _coro.begin:
 
 'llvm.coro.begin' Intrinsic
Index: clang/test/CodeGenCoroutines/coro-gro.cpp
===================================================================
--- clang/test/CodeGenCoroutines/coro-gro.cpp
+++ clang/test/CodeGenCoroutines/coro-gro.cpp
@@ -48,7 +48,7 @@
   // CHECK: %[[RetVal:.+]] = alloca i32
   // CHECK: %[[GroActive:.+]] = alloca i1
 
-  // CHECK: %[[Size:.+]] = call i64 @llvm.coro.size.i64()
+  // CHECK: %[[Size:.+]] = call i64 @llvm.coro.size.aligned.i64()
   // CHECK: call noalias nonnull i8* @_Znwm(i64 %[[Size]])
   // CHECK: store i1 false, i1* %[[GroActive]]
   // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJiEE12promise_typeC1Ev(
Index: clang/test/CodeGenCoroutines/coro-builtins.c
===================================================================
--- clang/test/CodeGenCoroutines/coro-builtins.c
+++ clang/test/CodeGenCoroutines/coro-builtins.c
@@ -20,7 +20,7 @@
   // CHECK-NEXT: call i8* @llvm.coro.noop()
   __builtin_coro_noop();
 
-  // CHECK-NEXT: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64()
+  // CHECK-NEXT: %[[SIZE:.+]] = call i64 @llvm.coro.size.aligned.i64()
   // CHECK-NEXT: %[[MEM:.+]] = call i8* @myAlloc(i64 %[[SIZE]])
   // CHECK-NEXT: %[[FRAME:.+]] = call i8* @llvm.coro.begin(token %[[COROID]], i8* %[[MEM]])
   __builtin_coro_begin(myAlloc(__builtin_coro_size()));
Index: clang/test/CodeGenCoroutines/coro-alloc.cpp
===================================================================
--- clang/test/CodeGenCoroutines/coro-alloc.cpp
+++ clang/test/CodeGenCoroutines/coro-alloc.cpp
@@ -60,7 +60,7 @@
   // CHECK: br i1 %[[NeedAlloc]], label %[[AllocBB:.+]], label %[[InitBB:.+]]
 
   // CHECK: [[AllocBB]]:
-  // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64()
+  // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.aligned.i64()
   // CHECK: %[[MEM:.+]] = call noalias nonnull i8* @_Znwm(i64 %[[SIZE]])
   // CHECK: br label %[[InitBB]]
 
@@ -97,7 +97,7 @@
 // CHECK-LABEL: f1(
 extern "C" void f1(promise_new_tag ) {
   // CHECK: %[[ID:.+]] = call token @llvm.coro.id(i32 16
-  // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64()
+  // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.aligned.i64()
   // CHECK: call i8* @_ZNSt12experimental16coroutine_traitsIJv15promise_new_tagEE12promise_typenwEm(i64 %[[SIZE]])
 
   // CHECK: %[[FRAME:.+]] = call i8* @llvm.coro.begin(
@@ -126,7 +126,7 @@
   // CHECK: store float %y, float* %y.addr, align 4
   // CHECK: store double %z, double* %z.addr, align 8
   // CHECK: %[[ID:.+]] = call token @llvm.coro.id(i32 16
-  // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64()
+  // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.aligned.i64()
   // CHECK: %[[INT:.+]] = load i32, i32* %x.addr, align 4
   // CHECK: %[[FLOAT:.+]] = load float, float* %y.addr, align 4
   // CHECK: %[[DOUBLE:.+]] = load double, double* %z.addr, align 8
@@ -176,7 +176,7 @@
 // CHECK-LABEL: f2(
 extern "C" void f2(promise_delete_tag) {
   // CHECK: %[[ID:.+]] = call token @llvm.coro.id(i32 16
-  // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64()
+  // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.aligned.i64()
   // CHECK: call noalias nonnull i8* @_Znwm(i64 %[[SIZE]])
 
   // CHECK: %[[FRAME:.+]] = call i8* @llvm.coro.begin(
@@ -201,12 +201,12 @@
 // CHECK-LABEL: f3(
 extern "C" void f3(promise_sized_delete_tag) {
   // CHECK: %[[ID:.+]] = call token @llvm.coro.id(i32 16
-  // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64()
+  // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.aligned.i64()
   // CHECK: call noalias nonnull i8* @_Znwm(i64 %[[SIZE]])
 
   // CHECK: %[[FRAME:.+]] = call i8* @llvm.coro.begin(
   // CHECK: %[[MEM:.+]] = call i8* @llvm.coro.free(token %[[ID]], i8* %[[FRAME]])
-  // CHECK: %[[SIZE2:.+]] = call i64 @llvm.coro.size.i64()
+  // CHECK: %[[SIZE2:.+]] = call i64 @llvm.coro.size.aligned.i64()
   // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJv24promise_sized_delete_tagEE12promise_typedlEPvm(i8* %[[MEM]], i64 %[[SIZE2]])
   co_return;
 }
@@ -229,7 +229,7 @@
   // CHECK: %[[RetVal:.+]] = alloca i32
   // CHECK: %[[Gro:.+]] = alloca i32
   // CHECK: %[[ID:.+]] = call token @llvm.coro.id(i32 16
-  // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64()
+  // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.aligned.i64()
   // CHECK: %[[MEM:.+]] = call noalias i8* @_ZnwmRKSt9nothrow_t(i64 %[[SIZE]], %"struct.std::nothrow_t"* nonnull align 1 dereferenceable(1) @_ZStL7nothrow)
   // CHECK: %[[OK:.+]] = icmp ne i8* %[[MEM]], null
   // CHECK: br i1 %[[OK]], label %[[OKBB:.+]], label %[[ERRBB:.+]]
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -4433,7 +4433,7 @@
     auto & Context = getContext();
     auto SizeTy = Context.getSizeType();
     auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy));
-    Function *F = CGM.getIntrinsic(Intrinsic::coro_size, T);
+    Function *F = CGM.getIntrinsic(Intrinsic::coro_size_aligned, T);
     return RValue::get(Builder.CreateCall(F));
   }
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to