ychen updated this revision to Diff 341418.
ychen added a comment.
- Handle deallocation.
- Fix tests.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D97915/new/
https://reviews.llvm.org/D97915
Files:
clang/lib/CodeGen/CGBuiltin.cpp
clang/lib/CodeGen/CGCoroutine.cpp
clang/lib/CodeGen/CodeGenFunction.h
clang/test/CodeGenCoroutines/coro-alloc.cpp
clang/test/CodeGenCoroutines/coro-cleanup.cpp
clang/test/CodeGenCoroutines/coro-gro.cpp
llvm/docs/Coroutines.rst
llvm/include/llvm/IR/Intrinsics.td
llvm/lib/Transforms/Coroutines/CoroFrame.cpp
llvm/lib/Transforms/Coroutines/CoroInstr.h
llvm/lib/Transforms/Coroutines/CoroInternal.h
llvm/lib/Transforms/Coroutines/CoroSplit.cpp
llvm/lib/Transforms/Coroutines/Coroutines.cpp
llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-04.ll
llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-05.ll
Index: llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-05.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-05.ll
+++ llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-05.ll
@@ -62,7 +62,7 @@
call i1 @llvm.coro.end(i8* null, i1 false)
ret void
}
-; CHECK: %a.Frame = type { void (%a.Frame*)*, void (%a.Frame*)*, %"struct.task::promise_type", i1, [14 x i8], %struct.big_structure }
+; CHECK: %a.Frame = type { void (%a.Frame*)*, void (%a.Frame*)*, %"struct.task::promise_type", i1, i8*, %struct.big_structure }
; CHECK-LABEL: @a.resume(
; CHECK: %[[A:.*]] = getelementptr inbounds %a.Frame, %a.Frame* %FramePtr, i32 0, i32 3
; CHECK: %[[A:.*]] = getelementptr inbounds %a.Frame, %a.Frame* %FramePtr, i32 0, i32 5
Index: llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-04.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-04.ll
+++ llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-04.ll
@@ -62,10 +62,10 @@
call i1 @llvm.coro.end(i8* null, i1 false)
ret void
}
-; CHECK: %a.Frame = type { void (%a.Frame*)*, void (%a.Frame*)*, %"struct.task::promise_type", %struct.big_structure, i1, [26 x i8], %struct.big_structure.2 }
+; CHECK: %a.Frame = type { void (%a.Frame*)*, void (%a.Frame*)*, %"struct.task::promise_type", %struct.big_structure, i1, i8*, [16 x i8], %struct.big_structure.2 }
; CHECK-LABEL: @a.resume(
; CHECK: %[[A:.*]] = getelementptr inbounds %a.Frame, %a.Frame* %FramePtr, i32 0, i32 3
-; CHECK: %[[A:.*]] = getelementptr inbounds %a.Frame, %a.Frame* %FramePtr, i32 0, i32 6
+; CHECK: %[[A:.*]] = getelementptr inbounds %a.Frame, %a.Frame* %FramePtr, i32 0, i32 7
declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*)
declare i1 @llvm.coro.alloc(token) #3
Index: llvm/lib/Transforms/Coroutines/Coroutines.cpp
===================================================================
--- llvm/lib/Transforms/Coroutines/Coroutines.cpp
+++ llvm/lib/Transforms/Coroutines/Coroutines.cpp
@@ -234,6 +234,7 @@
Shape.CoroBegin = nullptr;
Shape.CoroEnds.clear();
Shape.CoroSizes.clear();
+ Shape.CoroAligns.clear();
Shape.CoroSuspends.clear();
Shape.FrameTy = nullptr;
@@ -268,6 +269,12 @@
case Intrinsic::coro_size:
CoroSizes.push_back(cast<CoroSizeInst>(II));
break;
+ case Intrinsic::coro_align:
+ CoroAligns.push_back(cast<CoroAlignInst>(II));
+ break;
+ case Intrinsic::coro_raw_frame_ptr_offset:
+ CoroRawFramePtrOffsets.push_back(cast<CoroRawFramePtrOffsetInst>(II));
+ break;
case Intrinsic::coro_frame:
CoroFrames.push_back(cast<CoroFrameInst>(II));
break;
@@ -375,6 +382,7 @@
this->SwitchLowering.ResumeSwitch = nullptr;
this->SwitchLowering.PromiseAlloca = SwitchId->getPromise();
this->SwitchLowering.ResumeEntryBlock = nullptr;
+ this->SwitchLowering.FramePtrOffset = 0;
for (auto AnySuspend : CoroSuspends) {
auto Suspend = dyn_cast<CoroSuspendInst>(AnySuspend);
Index: llvm/lib/Transforms/Coroutines/CoroSplit.cpp
===================================================================
--- llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -997,23 +997,44 @@
Shape.AsyncLowering.AsyncFuncPointer->setInitializer(NewFuncPtrStruct);
}
-static void replaceFrameSize(coro::Shape &Shape) {
+static void replaceFrameSizeAndAlign(coro::Shape &Shape) {
if (Shape.ABI == coro::ABI::Async)
updateAsyncFuncPointerContextSize(Shape);
- if (Shape.CoroSizes.empty())
- return;
+ if (!Shape.CoroSizes.empty()) {
+ // In the same function all coro.sizes should have the same result type.
+ auto *SizeIntrin = Shape.CoroSizes.back();
+ Module *M = SizeIntrin->getModule();
+ const DataLayout &DL = M->getDataLayout();
+ auto Size = DL.getTypeAllocSize(Shape.FrameTy);
+ auto *SizeConstant = ConstantInt::get(SizeIntrin->getType(), Size);
+
+ for (CoroSizeInst *CS : Shape.CoroSizes) {
+ CS->replaceAllUsesWith(SizeConstant);
+ CS->eraseFromParent();
+ }
+ }
- // In the same function all coro.sizes should have the same result type.
- auto *SizeIntrin = Shape.CoroSizes.back();
- Module *M = SizeIntrin->getModule();
- const DataLayout &DL = M->getDataLayout();
- auto Size = DL.getTypeAllocSize(Shape.FrameTy);
- auto *SizeConstant = ConstantInt::get(SizeIntrin->getType(), Size);
+ if (!Shape.CoroAligns.empty()) {
+ auto *Intrin = Shape.CoroAligns.back();
+ auto *AlignConstant =
+ ConstantInt::get(Intrin->getType(), Shape.FrameAlign.value());
- for (CoroSizeInst *CS : Shape.CoroSizes) {
- CS->replaceAllUsesWith(SizeConstant);
- CS->eraseFromParent();
+ for (CoroAlignInst *CS : Shape.CoroAligns) {
+ CS->replaceAllUsesWith(AlignConstant);
+ CS->eraseFromParent();
+ }
+ }
+
+ if (!Shape.CoroRawFramePtrOffsets.empty()) {
+ auto *Intrin = Shape.CoroRawFramePtrOffsets.back();
+ auto *FramePtrOffset = ConstantInt::get(
+ Intrin->getType(), Shape.SwitchLowering.FramePtrOffset);
+
+ for (CoroRawFramePtrOffsetInst *CS : Shape.CoroRawFramePtrOffsets) {
+ CS->replaceAllUsesWith(FramePtrOffset);
+ CS->eraseFromParent();
+ }
}
}
@@ -1748,7 +1769,7 @@
simplifySuspendPoints(Shape);
buildCoroutineFrame(F, Shape);
- replaceFrameSize(Shape);
+ replaceFrameSizeAndAlign(Shape);
// If there are no suspend points, no split required, just remove
// the allocation and deallocation blocks, they are not needed.
Index: llvm/lib/Transforms/Coroutines/CoroInternal.h
===================================================================
--- llvm/lib/Transforms/Coroutines/CoroInternal.h
+++ llvm/lib/Transforms/Coroutines/CoroInternal.h
@@ -99,6 +99,8 @@
CoroBeginInst *CoroBegin;
SmallVector<AnyCoroEndInst *, 4> CoroEnds;
SmallVector<CoroSizeInst *, 2> CoroSizes;
+ SmallVector<CoroAlignInst *, 2> CoroAligns;
+ SmallVector<CoroRawFramePtrOffsetInst *, 2> CoroRawFramePtrOffsets;
SmallVector<AnyCoroSuspendInst *, 4> CoroSuspends;
SmallVector<CallInst*, 2> SwiftErrorOps;
@@ -132,6 +134,7 @@
AllocaInst *PromiseAlloca;
BasicBlock *ResumeEntryBlock;
unsigned IndexField;
+ unsigned FramePtrOffset;
bool HasFinalSuspend;
};
Index: llvm/lib/Transforms/Coroutines/CoroInstr.h
===================================================================
--- llvm/lib/Transforms/Coroutines/CoroInstr.h
+++ llvm/lib/Transforms/Coroutines/CoroInstr.h
@@ -27,6 +27,7 @@
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/raw_ostream.h"
namespace llvm {
@@ -121,6 +122,10 @@
: cast<AllocaInst>(Arg->stripPointerCasts());
}
+ unsigned getAlignment() const {
+ return cast<ConstantInt>(getArgOperand(AlignArg))->getZExtValue();
+ }
+
void clearPromise() {
Value *Arg = getArgOperand(PromiseArg);
setArgOperand(PromiseArg,
@@ -599,6 +604,30 @@
}
};
+/// This represents the llvm.coro.align instruction.
+class LLVM_LIBRARY_VISIBILITY CoroAlignInst : public IntrinsicInst {
+public:
+ // Methods to support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::coro_align;
+ }
+ static bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+};
+
+/// This represents the llvm.coro.raw.frame.ptr.offset instruction.
+class LLVM_LIBRARY_VISIBILITY CoroRawFramePtrOffsetInst : public IntrinsicInst {
+public:
+ // Methods to support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::coro_raw_frame_ptr_offset;
+ }
+ static bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+};
+
class LLVM_LIBRARY_VISIBILITY AnyCoroEndInst : public IntrinsicInst {
enum { FrameArg, UnwindArg };
Index: llvm/lib/Transforms/Coroutines/CoroFrame.cpp
===================================================================
--- llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -486,6 +486,8 @@
return StructAlign;
}
+ SmallVector<Field, 8> &getFields() { return Fields; }
+
FieldIDType getLayoutFieldIndex(FieldIDType Id) const {
assert(IsFinished && "not yet finished!");
return Fields[Id].LayoutFieldIndex;
@@ -764,21 +766,49 @@
// Because multiple allocas may own the same field slot,
// we add allocas to field here.
B.addFieldForAllocas(F, FrameData, Shape);
- // Add PromiseAlloca to Allocas list so that
- // 1. updateLayoutIndex could update its index after
- // `performOptimizedStructLayout`
- // 2. it is processed in insertSpills.
- if (Shape.ABI == coro::ABI::Switch && PromiseAlloca)
- // We assume that the promise alloca won't be modified before
- // CoroBegin and no alias will be create before CoroBegin.
- FrameData.Allocas.emplace_back(
- PromiseAlloca, DenseMap<Instruction *, llvm::Optional<APInt>>{}, false);
+
// Create an entry for every spilled value.
for (auto &S : FrameData.Spills) {
FieldIDType Id = B.addField(S.first->getType(), None);
FrameData.setFieldIndex(S.first, Id);
}
+ Optional<FieldIDType> FramePtrField = None;
+ if (Shape.ABI == coro::ABI::Switch) {
+ // Add PromiseAlloca to Allocas list so that
+ // 1. updateLayoutIndex could update its index after
+ // `performOptimizedStructLayout`
+ // 2. it is processed in insertSpills.
+ if (PromiseAlloca)
+ // We assume that the promise alloca won't be modified before
+ // CoroBegin and no alias will be created before CoroBegin.
+ FrameData.Allocas.emplace_back(
+ PromiseAlloca, DenseMap<Instruction *, llvm::Optional<APInt>>{},
+ false);
+
+ Align FrameAlign =
+ std::max_element(
+ B.getFields().begin(), B.getFields().end(),
+ [](auto &F1, auto &F2) { return F1.Alignment < F2.Alignment; })
+ ->Alignment;
+
+ // Check for over-alignment.
+ unsigned NewAlign = Shape.getSwitchCoroId()->getAlignment();
+ if (NewAlign && FrameAlign > NewAlign) {
+ BasicBlock &Entry = F.getEntryBlock();
+ IRBuilder<> Builder(&Entry, Entry.getFirstInsertionPt());
+
+ // Reserve frame space for raw frame pointer.
+ Value *Mem = Shape.CoroBegin->getMem();
+ AllocaInst *FramePtrAddr =
+ Builder.CreateAlloca(Mem->getType(), nullptr, "alloc.frame.ptr");
+ FramePtrField = B.addFieldForAlloca(FramePtrAddr);
+ FrameData.setFieldIndex(FramePtrAddr, *FramePtrField);
+ FrameData.Allocas.emplace_back(
+ FramePtrAddr, DenseMap<Instruction *, llvm::Optional<APInt>>{}, true);
+ }
+ }
+
B.finish(FrameTy);
FrameData.updateLayoutIndex(B);
Shape.FrameAlign = B.getStructAlign();
@@ -790,6 +820,12 @@
Shape.SwitchLowering.IndexField =
B.getLayoutFieldIndex(*SwitchIndexFieldId);
+ if (FramePtrField) {
+ FieldIDType FieldIdx = B.getLayoutFieldIndex(*FramePtrField);
+ Shape.SwitchLowering.FramePtrOffset =
+ DL.getStructLayout(FrameTy)->getElementOffset(FieldIdx);
+ }
+
// Also round the frame size up to a multiple of its alignment, as is
// generally expected in C/C++.
Shape.FrameSize = alignTo(Shape.FrameSize, Shape.FrameAlign);
Index: llvm/include/llvm/IR/Intrinsics.td
===================================================================
--- llvm/include/llvm/IR/Intrinsics.td
+++ llvm/include/llvm/IR/Intrinsics.td
@@ -1237,6 +1237,8 @@
def int_coro_frame : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
def int_coro_noop : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
def int_coro_size : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>;
+def int_coro_align : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>;
+def int_coro_raw_frame_ptr_offset : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>;
def int_coro_save : Intrinsic<[llvm_token_ty], [llvm_ptr_ty], []>;
def int_coro_suspend : Intrinsic<[llvm_i8_ty], [llvm_token_ty, llvm_i1_ty], []>;
Index: llvm/docs/Coroutines.rst
===================================================================
--- llvm/docs/Coroutines.rst
+++ llvm/docs/Coroutines.rst
@@ -948,6 +948,59 @@
The `coro.size` intrinsic is lowered to a constant representing the size of
the coroutine frame.
+.. _coro.align:
+
+'llvm.coro.align' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+::
+
+ declare i32 @llvm.coro.align.i32()
+ declare i64 @llvm.coro.align.i64()
+
+Overview:
+"""""""""
+
+The '``llvm.coro.align``' intrinsic returns the alignment of the coroutine frame
+in bytes. This is only supported for switched-resume coroutines.
+
+Arguments:
+""""""""""
+
+None
+
+Semantics:
+""""""""""
+
+The `coro.align` intrinsic is lowered to a constant representing the alignment
+of the coroutine frame.
+
+.. _coro.raw.frame.ptr.offset:
+
+'llvm.coro.raw.frame.ptr.offset' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+::
+
+ declare i32 @llvm.coro.raw.frame.ptr.offset.i32()
+ declare i64 @llvm.coro.raw.frame.ptr.offset.i64()
+
+Overview:
+"""""""""
+
+The '``llvm.coro.raw.frame.ptr.offset``' intrinsic returns the byte offset of
+the raw memory block address (returned by the allocator) in the coroutine frame.
+This is only supported for switched-resume coroutines.
+
+Arguments:
+""""""""""
+
+None
+
+Semantics:
+""""""""""
+
+The `coro.raw.frame.ptr.offset` intrinsic is lowered to a constant representing
+the byte offset of the raw memory block address in the coroutine frame.
+
.. _coro.begin:
'llvm.coro.begin' Intrinsic
Index: clang/test/CodeGenCoroutines/coro-gro.cpp
===================================================================
--- clang/test/CodeGenCoroutines/coro-gro.cpp
+++ clang/test/CodeGenCoroutines/coro-gro.cpp
@@ -49,7 +49,8 @@
// CHECK: %[[GroActive:.+]] = alloca i1
// CHECK: %[[Size:.+]] = call i64 @llvm.coro.size.i64()
- // CHECK: call noalias nonnull i8* @_Znwm(i64 %[[Size]])
+ // CHECK: %[[NewSize:.+]] = add i64 %[[Size]],
+ // CHECK: call noalias nonnull i8* @_Znwm(i64 %[[NewSize]])
// CHECK: store i1 false, i1* %[[GroActive]]
// CHECK: call void @_ZNSt12experimental16coroutine_traitsIJiEE12promise_typeC1Ev(
// CHECK: call void @_ZNSt12experimental16coroutine_traitsIJiEE12promise_type17get_return_objectEv(
@@ -66,8 +67,8 @@
// Destroy promise and free the memory.
// CHECK: call void @_ZNSt12experimental16coroutine_traitsIJiEE12promise_typeD1Ev(
- // CHECK: %[[Mem:.+]] = call i8* @llvm.coro.free(
- // CHECK: call void @_ZdlPv(i8* %[[Mem]])
+ // CHECK: call i8* @llvm.coro.free(
+ // CHECK: call void @_ZdlPv(i8* %{{.*}})
// Initialize retval from Gro and destroy Gro
Index: clang/test/CodeGenCoroutines/coro-cleanup.cpp
===================================================================
--- clang/test/CodeGenCoroutines/coro-cleanup.cpp
+++ clang/test/CodeGenCoroutines/coro-cleanup.cpp
@@ -78,12 +78,18 @@
// CHECK: [[Cleanup]]:
// CHECK: call void @_ZNSt12experimental16coroutine_traitsIJvEE12promise_typeD1Ev(
- // CHECK: %[[Mem0:.+]] = call i8* @llvm.coro.free(
- // CHECK: call void @_ZdlPv(i8* %[[Mem0]]
+ // CHECK: %[[MEM:.+]] = call i8* @llvm.coro.free(
+ // CHECK: call i64 @llvm.coro.align.i64()
+ // CHECK: call i32 @llvm.coro.raw.frame.ptr.offset.i32()
+ // CHECK: %[[MEM2:.+]] = select i1 %{{.*}}, i8* %{{.*}}, i8* %[[MEM]]
+ // CHECK: call void @_ZdlPv(i8* %[[MEM2]])
// CHECK: [[Dealloc]]:
- // CHECK: %[[Mem:.+]] = call i8* @llvm.coro.free(
- // CHECK: call void @_ZdlPv(i8* %[[Mem]])
+ // CHECK: %[[MEM:.+]] = call i8* @llvm.coro.free(
+ // CHECK: call i64 @llvm.coro.align.i64()
+ // CHECK: call i32 @llvm.coro.raw.frame.ptr.offset.i32()
+ // CHECK: %[[MEM2:.+]] = select i1 %{{.*}}, i8* %{{.*}}, i8* %[[MEM]]
+ // CHECK: call void @_ZdlPv(i8* %[[MEM2]])
co_return;
}
Index: clang/test/CodeGenCoroutines/coro-alloc.cpp
===================================================================
--- clang/test/CodeGenCoroutines/coro-alloc.cpp
+++ clang/test/CodeGenCoroutines/coro-alloc.cpp
@@ -60,12 +60,27 @@
// CHECK: br i1 %[[NeedAlloc]], label %[[AllocBB:.+]], label %[[InitBB:.+]]
// CHECK: [[AllocBB]]:
- // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64()
- // CHECK: %[[MEM:.+]] = call noalias nonnull i8* @_Znwm(i64 %[[SIZE]])
+ // CHECK-NEXT: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64()
+ // CHECK-NEXT: %[[ALIGN:.+]] = call i64 @llvm.coro.align.i64()
+ // CHECK-NEXT: %[[DIFF:.+]] = sub nsw i64 %[[ALIGN]], 16
+ // CHECK-NEXT: %[[CMP:.+]] = icmp sgt i64 %[[DIFF]], 0
+ // CHECK-NEXT: %[[SEL:.+]] = select i1 %[[CMP]], i64 %[[DIFF]], i64 0
+ // CHECK-NEXT: %[[NEWSIZE:.+]] = add i64 %[[SIZE]], %[[SEL]]
+ // CHECK-NEXT: %[[MEM:.+]] = call noalias nonnull i8* @_Znwm(i64 %[[NEWSIZE]])
+ // CHECK-NEXT: br label %[[AlignAllocBB:.+]]
+
+ // CHECK: [[AlignAllocBB]]:
+ // CHECK: %[[ALIGN:.+]] = call i64 @llvm.coro.align.i64()
+ // CHECK: %[[ALIGNED:.+]] = getelementptr inbounds i8, i8* %[[MEM]]
+ // CHECK: call void @llvm.assume(i1 true) [ "align"(i8* %[[ALIGNED]], i64 %[[ALIGN]]) ]
+ // CHECK: %[[OFFSET:.+]] = call i32 @llvm.coro.raw.frame.ptr.offset.i32()
+ // CHECK: %[[ADDR:.+]] = getelementptr inbounds i8, i8* %aligned_result, i32 %[[OFFSET]]
+ // CHECK: %[[ADDR2:.+]] = bitcast i8* %[[ADDR]] to i8**
+ // CHECK: store i8* %[[MEM]], i8** %[[ADDR2]], align 8
// CHECK: br label %[[InitBB]]
// CHECK: [[InitBB]]:
- // CHECK: %[[PHI:.+]] = phi i8* [ null, %{{.+}} ], [ %call, %[[AllocBB]] ]
+ // CHECK: %[[PHI:.+]] = phi i8* [ null, %{{.+}} ], [ %[[ALIGNED]], %[[AlignAllocBB]] ]
// CHECK: %[[FRAME:.+]] = call i8* @llvm.coro.begin(token %[[ID]], i8* %[[PHI]])
// CHECK: %[[MEM:.+]] = call i8* @llvm.coro.free(token %[[ID]], i8* %[[FRAME]])
@@ -73,8 +88,15 @@
// CHECK: br i1 %[[NeedDealloc]], label %[[FreeBB:.+]], label %[[Afterwards:.+]]
// CHECK: [[FreeBB]]:
- // CHECK: call void @_ZdlPv(i8* %[[MEM]])
- // CHECK: br label %[[Afterwards]]
+ // CHECK-NEXT: %[[ALIGN:.+]] = call i64 @llvm.coro.align.i64()
+ // CHECK-NEXT: %[[CMP:.+]] = icmp ugt i64 %[[ALIGN]], 16
+ // CHECK-NEXT: %[[OFFSET:.+]] = call i32 @llvm.coro.raw.frame.ptr.offset.i32()
+ // CHECK-NEXT: %[[ADDR:.+]] = getelementptr inbounds i8, i8* %[[MEM]], i32 %[[OFFSET]]
+ // CHECK-NEXT: %[[ADDR2:.+]] = bitcast i8* %[[ADDR]] to i8**
+ // CHECK-NEXT: %[[MEM2:.+]] = load i8*, i8** %[[ADDR2]], align 8
+ // CHECK-NEXT: %[[MEM3:.+]] = select i1 %[[CMP]], i8* %[[MEM2]], i8* %[[MEM]]
+ // CHECK-NEXT: call void @_ZdlPv(i8* %[[MEM3]])
+ // CHECK-NEXT: br label %[[Afterwards]]
// CHECK: [[Afterwards]]:
// CHECK: ret void
@@ -98,11 +120,12 @@
extern "C" void f1(promise_new_tag ) {
// CHECK: %[[ID:.+]] = call token @llvm.coro.id(i32 16
// CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64()
- // CHECK: call i8* @_ZNSt12experimental16coroutine_traitsIJv15promise_new_tagEE12promise_typenwEm(i64 %[[SIZE]])
+ // CHECK: %[[NEWSIZE:.+]] = add i64 %[[SIZE]],
+ // CHECK: call i8* @_ZNSt12experimental16coroutine_traitsIJv15promise_new_tagEE12promise_typenwEm(i64 %[[NEWSIZE]])
// CHECK: %[[FRAME:.+]] = call i8* @llvm.coro.begin(
- // CHECK: %[[MEM:.+]] = call i8* @llvm.coro.free(token %[[ID]], i8* %[[FRAME]])
- // CHECK: call void @_ZdlPv(i8* %[[MEM]])
+ // CHECK: call i8* @llvm.coro.free(token %[[ID]], i8* %[[FRAME]])
+ // CHECK: call void @_ZdlPv(i8* %{{.*}})
co_return;
}
@@ -130,7 +153,8 @@
// CHECK: %[[INT:.+]] = load i32, i32* %x.addr, align 4
// CHECK: %[[FLOAT:.+]] = load float, float* %y.addr, align 4
// CHECK: %[[DOUBLE:.+]] = load double, double* %z.addr, align 8
- // CHECK: call i8* @_ZNSt12experimental16coroutine_traitsIJv34promise_matching_placement_new_tagifdEE12promise_typenwEmS1_ifd(i64 %[[SIZE]], i32 %[[INT]], float %[[FLOAT]], double %[[DOUBLE]])
+ // CHECK: %[[NEWSIZE:.+]] = add i64 %[[SIZE]],
+ // CHECK: call i8* @_ZNSt12experimental16coroutine_traitsIJv34promise_matching_placement_new_tagifdEE12promise_typenwEmS1_ifd(i64 %[[NEWSIZE]], i32 %[[INT]], float %[[FLOAT]], double %[[DOUBLE]])
co_return;
}
@@ -177,11 +201,15 @@
extern "C" void f2(promise_delete_tag) {
// CHECK: %[[ID:.+]] = call token @llvm.coro.id(i32 16
// CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64()
- // CHECK: call noalias nonnull i8* @_Znwm(i64 %[[SIZE]])
+ // CHECK: %[[NEWSIZE:.+]] = add i64 %[[SIZE]],
+ // CHECK: call noalias nonnull i8* @_Znwm(i64 %[[NEWSIZE]])
// CHECK: %[[FRAME:.+]] = call i8* @llvm.coro.begin(
// CHECK: %[[MEM:.+]] = call i8* @llvm.coro.free(token %[[ID]], i8* %[[FRAME]])
- // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJv18promise_delete_tagEE12promise_typedlEPv(i8* %[[MEM]])
+ // CHECK: %[[ALIGN:.+]] = call i64 @llvm.coro.align.i64()
+ // CHECK: %[[CMP:.+]] = icmp ugt i64 %[[ALIGN]],
+ // CHECK: %[[MEM2:.+]] = select i1 %[[CMP]], i8* {{.*}}, i8* %[[MEM]]
+ // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJv18promise_delete_tagEE12promise_typedlEPv(i8* %[[MEM2]])
co_return;
}
@@ -202,12 +230,16 @@
extern "C" void f3(promise_sized_delete_tag) {
// CHECK: %[[ID:.+]] = call token @llvm.coro.id(i32 16
// CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64()
- // CHECK: call noalias nonnull i8* @_Znwm(i64 %[[SIZE]])
+ // CHECK: %[[NEWSIZE:.+]] = add i64 %[[SIZE]],
+ // CHECK: call noalias nonnull i8* @_Znwm(i64 %[[NEWSIZE]])
// CHECK: %[[FRAME:.+]] = call i8* @llvm.coro.begin(
// CHECK: %[[MEM:.+]] = call i8* @llvm.coro.free(token %[[ID]], i8* %[[FRAME]])
- // CHECK: %[[SIZE2:.+]] = call i64 @llvm.coro.size.i64()
- // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJv24promise_sized_delete_tagEE12promise_typedlEPvm(i8* %[[MEM]], i64 %[[SIZE2]])
+ // CHECK: call i64 @llvm.coro.align.i64()
+ // CHECK: %[[MEM2:.+]] = select i1 {{.*}}, i8* {{.*}}, i8* %[[MEM]]
+ // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64()
+ // CHECK: %[[SIZE2:.+]] = add i64 %[[SIZE]],
+ // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJv24promise_sized_delete_tagEE12promise_typedlEPvm(i8* %[[MEM2]], i64 %[[SIZE2]])
co_return;
}
@@ -230,7 +262,8 @@
// CHECK: %[[Gro:.+]] = alloca i32
// CHECK: %[[ID:.+]] = call token @llvm.coro.id(i32 16
// CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64()
- // CHECK: %[[MEM:.+]] = call noalias i8* @_ZnwmRKSt9nothrow_t(i64 %[[SIZE]], %"struct.std::nothrow_t"* nonnull align 1 dereferenceable(1) @_ZStL7nothrow)
+ // CHECK: %[[NEWSIZE:.+]] = add i64 %[[SIZE]],
+ // CHECK: %[[MEM:.+]] = call noalias i8* @_ZnwmRKSt9nothrow_t(i64 %[[NEWSIZE]], %"struct.std::nothrow_t"* nonnull align 1 dereferenceable(1) @_ZStL7nothrow)
// CHECK: %[[OK:.+]] = icmp ne i8* %[[MEM]], null
// CHECK: br i1 %[[OK]], label %[[OKBB:.+]], label %[[ERRBB:.+]]
Index: clang/lib/CodeGen/CodeGenFunction.h
===================================================================
--- clang/lib/CodeGen/CodeGenFunction.h
+++ clang/lib/CodeGen/CodeGenFunction.h
@@ -1885,6 +1885,8 @@
void EmitOpenCLKernelMetadata(const FunctionDecl *FD,
llvm::Function *Fn);
+ llvm::Value *EmitBuiltinAlignTo(void *Args, const Expr *E, bool AlignUp);
+
public:
CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext=false);
~CodeGenFunction();
@@ -4013,6 +4015,8 @@
RValue EmitBuiltinIsAligned(const CallExpr *E);
/// Emit IR for __builtin_align_up/__builtin_align_down.
RValue EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp);
+ llvm::Value *EmitBuiltinAlignTo(llvm::Value *Src, llvm::Value *Align,
+ const Expr *E, bool AlignUp);
llvm::Function *generateBuiltinOSLogHelperFunction(
const analyze_os_log::OSLogBufferLayout &Layout,
Index: clang/lib/CodeGen/CGCoroutine.cpp
===================================================================
--- clang/lib/CodeGen/CGCoroutine.cpp
+++ clang/lib/CodeGen/CGCoroutine.cpp
@@ -15,6 +15,9 @@
#include "llvm/ADT/ScopeExit.h"
#include "clang/AST/StmtCXX.h"
#include "clang/AST/StmtVisitor.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include <cstdint>
using namespace clang;
using namespace CodeGen;
@@ -415,6 +418,67 @@
}
namespace {
+
+void overAllocateFrame(CodeGenFunction &CGF, llvm::CallInst *CI, bool IsAlloc) {
+ unsigned CoroSizeIdx = IsAlloc ? 0 : 1;
+ CodeGenModule &CGM = CGF.CGM;
+ CGBuilderTy &Builder = CGF.Builder;
+ auto OrigIP = Builder.saveIP();
+ Builder.SetInsertPoint(CI);
+ llvm::Function *CoroAlign =
+ CGM.getIntrinsic(llvm::Intrinsic::coro_align, CGF.SizeTy);
+ const auto &TI = CGM.getContext().getTargetInfo();
+ unsigned AlignOfNew = TI.getNewAlign() / TI.getCharWidth();
+ Value *AlignCall = Builder.CreateCall(CoroAlign);
+ // int x = coro_align - AlignOfNew;
+ // coro_size + (x > 0 ? x : 0)
+ Value *AlignOfNewInt = llvm::ConstantInt::get(CGF.SizeTy, AlignOfNew, true);
+ Value *Diff = Builder.CreateNSWSub(AlignCall, AlignOfNewInt);
+ Value *Zero = llvm::ConstantInt::getSigned(CGF.SizeTy, 0);
+ Value *Cmp = Builder.CreateICmp(llvm::CmpInst::ICMP_SGT, Diff, Zero);
+ Value *Extra = Builder.CreateSelect(Cmp, Diff, Zero);
+ Value *NewCoroSize = Builder.CreateAdd(CI->getArgOperand(CoroSizeIdx), Extra);
+ CI->setArgOperand(CoroSizeIdx, NewCoroSize);
+ Builder.restoreIP(OrigIP);
+}
+
+void handleOverAlignedFrame(CodeGenFunction &CGF, llvm::CallInst *CoroFree) {
+ // If the frame is not overaligned, this sequence should be optimized out.
+ auto SaveIP = CGF.Builder.saveIP();
+ CGF.Builder.SetInsertPoint(CoroFree->getParent()->getFirstNonPHIOrDbg());
+ assert(CoroFree->getNumUses() == 1);
+ auto *Dealloc = cast<llvm::CallInst>(CoroFree->user_back());
+ llvm::Function *CoroAlign =
+ CGF.CGM.getIntrinsic(llvm::Intrinsic::coro_align, CGF.SizeTy);
+ Value *AlignCall = CGF.Builder.CreateCall(CoroAlign);
+ const auto &TI = CGF.CGM.getContext().getTargetInfo();
+ Value *AlignOfNew =
+ llvm::ConstantInt::get(CGF.SizeTy, TI.getNewAlign() / TI.getCharWidth());
+ Value *Cmp =
+ CGF.Builder.CreateICmp(llvm::CmpInst::ICMP_UGT, AlignCall, AlignOfNew);
+ llvm::Function *RawFramePtrOffsetIntrin = CGF.CGM.getIntrinsic(
+ llvm::Intrinsic::coro_raw_frame_ptr_offset, CGF.Int32Ty);
+ llvm::Value *RawFramePtrOffset =
+ CGF.Builder.CreateCall(RawFramePtrOffsetIntrin);
+ Value *FramePtrAddrStart =
+ CGF.Builder.CreateInBoundsGEP(CoroFree, {RawFramePtrOffset});
+ llvm::Value *FramePtrAddr = CGF.Builder.CreatePointerCast(
+ FramePtrAddrStart, CGF.Int8PtrTy->getPointerTo());
+ Value *FramePtr =
+ CGF.Builder.CreateLoad({FramePtrAddr, CGF.getPointerAlign()});
+ Value *MemPtr = CGF.Builder.CreateSelect(Cmp, FramePtr, CoroFree);
+
+ Dealloc->setArgOperand(0, MemPtr);
+ assert(Dealloc->getNumArgOperands() >= 1);
+ if (Dealloc->getNumArgOperands() > 1) {
+ // Size may only be the second argument of the deallocation call.
+ auto *CoroSize = cast<llvm::IntrinsicInst>(Dealloc->getArgOperand(1));
+ if (CoroSize->getIntrinsicID() == llvm::Intrinsic::coro_size)
+ overAllocateFrame(CGF, Dealloc, /*IsAlloc*/ false);
+ }
+ CGF.Builder.restoreIP(SaveIP);
+}
+
// Make sure to call coro.delete on scope exit.
struct CallCoroDelete final : public EHScopeStack::Cleanup {
Stmt *Deallocate;
@@ -436,9 +500,6 @@
CGF.EmitBlock(FreeBB);
CGF.EmitStmt(Deallocate);
- auto *AfterFreeBB = CGF.createBasicBlock("after.coro.free");
- CGF.EmitBlock(AfterFreeBB);
-
// We should have captured coro.free from the emission of deallocate.
auto *CoroFree = CGF.CurCoro.Data->LastCoroFree;
if (!CoroFree) {
@@ -447,6 +508,11 @@
return;
}
+ handleOverAlignedFrame(CGF, CoroFree);
+
+ auto *AfterFreeBB = CGF.createBasicBlock("after.coro.free");
+ CGF.EmitBlock(AfterFreeBB);
+
// Get back to the block we were originally and move coro.free there.
auto *InsertPt = SaveInsertBlock->getTerminator();
CoroFree->moveBefore(InsertPt);
@@ -463,7 +529,7 @@
}
explicit CallCoroDelete(Stmt *DeallocStmt) : Deallocate(DeallocStmt) {}
};
-}
+} // namespace
namespace {
struct GetReturnObjectManager {
@@ -547,6 +613,7 @@
auto *EntryBB = Builder.GetInsertBlock();
auto *AllocBB = createBasicBlock("coro.alloc");
+ auto *AlignAllocBB = createBasicBlock("coro.alloc.align");
auto *InitBB = createBasicBlock("coro.init");
auto *FinalBB = createBasicBlock("coro.final");
auto *RetBB = createBasicBlock("coro.ret");
@@ -566,7 +633,8 @@
EmitBlock(AllocBB);
auto *AllocateCall = EmitScalarExpr(S.getAllocate());
- auto *AllocOrInvokeContBB = Builder.GetInsertBlock();
+ overAllocateFrame(*this, cast<llvm::CallInst>(AllocateCall),
+ /*IsAlloc*/ true);
// Handle allocation failure if 'ReturnStmtOnAllocFailure' was provided.
if (auto *RetOnAllocFailure = S.getReturnStmtOnAllocFailure()) {
@@ -575,22 +643,38 @@
// See if allocation was successful.
auto *NullPtr = llvm::ConstantPointerNull::get(Int8PtrTy);
auto *Cond = Builder.CreateICmpNE(AllocateCall, NullPtr);
- Builder.CreateCondBr(Cond, InitBB, RetOnFailureBB);
+ Builder.CreateCondBr(Cond, AlignAllocBB, RetOnFailureBB);
// If not, return OnAllocFailure object.
EmitBlock(RetOnFailureBB);
EmitStmt(RetOnAllocFailure);
}
else {
- Builder.CreateBr(InitBB);
+ Builder.CreateBr(AlignAllocBB);
}
+ EmitBlock(AlignAllocBB);
+
+ auto *CoroAlign =
+ Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::coro_align, SizeTy));
+ llvm::Value *RawAllocate = AllocateCall;
+ AllocateCall =
+ EmitBuiltinAlignTo(AllocateCall, CoroAlign, S.getAllocate(), true);
+ llvm::Function *RawFramePtrOffsetIntrin =
+ CGM.getIntrinsic(llvm::Intrinsic::coro_raw_frame_ptr_offset, Int32Ty);
+ llvm::Value *RawFramePtrOffset = Builder.CreateCall(RawFramePtrOffsetIntrin);
+ llvm::Value *FramePtrAddrStart =
+ Builder.CreateInBoundsGEP(AllocateCall, {RawFramePtrOffset});
+ llvm::Value *FramePtrAddr =
+ Builder.CreatePointerCast(FramePtrAddrStart, Int8PtrTy->getPointerTo());
+ Builder.CreateStore(RawAllocate, {FramePtrAddr, getPointerAlign()});
+
EmitBlock(InitBB);
// Pass the result of the allocation to coro.begin.
auto *Phi = Builder.CreatePHI(VoidPtrTy, 2);
Phi->addIncoming(NullPtr, EntryBB);
- Phi->addIncoming(AllocateCall, AllocOrInvokeContBB);
+ Phi->addIncoming(AllocateCall, AlignAllocBB);
auto *CoroBegin = Builder.CreateCall(
CGM.getIntrinsic(llvm::Intrinsic::coro_begin), {CoroId, Phi});
CurCoro.Data->CoroBegin = CoroBegin;
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -16752,6 +16752,23 @@
auto *One = llvm::ConstantInt::get(IntType, 1);
Mask = CGF.Builder.CreateSub(Alignment, One, "mask");
}
+
+ BuiltinAlignArgs(llvm::Value *SrcV, llvm::Value *Align,
+ CodeGenFunction &CGF) {
+ Src = SrcV;
+ SrcType = Src->getType();
+ if (SrcType->isPointerTy()) {
+ IntType = IntegerType::get(
+ CGF.getLLVMContext(),
+ CGF.CGM.getDataLayout().getIndexTypeSizeInBits(SrcType));
+ } else {
+ assert(SrcType->isIntegerTy());
+ IntType = cast<llvm::IntegerType>(SrcType);
+ }
+ Alignment = Align;
+ auto *One = llvm::ConstantInt::get(IntType, 1);
+ Mask = CGF.Builder.CreateSub(Alignment, One, "mask");
+ }
};
} // namespace
@@ -16767,12 +16784,10 @@
llvm::Constant::getNullValue(Args.IntType), "is_aligned"));
}
-/// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up.
-/// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the
-/// llvm.ptrmask instrinsic (with a GEP before in the align_up case).
-/// TODO: actually use ptrmask once most optimization passes know about it.
-RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) {
- BuiltinAlignArgs Args(E, *this);
+llvm::Value *CodeGenFunction::EmitBuiltinAlignTo(void *ArgsPtr, const Expr *E,
+ bool AlignUp) {
+ assert(ArgsPtr);
+ const BuiltinAlignArgs &Args = *static_cast<BuiltinAlignArgs *>(ArgsPtr);
llvm::Value *SrcAddr = Args.Src;
if (Args.Src->getType()->isPointerTy())
SrcAddr = Builder.CreatePtrToInt(Args.Src, Args.IntType, "intptr");
@@ -16811,7 +16826,23 @@
emitAlignmentAssumption(Result, E, E->getExprLoc(), Args.Alignment);
}
assert(Result->getType() == Args.SrcType);
- return RValue::get(Result);
+ return Result;
+}
+
+/// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up.
+/// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the
+/// llvm.ptrmask intrinsic (with a GEP before in the align_up case).
+/// TODO: actually use ptrmask once most optimization passes know about it.
+RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) {
+ BuiltinAlignArgs Args(E, *this);
+ return RValue::get(EmitBuiltinAlignTo(&Args, E, AlignUp));
+}
+
+llvm::Value *CodeGenFunction::EmitBuiltinAlignTo(llvm::Value *Src,
+ llvm::Value *Align,
+ const Expr *E, bool AlignUp) {
+ BuiltinAlignArgs Args(Src, Align, *this);
+ return EmitBuiltinAlignTo(&Args, E, AlignUp);
}
Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits