jdoerfert updated this revision to Diff 244005.
jdoerfert added a comment.
Improve comment
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D74372/new/
https://reviews.llvm.org/D74372
Files:
clang/lib/CodeGen/CodeGenFunction.cpp
llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
llvm/lib/Transforms/Utils/CodeExtractor.cpp
llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
Index: llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
===================================================================
--- llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -96,7 +96,7 @@
EXPECT_EQ(cast<CallInst>(Barrier)->getArgOperand(1), GTID);
Builder.CreateUnreachable();
- EXPECT_FALSE(verifyModule(*M));
+ EXPECT_FALSE(verifyModule(*M, &errs()));
}
TEST_F(OpenMPIRBuilderTest, CreateCancel) {
@@ -151,7 +151,7 @@
OMPBuilder.popFinalizationCB();
Builder.CreateUnreachable();
- EXPECT_FALSE(verifyModule(*M));
+ EXPECT_FALSE(verifyModule(*M, &errs()));
}
TEST_F(OpenMPIRBuilderTest, CreateCancelIfCond) {
@@ -212,7 +212,7 @@
OMPBuilder.popFinalizationCB();
Builder.CreateUnreachable();
- EXPECT_FALSE(verifyModule(*M));
+ EXPECT_FALSE(verifyModule(*M, &errs()));
}
TEST_F(OpenMPIRBuilderTest, CreateCancelBarrier) {
@@ -267,7 +267,7 @@
OMPBuilder.popFinalizationCB();
Builder.CreateUnreachable();
- EXPECT_FALSE(verifyModule(*M));
+ EXPECT_FALSE(verifyModule(*M, &errs()));
}
TEST_F(OpenMPIRBuilderTest, DbgLoc) {
@@ -362,9 +362,9 @@
auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; };
- IRBuilder<>::InsertPoint AfterIP = OMPBuilder.CreateParallel(
- Loc, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false);
-
+ IRBuilder<>::InsertPoint AfterIP =
+ OMPBuilder.CreateParallel(Loc, BodyGenCB, PrivCB, FiniCB, nullptr,
+ nullptr, OMP_PROC_BIND_default, false);
EXPECT_EQ(NumBodiesGenerated, 1U);
EXPECT_EQ(NumPrivatizedVars, 1U);
EXPECT_EQ(NumFinalizationPoints, 1U);
@@ -372,10 +372,12 @@
Builder.restoreIP(AfterIP);
Builder.CreateRetVoid();
+ OMPBuilder.finalize();
+
EXPECT_NE(PrivAI, nullptr);
Function *OutlinedFn = PrivAI->getFunction();
EXPECT_NE(F, OutlinedFn);
- EXPECT_FALSE(verifyModule(*M));
+ EXPECT_FALSE(verifyModule(*M, &errs()));
EXPECT_TRUE(OutlinedFn->hasFnAttribute(Attribute::NoUnwind));
EXPECT_TRUE(OutlinedFn->hasFnAttribute(Attribute::NoRecurse));
EXPECT_TRUE(OutlinedFn->hasParamAttribute(0, Attribute::NoAlias));
@@ -470,6 +472,7 @@
Builder.restoreIP(AfterIP);
Builder.CreateRetVoid();
+ OMPBuilder.finalize();
EXPECT_NE(PrivAI, nullptr);
Function *OutlinedFn = PrivAI->getFunction();
@@ -595,6 +598,7 @@
Builder.restoreIP(AfterIP);
Builder.CreateRetVoid();
+ OMPBuilder.finalize();
EXPECT_FALSE(verifyModule(*M, &errs()));
Index: llvm/lib/Transforms/Utils/CodeExtractor.cpp
===================================================================
--- llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -1405,11 +1405,7 @@
DISubprogram *OldSP = OldFunc.getSubprogram();
LLVMContext &Ctx = OldFunc.getContext();
- // See llvm.org/PR44560, OpenMP passes an invalid subprogram to CodeExtractor.
- bool NeedWorkaroundForOpenMPIRBuilderBug =
- OldSP && OldSP->getRetainedNodes()->isTemporary();
-
- if (!OldSP || NeedWorkaroundForOpenMPIRBuilderBug) {
+ if (!OldSP) {
// Erase any debug info the new function contains.
stripDebugInfo(NewFunc);
// Make sure the old function doesn't contain any non-local metadata refs.
Index: llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
===================================================================
--- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -93,6 +93,55 @@
void OpenMPIRBuilder::initialize() { initializeTypes(M); }
+void OpenMPIRBuilder::finalize() {
+ for (OutlineInfo &OI : OutlineInfos) {
+ assert(!OI.Blocks.empty() &&
+ "Outlined regions should have at least a single block!");
+ BasicBlock *RegEntryBB = OI.Blocks.front();
+ Function *OuterFn = RegEntryBB->getParent();
+ CodeExtractorAnalysisCache CEAC(*OuterFn);
+ CodeExtractor Extractor(OI.Blocks, /* DominatorTree */ nullptr,
+ /* AggregateArgs */ false,
+ /* BlockFrequencyInfo */ nullptr,
+ /* BranchProbabilityInfo */ nullptr,
+ /* AssumptionCache */ nullptr,
+ /* AllowVarArgs */ true,
+ /* AllowAlloca */ true,
+ /* Suffix */ ".omp_par");
+
+ LLVM_DEBUG(dbgs() << "Before outlining: " << *OuterFn << "\n");
+
+ Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
+
+ LLVM_DEBUG(dbgs() << "After outlining: " << *OuterFn << "\n");
+ LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n");
+
+ // For compability with the clang CG we move the outlined function after the
+ // one with the parallel region.
+ OutlinedFn->removeFromParent();
+ M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn);
+
+ // Remove the artificial entry introduced by the extractor right away, we
+ // made our own entry block after all.
+ {
+ BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock();
+ assert(ArtificialEntry.getUniqueSuccessor() == RegEntryBB);
+ assert(RegEntryBB->getUniquePredecessor() == &ArtificialEntry);
+ RegEntryBB->moveBefore(&ArtificialEntry);
+ ArtificialEntry.eraseFromParent();
+ }
+ assert(&OutlinedFn->getEntryBlock() == RegEntryBB);
+ assert(OutlinedFn && OutlinedFn->getNumUses() == 1);
+
+ // Run a user callback, e.g. to add attributes.
+ if (OI.PostOutlineCB)
+ OI.PostOutlineCB(*OutlinedFn);
+ }
+
+ // Allow finalize to be called multiple times.
+ OutlineInfos.clear();
+}
+
Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr,
IdentFlag LocFlags) {
// Enable "C-mode".
@@ -415,17 +464,18 @@
// PRegionExitBB <- A common exit to simplify block collection.
//
- LLVM_DEBUG(dbgs() << "Before body codegen: " << *UI->getFunction() << "\n");
+ LLVM_DEBUG(dbgs() << "Before body codegen: " << *OuterFn << "\n");
// Let the caller create the body.
assert(BodyGenCB && "Expected body generation callback!");
InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin());
BodyGenCB(AllocaIP, CodeGenIP, *PRegPreFiniBB);
- LLVM_DEBUG(dbgs() << "After body codegen: " << *UI->getFunction() << "\n");
+ LLVM_DEBUG(dbgs() << "After body codegen: " << *OuterFn << "\n");
+ OutlineInfo OI;
SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
- SmallVector<BasicBlock *, 32> ParallelRegionBlocks, Worklist;
+ SmallVector<BasicBlock *, 32> Worklist;
ParallelRegionBlockSet.insert(PRegEntryBB);
ParallelRegionBlockSet.insert(PRegExitBB);
@@ -433,14 +483,14 @@
Worklist.push_back(PRegEntryBB);
while (!Worklist.empty()) {
BasicBlock *BB = Worklist.pop_back_val();
- ParallelRegionBlocks.push_back(BB);
+ OI.Blocks.push_back(BB);
for (BasicBlock *SuccBB : successors(BB))
if (ParallelRegionBlockSet.insert(SuccBB).second)
Worklist.push_back(SuccBB);
}
CodeExtractorAnalysisCache CEAC(*OuterFn);
- CodeExtractor Extractor(ParallelRegionBlocks, /* DominatorTree */ nullptr,
+ CodeExtractor Extractor(OI.Blocks, /* DominatorTree */ nullptr,
/* AggregateArgs */ false,
/* BlockFrequencyInfo */ nullptr,
/* BranchProbabilityInfo */ nullptr,
@@ -455,7 +505,7 @@
Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands);
- LLVM_DEBUG(dbgs() << "Before privatization: " << *UI->getFunction() << "\n");
+ LLVM_DEBUG(dbgs() << "Before privatization: " << *OuterFn << "\n");
FunctionCallee TIDRTLFn =
getOrCreateRuntimeFunction(OMPRTL___kmpc_global_thread_num);
@@ -496,56 +546,12 @@
PrivHelper(*Output);
}
- LLVM_DEBUG(dbgs() << "After privatization: " << *UI->getFunction() << "\n");
+ LLVM_DEBUG(dbgs() << "After privatization: " << *OuterFn << "\n");
LLVM_DEBUG({
- for (auto *BB : ParallelRegionBlocks)
+ for (auto *BB : OI.Blocks)
dbgs() << " PBR: " << BB->getName() << "\n";
});
- // Add some known attributes to the outlined function.
- Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
- OutlinedFn->addParamAttr(0, Attribute::NoAlias);
- OutlinedFn->addParamAttr(1, Attribute::NoAlias);
- OutlinedFn->addFnAttr(Attribute::NoUnwind);
- OutlinedFn->addFnAttr(Attribute::NoRecurse);
-
- LLVM_DEBUG(dbgs() << "After outlining: " << *UI->getFunction() << "\n");
- LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n");
-
- // For compability with the clang CG we move the outlined function after the
- // one with the parallel region.
- OutlinedFn->removeFromParent();
- M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn);
-
- // Remove the artificial entry introduced by the extractor right away, we
- // made our own entry block after all.
- {
- BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock();
- assert(ArtificialEntry.getUniqueSuccessor() == PRegEntryBB);
- assert(PRegEntryBB->getUniquePredecessor() == &ArtificialEntry);
- PRegEntryBB->moveBefore(&ArtificialEntry);
- ArtificialEntry.eraseFromParent();
- }
- LLVM_DEBUG(dbgs() << "PP Outlined function: " << *OutlinedFn << "\n");
- assert(&OutlinedFn->getEntryBlock() == PRegEntryBB);
-
- assert(OutlinedFn && OutlinedFn->getNumUses() == 1);
- assert(OutlinedFn->arg_size() >= 2 &&
- "Expected at least tid and bounded tid as arguments");
- unsigned NumCapturedVars = OutlinedFn->arg_size() - /* tid & bounded tid */ 2;
-
- CallInst *CI = cast<CallInst>(OutlinedFn->user_back());
- CI->getParent()->setName("omp_parallel");
- Builder.SetInsertPoint(CI);
-
- // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn);
- Value *ForkCallArgs[] = {Ident, Builder.getInt32(NumCapturedVars),
- Builder.CreateBitCast(OutlinedFn, ParallelTaskPtr)};
-
- SmallVector<Value *, 16> RealArgs;
- RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
- RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end());
-
FunctionCallee RTLFn = getOrCreateRuntimeFunction(OMPRTL___kmpc_fork_call);
if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
@@ -558,59 +564,86 @@
// callback callee.
F->addMetadata(
llvm::LLVMContext::MD_callback,
- *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
- 2, {-1, -1},
- /* VarArgsArePassed */ true)}));
+ *llvm::MDNode::get(
+ Ctx, {MDB.createCallbackEncoding(2, {-1, -1},
+ /* VarArgsArePassed */ true)}));
}
}
- Builder.CreateCall(RTLFn, RealArgs);
+ OI.PostOutlineCB = [=](Function &OutlinedFn) {
+ // Add some known attributes.
+ OutlinedFn.addParamAttr(0, Attribute::NoAlias);
+ OutlinedFn.addParamAttr(1, Attribute::NoAlias);
+ OutlinedFn.addFnAttr(Attribute::NoUnwind);
+ OutlinedFn.addFnAttr(Attribute::NoRecurse);
- LLVM_DEBUG(dbgs() << "With fork_call placed: "
- << *Builder.GetInsertBlock()->getParent() << "\n");
+ assert(OutlinedFn.arg_size() >= 2 &&
+ "Expected at least tid and bounded tid as arguments");
+ unsigned NumCapturedVars =
+ OutlinedFn.arg_size() - /* tid & bounded tid */ 2;
- InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
- InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end());
- UI->eraseFromParent();
+ CallInst *CI = cast<CallInst>(OutlinedFn.user_back());
+ CI->getParent()->setName("omp_parallel");
+ Builder.SetInsertPoint(CI);
- // Initialize the local TID stack location with the argument value.
- Builder.SetInsertPoint(PrivTID);
- Function::arg_iterator OutlinedAI = OutlinedFn->arg_begin();
- Builder.CreateStore(Builder.CreateLoad(OutlinedAI), PrivTIDAddr);
+ // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn);
+ Value *ForkCallArgs[] = {
+ Ident, Builder.getInt32(NumCapturedVars),
+ Builder.CreateBitCast(&OutlinedFn, ParallelTaskPtr)};
- // If no "if" clause was present we do not need the call created during
- // outlining, otherwise we reuse it in the serialized parallel region.
- if (!ElseTI) {
- CI->eraseFromParent();
- } else {
+ SmallVector<Value *, 16> RealArgs;
+ RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
+ RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end());
- // If an "if" clause was present we are now generating the serialized
- // version into the "else" branch.
- Builder.SetInsertPoint(ElseTI);
+ Builder.CreateCall(RTLFn, RealArgs);
- // Build calls __kmpc_serialized_parallel(&Ident, GTid);
- Value *SerializedParallelCallArgs[] = {Ident, ThreadID};
- Builder.CreateCall(
- getOrCreateRuntimeFunction(OMPRTL___kmpc_serialized_parallel),
- SerializedParallelCallArgs);
+ LLVM_DEBUG(dbgs() << "With fork_call placed: "
+ << *Builder.GetInsertBlock()->getParent() << "\n");
- // OutlinedFn(>id, &zero, CapturedStruct);
- CI->removeFromParent();
- Builder.Insert(CI);
+ InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end());
- // __kmpc_end_serialized_parallel(&Ident, GTid);
- Value *EndArgs[] = {Ident, ThreadID};
- Builder.CreateCall(
- getOrCreateRuntimeFunction(OMPRTL___kmpc_end_serialized_parallel),
- EndArgs);
+ // Initialize the local TID stack location with the argument value.
+ Builder.SetInsertPoint(PrivTID);
+ Function::arg_iterator OutlinedAI = OutlinedFn.arg_begin();
+ Builder.CreateStore(Builder.CreateLoad(OutlinedAI), PrivTIDAddr);
- LLVM_DEBUG(dbgs() << "With serialized parallel region: "
- << *Builder.GetInsertBlock()->getParent() << "\n");
- }
+ // If no "if" clause was present we do not need the call created during
+ // outlining, otherwise we reuse it in the serialized parallel region.
+ if (!ElseTI) {
+ CI->eraseFromParent();
+ } else {
+
+ // If an "if" clause was present we are now generating the serialized
+ // version into the "else" branch.
+ Builder.SetInsertPoint(ElseTI);
+
+ // Build calls __kmpc_serialized_parallel(&Ident, GTid);
+ Value *SerializedParallelCallArgs[] = {Ident, ThreadID};
+ Builder.CreateCall(
+ getOrCreateRuntimeFunction(OMPRTL___kmpc_serialized_parallel),
+ SerializedParallelCallArgs);
+
+ // OutlinedFn(>id, &zero, CapturedStruct);
+ CI->removeFromParent();
+ Builder.Insert(CI);
+
+ // __kmpc_end_serialized_parallel(&Ident, GTid);
+ Value *EndArgs[] = {Ident, ThreadID};
+ Builder.CreateCall(
+ getOrCreateRuntimeFunction(OMPRTL___kmpc_end_serialized_parallel),
+ EndArgs);
+
+ LLVM_DEBUG(dbgs() << "With serialized parallel region: "
+ << *Builder.GetInsertBlock()->getParent() << "\n");
+ }
+
+ for (Instruction *I : ToBeDeleted)
+ I->eraseFromParent();
+ };
// Adjust the finalization stack, verify the adjustment, and call the
- // finalize function a last time to finalize values between the pre-fini block
- // and the exit block if we left the parallel "the normal way".
+ // finalize function a last time to finalize values between the pre-fini
+ // block and the exit block if we left the parallel "the normal way".
auto FiniInfo = FinalizationStack.pop_back_val();
(void)FiniInfo;
assert(FiniInfo.DK == OMPD_parallel &&
@@ -618,15 +651,17 @@
Instruction *PreFiniTI = PRegPreFiniBB->getTerminator();
assert(PreFiniTI->getNumSuccessors() == 1 &&
- PreFiniTI->getSuccessor(0)->size() == 1 &&
- isa<ReturnInst>(PreFiniTI->getSuccessor(0)->getTerminator()) &&
+ PreFiniTI->getSuccessor(0) == PRegExitBB &&
"Unexpected CFG structure!");
InsertPointTy PreFiniIP(PRegPreFiniBB, PreFiniTI->getIterator());
FiniCB(PreFiniIP);
- for (Instruction *I : ToBeDeleted)
- I->eraseFromParent();
+ InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
+ UI->eraseFromParent();
+
+ // Register the outlined info.
+ addOutlineInfo(std::move(OI));
return AfterIP;
}
Index: llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
===================================================================
--- llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -34,6 +34,9 @@
/// before any other method and only once!
void initialize();
+ /// Finalize the underlying module, e.g., by outlining regions.
+ void finalize();
+
/// Add attributes known for \p FnID to \p Fn.
void addAttributes(omp::RuntimeFunction FnID, Function &Fn);
@@ -254,6 +257,20 @@
/// Map to remember existing ident_t*.
DenseMap<std::pair<Constant *, uint64_t>, GlobalVariable *> IdentMap;
+
+ /// Helper that contains information about regions we need to outline
+ /// during finalization.
+ struct OutlineInfo {
+ SmallVector<BasicBlock *, 32> Blocks;
+ using PostOutlineCBTy = std::function<void(Function &)>;
+ PostOutlineCBTy PostOutlineCB;
+ };
+
+ /// Collection of regions that need to be outlined during finalization.
+ SmallVector<OutlineInfo, 16> OutlineInfos;
+
+ /// Add a new region that will be outlined later.
+ void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(OI); }
};
} // end namespace llvm
Index: clang/lib/CodeGen/CodeGenFunction.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenFunction.cpp
+++ clang/lib/CodeGen/CodeGenFunction.cpp
@@ -32,6 +32,7 @@
#include "clang/Basic/TargetInfo.h"
#include "clang/CodeGen/CGFunctionInfo.h"
#include "clang/Frontend/FrontendDiagnostic.h"
+#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/FPEnv.h"
@@ -104,6 +105,14 @@
if (getLangOpts().OpenMP && CurFn)
CGM.getOpenMPRuntime().functionFinished(*this);
+
+ // If we have an OpenMPIRBuilder we want to finalize functions (incl.
+ // outlining etc) at some point. Doing it once the function codegen is done
+ // seems to be a reasonable spot. We do it here, as opposed to the deletion
+ // time of the CodeGenModule, because we have to ensure the IR has not yet
+ // been "emitted" to the outside, thus, modifications are still sensible.
+ if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder())
+ OMPBuilder->finalize();
}
// Map the LangOption for rounding mode into
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits