Author: Aditya Kumar
Date: 2020-12-18T17:06:17-08:00
New Revision: 1ab4db0f847fa1ddd394dbf54a5051b626eab160
URL:
https://github.com/llvm/llvm-project/commit/1ab4db0f847fa1ddd394dbf54a5051b626eab160
DIFF:
https://github.com/llvm/llvm-project/commit/1ab4db0f847fa1ddd394dbf54a5051b626eab160.diff
LOG: [HotColdSplit] Reflect full cost of parameters in split penalty
Make the penalty for splitting a region more accurately reflect the cost
of materializing all of the inputs/outputs to/from the region.
This almost entirely eliminates code growth within functions which
undergo splitting in key internal frameworks, and reduces the size of
those frameworks by between 2.6% and 3%.
rdar://49167240
Patch by: Vedant Kumar (@vsk)
Reviewers: hiraditya,rjf,t.p.northover
Reviewed By: hiraditya,rjf
Differential Revision: https://reviews.llvm.org/D59715
Added:
Modified:
llvm/lib/Transforms/IPO/HotColdSplitting.cpp
llvm/test/Transforms/CodeExtractor/extract-assume.ll
llvm/test/Transforms/HotColdSplit/apply-penalty-for-inputs.ll
llvm/test/Transforms/HotColdSplit/apply-penalty-for-outputs.ll
llvm/test/Transforms/HotColdSplit/apply-successor-penalty.ll
llvm/test/Transforms/HotColdSplit/assumption-cache-invalidation.ll
Removed:
diff --git a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
index b25b789d1dae..aa708ee520b1 100644
--- a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
+++ b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
@@ -67,6 +67,7 @@
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include
+#include <limits>
#include
#include
@@ -96,6 +97,10 @@ static cl::opt
cl::desc("Name for the section containing cold functions "
"extracted by hot-cold splitting."));
+static cl::opt<int> MaxParametersForSplit(
+"hotcoldsplit-max-params", cl::init(4), cl::Hidden,
+cl::desc("Maximum number of parameters for a split function"));
+
namespace {
// Same as blockEndsInUnreachable in CodeGen/BranchFolding.cpp. Do not modify
// this function unless you modify the MBB version as well.
@@ -257,18 +262,6 @@ static int getOutliningPenalty(ArrayRef<BasicBlock *>
Region,
if (SplittingThreshold <= 0)
return Penalty;
- // The typical code size cost for materializing an argument for the outlined
- // call.
- LLVM_DEBUG(dbgs() << "Applying penalty for: " << NumInputs << " inputs\n");
- const int CostForArgMaterialization = TargetTransformInfo::TCC_Basic;
- Penalty += CostForArgMaterialization * NumInputs;
-
- // The typical code size cost for an output alloca, its associated store, and
- // its associated reload.
- LLVM_DEBUG(dbgs() << "Applying penalty for: " << NumOutputs << " outputs\n");
- const int CostForRegionOutput = 3 * TargetTransformInfo::TCC_Basic;
- Penalty += CostForRegionOutput * NumOutputs;
-
// Find the number of distinct exit blocks for the region. Use a conservative
// check to determine whether control returns from the region.
bool NoBlocksReturn = true;
@@ -289,6 +282,48 @@ static int getOutliningPenalty(ArrayRef<BasicBlock *>
Region,
}
}
+ // Count the number of phis in exit blocks with >= 2 incoming values from the
+ // outlining region. These phis are split (\ref severSplitPHINodesOfExits),
+ // and new outputs are created to supply the split phis. CodeExtractor can't
+ // report these new outputs until extraction begins, but it's important to
+ // factor the cost of the outputs into the cost calculation.
+ unsigned NumSplitExitPhis = 0;
+ for (BasicBlock *ExitBB : SuccsOutsideRegion) {
+for (PHINode &PN : ExitBB->phis()) {
+ // Find all incoming values from the outlining region.
+ int NumIncomingVals = 0;
+ for (unsigned i = 0; i < PN.getNumIncomingValues(); ++i)
+if (find(Region, PN.getIncomingBlock(i)) != Region.end()) {
+ ++NumIncomingVals;
+ if (NumIncomingVals > 1) {
+++NumSplitExitPhis;
+break;
+ }
+}
+}
+ }
+
+ // Apply a penalty for calling the split function. Factor in the cost of
+ // materializing all of the parameters.
+ int NumOutputsAndSplitPhis = NumOutputs + NumSplitExitPhis;
+ int NumParams = NumInputs + NumOutputsAndSplitPhis;
+ if (NumParams > MaxParametersForSplit) {
+LLVM_DEBUG(dbgs() << NumInputs << " inputs and " << NumOutputsAndSplitPhis
+ << " outputs exceeds parameter limit ("
+ << MaxParametersForSplit << ")\n");
+return std::numeric_limits<int>::max();
+ }
+ const int CostForArgMaterialization = 2 * TargetTransformInfo::TCC_Basic;
+ LLVM_DEBUG(dbgs() << "Applying penalty for: " << NumParams << " params\n");
+ Penalty += CostForArgMaterialization * NumParams;
+
+ // Apply the typical code size cost for an output alloca and its associated
+ // reload in the caller. Also