https://github.com/hassnaaHamdi updated https://github.com/llvm/llvm-project/pull/159685
>From 0b5d559f7d2ad8de2c12787904b5f09d5682006e Mon Sep 17 00:00:00 2001 From: Hassnaa Hamdi <[email protected]> Date: Mon, 15 Sep 2025 23:41:28 +0000 Subject: [PATCH 1/3] [WPD]: Enable speculative devirtualizatoin. - Add cl::opt 'devirtualize-speculatively' to enable it. - Flag is disabled by default. - It works regardless of the visibility of the object. --- .../lib/Transforms/IPO/WholeProgramDevirt.cpp | 64 +++++++-- .../speculative-devirt-single-impl.ll | 131 ++++++++++++++++++ .../virtual-const-prop-check.ll | 7 + 3 files changed, 187 insertions(+), 15 deletions(-) create mode 100644 llvm/test/Transforms/WholeProgramDevirt/speculative-devirt-single-impl.ll diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp index 09bffa7bf5846..64f574be8fd0e 100644 --- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -24,7 +24,8 @@ // returns 0, or a single vtable's function returns 1, replace each virtual // call with a comparison of the vptr against that vtable's address. // -// This pass is intended to be used during the regular and thin LTO pipelines: +// This pass is intended to be used during the regular/thin and non-LTO +// pipelines: // // During regular LTO, the pass determines the best optimization for each // virtual call and applies the resolutions directly to virtual calls that are @@ -48,6 +49,14 @@ // is supported. // - Import phase: (same as with hybrid case above). // +// During Speculative devirtualization mode -not restricted to LTO-: +// - The pass applies speculative devirtualization without requiring any type of +// visibility. +// - Skips other features like virtual constant propagation, uniform return +// value optimization, unique return value optimization and branch funnels as +// they need LTO. +// - This mode is enabled via 'devirtualize-speculatively' flag. 
+// //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO/WholeProgramDevirt.h" @@ -61,7 +70,9 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/ModuleSummaryAnalysis.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TypeMetadataUtils.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/Bitcode/BitcodeWriter.h" @@ -120,6 +131,13 @@ STATISTIC(NumVirtConstProp1Bit, "Number of 1 bit virtual constant propagations"); STATISTIC(NumVirtConstProp, "Number of virtual constant propagations"); +// TODO: This option should eventually support any public visibility vtables +// with or without LTO. +static cl::opt<bool> ClDevirtualizeSpeculatively( + "devirtualize-speculatively", + cl::desc("Enable speculative devirtualization optimization"), + cl::init(false)); + static cl::opt<PassSummaryAction> ClSummaryAction( "wholeprogramdevirt-summary-action", cl::desc("What to do with the summary when running this pass"), @@ -1083,10 +1101,10 @@ bool DevirtModule::tryFindVirtualCallTargets( if (!TM.Bits->GV->isConstant()) return false; - // We cannot perform whole program devirtualization analysis on a vtable - // with public LTO visibility. - if (TM.Bits->GV->getVCallVisibility() == - GlobalObject::VCallVisibilityPublic) + // Without ClDevirtualizeSpeculatively, we cannot perform whole program + // devirtualization analysis on a vtable with public LTO visibility. 
+ if (!ClDevirtualizeSpeculatively && TM.Bits->GV->getVCallVisibility() == + GlobalObject::VCallVisibilityPublic) return false; Function *Fn = nullptr; @@ -1105,6 +1123,12 @@ bool DevirtModule::tryFindVirtualCallTargets( if (Fn->getName() == "__cxa_pure_virtual") continue; + // In most cases empty functions will be overridden by the + // implementation of the derived class, so we can skip them. + if (ClDevirtualizeSpeculatively && Fn->getReturnType()->isVoidTy() && + Fn->getInstructionCount() <= 1) + continue; + // We can disregard unreachable functions as possible call targets, as // unreachable functions shouldn't be called. if (mustBeUnreachableFunction(Fn, ExportSummary)) @@ -1223,10 +1247,12 @@ void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo, CallTrap->setDebugLoc(CB.getDebugLoc()); } - // If fallback checking is enabled, add support to compare the virtual - // function pointer to the devirtualized target. In case of a mismatch, - // fall back to indirect call. - if (DevirtCheckMode == WPDCheckMode::Fallback) { + // If fallback checking or speculative devirtualization are enabled, + // add support to compare the virtual function pointer to the + // devirtualized target. In case of a mismatch, fall back to indirect + // call. + if (DevirtCheckMode == WPDCheckMode::Fallback || + ClDevirtualizeSpeculatively) { MDNode *Weights = MDBuilder(M.getContext()).createLikelyBranchWeights(); // Version the indirect call site. If the called value is equal to the // given callee, 'NewInst' will be executed, otherwise the original call @@ -1325,10 +1351,10 @@ bool DevirtModule::trySingleImplDevirt( if (!IsExported) return false; - // If the only implementation has local linkage, we must promote to external - // to make it visible to thin LTO objects. We can only get here during the - // ThinLTO export phase. 
- if (TheFn->hasLocalLinkage()) { + // Out of speculative devirtualization mode, if the only implementation has + // local linkage, we must promote to external to make it visible to thin LTO + // objects. + if (!ClDevirtualizeSpeculatively && TheFn->hasLocalLinkage()) { std::string NewName = (TheFn->getName() + ".llvm.merged").str(); // Since we are renaming the function, any comdats with the same name must @@ -2350,6 +2376,11 @@ bool DevirtModule::run() { Function *TypeTestFunc = Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_test); + // If we are in speculative devirtualization mode, we can work on the public + // type test intrinsics. + if (!TypeTestFunc && ClDevirtualizeSpeculatively) + TypeTestFunc = + Intrinsic::getDeclarationIfExists(&M, Intrinsic::public_type_test); Function *TypeCheckedLoadFunc = Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_checked_load); Function *TypeCheckedLoadRelativeFunc = Intrinsic::getDeclarationIfExists( @@ -2472,8 +2503,11 @@ bool DevirtModule::run() { .WPDRes[S.first.ByteOffset]; if (tryFindVirtualCallTargets(TargetsForSlot, TypeMemberInfos, S.first.ByteOffset, ExportSummary)) { - - if (!trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res)) { + bool SingleImplDevirt = + trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res); + // Out of speculative devirtualization mode, Try to apply virtual constant + // propagation or branch funneling. 
+ if (!SingleImplDevirt && !ClDevirtualizeSpeculatively) { DidVirtualConstProp |= tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first); diff --git a/llvm/test/Transforms/WholeProgramDevirt/speculative-devirt-single-impl.ll b/llvm/test/Transforms/WholeProgramDevirt/speculative-devirt-single-impl.ll new file mode 100644 index 0000000000000..b7d1bda8d133b --- /dev/null +++ b/llvm/test/Transforms/WholeProgramDevirt/speculative-devirt-single-impl.ll @@ -0,0 +1,131 @@ +; -stats requires asserts +; REQUIRES: asserts + +; Check that we can still devirtualize outside LTO mode when speculative devirtualization is enabled. +; Check that we skip devirtualization for empty functions in speculative devirtualization mode + +; RUN: opt -S -passes=wholeprogramdevirt -devirtualize-speculatively \ +; RUN: -pass-remarks=wholeprogramdevirt -stats %s 2>&1 | FileCheck %s + +target datalayout = "e-p:64:64" +target triple = "x86_64-unknown-linux-gnu" + +; CHECK: remark: devirt-single.cc:30:32: single-impl: devirtualized a call to vf +; CHECK: remark: devirt-single.cc:41:32: single-impl: devirtualized a call to vf +; CHECK: remark: devirt-single.cc:51:32: single-impl: devirtualized a call to vf +; CHECK: remark: devirt-single.cc:13:0: devirtualized vf +; CHECK-NOT: devirtualized + +@vt1 = constant [1 x ptr] [ptr @vf], !type !8 +@vt2 = constant [1 x ptr] [ptr @vf_empty], !type !12 + +define i1 @vf(ptr %this) #0 !dbg !7 { + ret i1 true +} + +; This should NOT be devirtualized because during non-lto empty functions +; are skipped. 
+define void @vf_empty(ptr %this) !dbg !11 { + ret void +} + +; CHECK: define void @call +define void @call(ptr %obj) #1 !dbg !5 { + %vtable = load ptr, ptr %obj + %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid") + call void @llvm.assume(i1 %p) + %fptr = load ptr, ptr %vtable + ; CHECK: if.true.direct_targ: + ; CHECK: call i1 @vf( + ; CHECK: if.false.orig_indirect: + ; CHECK: call i1 %fptr( + call i1 %fptr(ptr %obj), !dbg !6 + ret void +} + + +; CHECK: define void @call1 +define void @call1(ptr %obj) #1 !dbg !9 { + %vtable = load ptr, ptr %obj + %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid1") + call void @llvm.assume(i1 %p) + %fptr = load ptr, ptr %vtable, align 8 + ; CHECK: call i1 %fptr + %1 = call i1 %fptr(ptr %obj), !dbg !10 + ret void +} +declare ptr @llvm.load.relative.i32(ptr, i32) + +@vt3 = private unnamed_addr constant [1 x i32] [ + i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf to i64), i64 ptrtoint (ptr @vt3 to i64)) to i32) +], align 4, !type !15 + +; CHECK: define void @call2 +define void @call2(ptr %obj) #1 !dbg !13 { + %vtable = load ptr, ptr %obj + %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid2") + call void @llvm.assume(i1 %p) + %fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 0) + ; CHECK: if.true.direct_targ: + ; CHECK: call i1 @vf( + ; CHECK: if.false.orig_indirect: + ; CHECK: call i1 %fptr( + call i1 %fptr(ptr %obj), !dbg !14 + ret void +} + +@_ZTV1A.local = private unnamed_addr constant { [3 x i32] } { [3 x i32] [ + i32 0, ; offset to top + i32 0, ; rtti + i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf to i64), i64 ptrtoint (ptr getelementptr inbounds ({ [3 x i32] }, ptr @_ZTV1A.local, i32 0, i32 0, i32 2) to i64)) to i32) ; vfunc offset +] }, align 4, !type !18 + +; CHECK: define void @call3 +define void @call3(ptr %obj) #1 !dbg !16 { + %vtable = load ptr, ptr %obj + %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid3") + call void 
@llvm.assume(i1 %p) + %fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 8) + ; CHECK: if.true.direct_targ: + ; CHECK: call i1 @vf( + ; CHECK: if.false.orig_indirect: + ; CHECK: call i1 %fptr( + call i1 %fptr(ptr %obj), !dbg !17 + ret void +} + + +declare i1 @llvm.type.test(ptr, metadata) +declare void @llvm.assume(i1) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3} +!llvm.ident = !{!4} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 4.0.0 (trunk 278098)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug) +!1 = !DIFile(filename: "devirt-single.cc", directory: ".") +!2 = !{i32 2, !"Dwarf Version", i32 4} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{!"clang version 4.0.0 (trunk 278098)"} +!5 = distinct !DISubprogram(name: "call", linkageName: "_Z4callPv", scope: !1, file: !1, line: 29, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!6 = !DILocation(line: 30, column: 32, scope: !5) +!7 = distinct !DISubprogram(name: "vf", linkageName: "_ZN3vt12vfEv", scope: !1, file: !1, line: 13, isLocal: false, isDefinition: true, scopeLine: 13, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!8 = !{i32 0, !"typeid"} + +!9 = distinct !DISubprogram(name: "call1", linkageName: "_Z5call1Pv", scope: !1, file: !1, line: 31, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!10 = !DILocation(line: 35, column: 32, scope: !9) +!11 = distinct !DISubprogram(name: "vf_empty", linkageName: "_ZN3vt18vf_emptyEv", scope: !1, file: !1, line: 23, isLocal: false, isDefinition: true, scopeLine: 23, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!12 = !{i32 0, !"typeid1"} + +!13 = distinct !DISubprogram(name: "call2", linkageName: "_Z5call2Pv", scope: !1, file: !1, line: 40, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!14 = 
!DILocation(line: 41, column: 32, scope: !13) +!15 = !{i32 0, !"typeid2"} + +!16 = distinct !DISubprogram(name: "call3", linkageName: "_Z5call3Pv", scope: !1, file: !1, line: 50, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!17 = !DILocation(line: 51, column: 32, scope: !16) +!18 = !{i32 0, !"typeid3"} + + + +; CHECK: 1 wholeprogramdevirt - Number of whole program devirtualization targets +; CHECK: 3 wholeprogramdevirt - Number of single implementation devirtualizations diff --git a/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll b/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll index d8f5c912e9a50..8327e1cfdf1d2 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll @@ -11,6 +11,9 @@ ; Check wildcard ; RUN: opt -S -passes=wholeprogramdevirt -whole-program-visibility -pass-remarks=wholeprogramdevirt -wholeprogramdevirt-skip=vf?i1 %s 2>&1 | FileCheck %s --check-prefix=SKIP +; Check that no stats are reported in speculative devirtualization mode as the virtual const prop is disabled. 
+; RUN: opt -S -passes=wholeprogramdevirt -devirtualize-speculatively -stats %s 2>&1 | FileCheck %s --check-prefix=CHECK-SPECULATIVE-WPD + target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" @@ -225,3 +228,7 @@ declare ptr @llvm.load.relative.i32(ptr, i32) ; CHECK: 2 wholeprogramdevirt - Number of unique return value optimizations ; CHECK: 2 wholeprogramdevirt - Number of virtual constant propagations ; CHECK: 2 wholeprogramdevirt - Number of 1 bit virtual constant propagations + +; CHECK-SPECULATIVE-WPD-NOT: 0 wholeprogramdevirt - Number of unique return value optimizations +; CHECK-SPECULATIVE-WPD-NOT: 0 wholeprogramdevirt - Number of virtual constant propagations +; CHECK-SPECULATIVE-WPD-NOT: 0 wholeprogramdevirt - Number of 1 bit virtual constant propagations >From e787aa2bb1274f511f37599f524e49ed41f8493c Mon Sep 17 00:00:00 2001 From: Hassnaa Hamdi <[email protected]> Date: Tue, 23 Sep 2025 20:47:55 +0000 Subject: [PATCH 2/3] [WPD]: add devirt flag to conditionally enable speculative devirtualization in the pass when it gets launched by pass manager. 
--- .../llvm/Transforms/IPO/WholeProgramDevirt.h | 7 ++- .../lib/Transforms/IPO/WholeProgramDevirt.cpp | 48 ++++++++++++------- .../speculative-devirt-single-impl.ll | 3 +- 3 files changed, 38 insertions(+), 20 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h b/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h index 7a03405b4f462..2e33a4098be1b 100644 --- a/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h +++ b/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h @@ -226,11 +226,14 @@ struct WholeProgramDevirtPass : public PassInfoMixin<WholeProgramDevirtPass> { ModuleSummaryIndex *ExportSummary; const ModuleSummaryIndex *ImportSummary; bool UseCommandLine = false; + bool DevirtSpeculatively = false; WholeProgramDevirtPass() : ExportSummary(nullptr), ImportSummary(nullptr), UseCommandLine(true) {} WholeProgramDevirtPass(ModuleSummaryIndex *ExportSummary, - const ModuleSummaryIndex *ImportSummary) - : ExportSummary(ExportSummary), ImportSummary(ImportSummary) { + const ModuleSummaryIndex *ImportSummary, + bool DevirtSpeculatively = false) + : ExportSummary(ExportSummary), ImportSummary(ImportSummary), + DevirtSpeculatively(DevirtSpeculatively) { assert(!(ExportSummary && ImportSummary)); } LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &); diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp index 64f574be8fd0e..79fd0ebb942f8 100644 --- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -636,9 +636,11 @@ struct DevirtModule { std::map<CallInst *, unsigned> NumUnsafeUsesForTypeTest; PatternList FunctionsToSkip; + const bool DevirtSpeculatively; DevirtModule(Module &M, ModuleAnalysisManager &MAM, ModuleSummaryIndex *ExportSummary, - const ModuleSummaryIndex *ImportSummary) + const ModuleSummaryIndex *ImportSummary, + bool DevirtSpeculatively) : M(M), MAM(MAM), 
FAM(MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()), ExportSummary(ExportSummary), ImportSummary(ImportSummary), @@ -651,7 +653,8 @@ struct DevirtModule { RemarksEnabled(areRemarksEnabled()), OREGetter([&](Function &F) -> OptimizationRemarkEmitter & { return FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); - }) { + }), + DevirtSpeculatively(DevirtSpeculatively) { assert(!(ExportSummary && ImportSummary)); FunctionsToSkip.init(SkipFunctionNames); } @@ -765,7 +768,8 @@ struct DevirtModule { // Lower the module using the action and summary passed as command line // arguments. For testing purposes only. - static bool runForTesting(Module &M, ModuleAnalysisManager &MAM); + static bool runForTesting(Module &M, ModuleAnalysisManager &MAM, + bool DevirtSpeculatively); }; struct DevirtIndex { @@ -808,11 +812,22 @@ struct DevirtIndex { PreservedAnalyses WholeProgramDevirtPass::run(Module &M, ModuleAnalysisManager &MAM) { if (UseCommandLine) { - if (!DevirtModule::runForTesting(M, MAM)) + if (!DevirtModule::runForTesting(M, MAM, ClDevirtualizeSpeculatively)) return PreservedAnalyses::all(); return PreservedAnalyses::none(); } - if (!DevirtModule(M, MAM, ExportSummary, ImportSummary).run()) + + std::optional<ModuleSummaryIndex> Index; + if (!ExportSummary && !ImportSummary && DevirtSpeculatively) { + // Build the ExportSummary from the module. + assert(!ExportSummary && + "ExportSummary is expected to be empty in non-LTO mode"); + ProfileSummaryInfo PSI(M); + Index.emplace(buildModuleSummaryIndex(M, nullptr, &PSI)); + ExportSummary = Index.has_value() ? 
&Index.value() : nullptr; + } + if (!DevirtModule(M, MAM, ExportSummary, ImportSummary, DevirtSpeculatively) + .run()) return PreservedAnalyses::all(); return PreservedAnalyses::none(); } @@ -1008,7 +1023,8 @@ static Error checkCombinedSummaryForTesting(ModuleSummaryIndex *Summary) { return ErrorSuccess(); } -bool DevirtModule::runForTesting(Module &M, ModuleAnalysisManager &MAM) { +bool DevirtModule::runForTesting(Module &M, ModuleAnalysisManager &MAM, + bool DevirtSpeculatively) { std::unique_ptr<ModuleSummaryIndex> Summary = std::make_unique<ModuleSummaryIndex>(/*HaveGVs=*/false); @@ -1037,7 +1053,8 @@ bool DevirtModule::runForTesting(Module &M, ModuleAnalysisManager &MAM) { ClSummaryAction == PassSummaryAction::Export ? Summary.get() : nullptr, ClSummaryAction == PassSummaryAction::Import ? Summary.get() - : nullptr) + : nullptr, + DevirtSpeculatively) .run(); if (!ClWriteSummary.empty()) { @@ -1101,10 +1118,10 @@ bool DevirtModule::tryFindVirtualCallTargets( if (!TM.Bits->GV->isConstant()) return false; - // Without ClDevirtualizeSpeculatively, we cannot perform whole program + // Without DevirtSpeculatively, we cannot perform whole program // devirtualization analysis on a vtable with public LTO visibility. - if (!ClDevirtualizeSpeculatively && TM.Bits->GV->getVCallVisibility() == - GlobalObject::VCallVisibilityPublic) + if (!DevirtSpeculatively && TM.Bits->GV->getVCallVisibility() == + GlobalObject::VCallVisibilityPublic) return false; Function *Fn = nullptr; @@ -1125,7 +1142,7 @@ bool DevirtModule::tryFindVirtualCallTargets( // In most cases empty functions will be overridden by the // implementation of the derived class, so we can skip them. 
- if (ClDevirtualizeSpeculatively && Fn->getReturnType()->isVoidTy() && + if (DevirtSpeculatively && Fn->getReturnType()->isVoidTy() && Fn->getInstructionCount() <= 1) continue; @@ -1251,8 +1268,7 @@ void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo, // add support to compare the virtual function pointer to the // devirtualized target. In case of a mismatch, fall back to indirect // call. - if (DevirtCheckMode == WPDCheckMode::Fallback || - ClDevirtualizeSpeculatively) { + if (DevirtCheckMode == WPDCheckMode::Fallback || DevirtSpeculatively) { MDNode *Weights = MDBuilder(M.getContext()).createLikelyBranchWeights(); // Version the indirect call site. If the called value is equal to the // given callee, 'NewInst' will be executed, otherwise the original call @@ -1354,7 +1370,7 @@ bool DevirtModule::trySingleImplDevirt( // Out of speculative devirtualization mode, if the only implementation has // local linkage, we must promote to external to make it visible to thin LTO // objects. - if (!ClDevirtualizeSpeculatively && TheFn->hasLocalLinkage()) { + if (!DevirtSpeculatively && TheFn->hasLocalLinkage()) { std::string NewName = (TheFn->getName() + ".llvm.merged").str(); // Since we are renaming the function, any comdats with the same name must @@ -2378,7 +2394,7 @@ bool DevirtModule::run() { Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_test); // If we are in speculative devirtualization mode, we can work on the public // type test intrinsics. - if (!TypeTestFunc && ClDevirtualizeSpeculatively) + if (!TypeTestFunc && DevirtSpeculatively) TypeTestFunc = Intrinsic::getDeclarationIfExists(&M, Intrinsic::public_type_test); Function *TypeCheckedLoadFunc = @@ -2507,7 +2523,7 @@ bool DevirtModule::run() { trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res); // Out of speculative devirtualization mode, Try to apply virtual constant // propagation or branch funneling. 
- if (!SingleImplDevirt && !ClDevirtualizeSpeculatively) { + if (!SingleImplDevirt && !DevirtSpeculatively) { DidVirtualConstProp |= tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first); diff --git a/llvm/test/Transforms/WholeProgramDevirt/speculative-devirt-single-impl.ll b/llvm/test/Transforms/WholeProgramDevirt/speculative-devirt-single-impl.ll index b7d1bda8d133b..9ac09f1517e14 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/speculative-devirt-single-impl.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/speculative-devirt-single-impl.ll @@ -23,8 +23,7 @@ define i1 @vf(ptr %this) #0 !dbg !7 { ret i1 true } -; This should NOT be devirtualized because during non-lto empty functions -; are skipped. +; This should NOT be devirtualized because empty functions are skipped during non-lto. define void @vf_empty(ptr %this) !dbg !11 { ret void } >From 9354a21255b55688f65166fd6de86fddf81da9f5 Mon Sep 17 00:00:00 2001 From: Hassnaa Hamdi <[email protected]> Date: Wed, 24 Sep 2025 07:44:55 +0000 Subject: [PATCH 3/3] [Clang]: Enable speculative devirtualization --- clang/docs/UsersManual.rst | 9 +++++++++ clang/include/clang/Basic/CodeGenOptions.def | 2 ++ clang/include/clang/Driver/Options.td | 9 +++++++-- clang/lib/CodeGen/BackendUtil.cpp | 1 + clang/lib/CodeGen/CGClass.cpp | 14 ++++++++------ clang/lib/CodeGen/CGVTables.cpp | 6 ++++-- clang/lib/CodeGen/ItaniumCXXABI.cpp | 13 ++++++++----- clang/lib/Driver/ToolChains/Clang.cpp | 7 +++++++ clang/test/CodeGenCXX/type-metadata.cpp | 8 ++++++++ clang/test/Driver/clang_f_opts.c | 2 -- llvm/include/llvm/Passes/PassBuilder.h | 4 ++++ llvm/lib/Passes/PassBuilderPipelines.cpp | 18 ++++++++++++++++++ 12 files changed, 76 insertions(+), 17 deletions(-) diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index a8bbf146431ea..241f374bcf4c3 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -2313,6 +2313,13 @@ are listed below. This enables better devirtualization. 
Turned off by default, because it is still experimental. +.. option:: -fdevirtualize-speculatively + + Enable speculative devirtualization optimization, such as single-implementation + devirtualization. For now, this optimization only applies outside of LTO mode. + Turned off by default. + TODO: Enable for LTO mode. + .. option:: -fwhole-program-vtables Enable whole-program vtable optimizations, such as single-implementation @@ -5161,6 +5168,8 @@ Execute ``clang-cl /?`` to see a list of supported options: -fstandalone-debug Emit full debug info for all types used by the program -fstrict-aliasing Enable optimizations based on strict aliasing rules -fsyntax-only Run the preprocessor, parser and semantic analysis stages + -fdevirtualize-speculatively + Enables speculative devirtualization optimization. -fwhole-program-vtables Enables whole-program vtable optimization. Requires -flto -gcodeview-ghash Emit type record hashes in a .debug$H section -gcodeview Generate CodeView debug information diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index 872f73ebf3810..38174cf13cadf 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -358,6 +358,8 @@ VALUE_CODEGENOPT(WarnStackSize , 32, UINT_MAX, Benign) ///< Set via -fwarn-s CODEGENOPT(NoStackArgProbe, 1, 0, Benign) ///< Set when -mno-stack-arg-probe is used CODEGENOPT(EmitLLVMUseLists, 1, 0, Benign) ///< Control whether to serialize use-lists. +CODEGENOPT(DevirtualizeSpeculatively, 1, 0, Benign) ///< Whether to apply the speculative + /// devirtualization optimization. CODEGENOPT(WholeProgramVTables, 1, 0, Benign) ///< Whether to apply whole-program /// vtable optimization. 
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 47d328f862e07..4173ae341afcd 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4432,6 +4432,13 @@ defm new_infallible : BoolFOption<"new-infallible", BothFlags<[], [ClangOption, CC1Option], " treating throwing global C++ operator new as always returning valid memory " "(annotates with __attribute__((returns_nonnull)) and throw()). This is detectable in source.">>; +defm devirtualize_speculatively + : BoolFOption<"devirtualize-speculatively", + CodeGenOpts<"DevirtualizeSpeculatively">, DefaultFalse, + PosFlag<SetTrue, [], [], + "Enables speculative devirtualization optimization.">, + NegFlag<SetFalse>, + BothFlags<[], [ClangOption, CLOption, CC1Option]>>; defm whole_program_vtables : BoolFOption<"whole-program-vtables", CodeGenOpts<"WholeProgramVTables">, DefaultFalse, PosFlag<SetTrue, [], [ClangOption, CC1Option], @@ -7013,8 +7020,6 @@ defm variable_expansion_in_unroller : BooleanFFlag<"variable-expansion-in-unroll defm web : BooleanFFlag<"web">, Group<clang_ignored_gcc_optimization_f_Group>; defm whole_program : BooleanFFlag<"whole-program">, Group<clang_ignored_gcc_optimization_f_Group>; defm devirtualize : BooleanFFlag<"devirtualize">, Group<clang_ignored_gcc_optimization_f_Group>; -defm devirtualize_speculatively : BooleanFFlag<"devirtualize-speculatively">, - Group<clang_ignored_gcc_optimization_f_Group>; // Generic gfortran options. def A_DASH : Joined<["-"], "A-">, Group<gfortran_Group>; diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 8c99af2bdff83..790467dc557a5 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -907,6 +907,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline( // non-integrated assemblers don't recognize .cgprofile section. 
PTO.CallGraphProfile = !CodeGenOpts.DisableIntegratedAS; PTO.UnifiedLTO = CodeGenOpts.UnifiedLTO; + PTO.DevirtualizeSpeculatively = CodeGenOpts.DevirtualizeSpeculatively; LoopAnalysisManager LAM; FunctionAnalysisManager FAM; diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp index 8346ee3aa6a8d..bf1724e347a7f 100644 --- a/clang/lib/CodeGen/CGClass.cpp +++ b/clang/lib/CodeGen/CGClass.cpp @@ -2771,10 +2771,11 @@ void CodeGenFunction::EmitTypeMetadataCodeForVCall(const CXXRecordDecl *RD, SourceLocation Loc) { if (SanOpts.has(SanitizerKind::CFIVCall)) EmitVTablePtrCheckForCall(RD, VTable, CodeGenFunction::CFITCK_VCall, Loc); - else if (CGM.getCodeGenOpts().WholeProgramVTables && - // Don't insert type test assumes if we are forcing public - // visibility. - !CGM.AlwaysHasLTOVisibilityPublic(RD)) { + else if ((CGM.getCodeGenOpts().WholeProgramVTables && + // Don't insert type test assumes if we are forcing public + // visibility. + !CGM.AlwaysHasLTOVisibilityPublic(RD)) || + CGM.getCodeGenOpts().DevirtualizeSpeculatively) { CanQualType Ty = CGM.getContext().getCanonicalTagType(RD); llvm::Metadata *MD = CGM.CreateMetadataIdentifierForType(Ty); llvm::Value *TypeId = @@ -2932,8 +2933,9 @@ void CodeGenFunction::EmitVTablePtrCheck(const CXXRecordDecl *RD, } bool CodeGenFunction::ShouldEmitVTableTypeCheckedLoad(const CXXRecordDecl *RD) { - if (!CGM.getCodeGenOpts().WholeProgramVTables || - !CGM.HasHiddenLTOVisibility(RD)) + if ((!CGM.getCodeGenOpts().WholeProgramVTables || + !CGM.HasHiddenLTOVisibility(RD)) && + !CGM.getCodeGenOpts().DevirtualizeSpeculatively) return false; if (CGM.getCodeGenOpts().VirtualFunctionElimination) diff --git a/clang/lib/CodeGen/CGVTables.cpp b/clang/lib/CodeGen/CGVTables.cpp index e14e883a55ac5..959ba2031acf4 100644 --- a/clang/lib/CodeGen/CGVTables.cpp +++ b/clang/lib/CodeGen/CGVTables.cpp @@ -1358,10 +1358,12 @@ llvm::GlobalObject::VCallVisibility CodeGenModule::GetVCallVisibilityLevel( void 
CodeGenModule::EmitVTableTypeMetadata(const CXXRecordDecl *RD, llvm::GlobalVariable *VTable, const VTableLayout &VTLayout) { - // Emit type metadata on vtables with LTO or IR instrumentation. + // Emit type metadata on vtables with LTO or IR instrumentation or + // speculative devirtualization. // In IR instrumentation, the type metadata is used to find out vtable // definitions (for type profiling) among all global variables. - if (!getCodeGenOpts().LTOUnit && !getCodeGenOpts().hasProfileIRInstr()) + if (!getCodeGenOpts().LTOUnit && !getCodeGenOpts().hasProfileIRInstr() && + !getCodeGenOpts().DevirtualizeSpeculatively) return; CharUnits ComponentWidth = GetTargetTypeStoreSize(getVTableComponentType()); diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index 7dc2eaf1e9f75..71d36b5d6b153 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -717,9 +717,10 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( bool ShouldEmitVFEInfo = CGM.getCodeGenOpts().VirtualFunctionElimination && CGM.HasHiddenLTOVisibility(RD); bool ShouldEmitWPDInfo = - CGM.getCodeGenOpts().WholeProgramVTables && - // Don't insert type tests if we are forcing public visibility. - !CGM.AlwaysHasLTOVisibilityPublic(RD); + (CGM.getCodeGenOpts().WholeProgramVTables && + // Don't insert type tests if we are forcing public visibility. + !CGM.AlwaysHasLTOVisibilityPublic(RD)) || + CGM.getCodeGenOpts().DevirtualizeSpeculatively; llvm::Value *VirtualFn = nullptr; { @@ -2114,13 +2115,15 @@ void ItaniumCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT, // definitions to ensure we associate derived classes with base classes // defined in headers but with a strong definition only in a shared library. 
if (!VTable->isDeclarationForLinker() || - CGM.getCodeGenOpts().WholeProgramVTables) { + CGM.getCodeGenOpts().WholeProgramVTables || + CGM.getCodeGenOpts().DevirtualizeSpeculatively) { CGM.EmitVTableTypeMetadata(RD, VTable, VTLayout); // For available_externally definitions, add the vtable to // @llvm.compiler.used so that it isn't deleted before whole program // analysis. if (VTable->isDeclarationForLinker()) { - assert(CGM.getCodeGenOpts().WholeProgramVTables); + assert(CGM.getCodeGenOpts().WholeProgramVTables || + CGM.getCodeGenOpts().DevirtualizeSpeculatively); CGM.addCompilerUsedGlobal(VTable); } } diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 63efb0f02baa8..d91afcbf3fd6c 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -7785,6 +7785,13 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, addOpenMPHostOffloadingArgs(C, JA, Args, CmdArgs); + // Temporarily disable this for LTO if it's not explicitly enabled. + // TODO: enable it by default for LTO also. 
+ if (Args.hasFlag(options::OPT_fdevirtualize_speculatively, + options::OPT_fno_devirtualize_speculatively, + /*Default value*/ false)) + CmdArgs.push_back("-fdevirtualize-speculatively"); + bool VirtualFunctionElimination = Args.hasFlag(options::OPT_fvirtual_function_elimination, options::OPT_fno_virtual_function_elimination, false); diff --git a/clang/test/CodeGenCXX/type-metadata.cpp b/clang/test/CodeGenCXX/type-metadata.cpp index 1cb2fed8db3e6..61d36204942dc 100644 --- a/clang/test/CodeGenCXX/type-metadata.cpp +++ b/clang/test/CodeGenCXX/type-metadata.cpp @@ -14,6 +14,9 @@ // RUN: %clang_cc1 -O2 -flto -flto-unit -triple x86_64-unknown-linux -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=ITANIUM-OPT --check-prefix=ITANIUM-OPT-LAYOUT %s // RUN: %clang_cc1 -flto -flto-unit -triple x86_64-pc-windows-msvc -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=VTABLE-OPT --check-prefix=MS --check-prefix=MS-TYPEMETADATA --check-prefix=TT-MS %s +// Test for the speculative devirtualization feature in nonlto mode: +// RUN: %clang_cc1 -triple x86_64-unknown-linux -fdevirtualize-speculatively -emit-llvm -o - %s | FileCheck --check-prefix=VTABLE-OPT --check-prefix=TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT %s + // Tests for cfi + whole-program-vtables: // RUN: %clang_cc1 -flto -flto-unit -triple x86_64-unknown-linux -fvisibility=hidden -fsanitize=cfi-vcall -fsanitize-trap=cfi-vcall -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=CFI --check-prefix=CFI-VT --check-prefix=ITANIUM-HIDDEN --check-prefix=ITANIUM-COMMON-MD --check-prefix=TC-ITANIUM --check-prefix=ITANIUM-NO-RV-MD %s // RUN: %clang_cc1 -flto -flto-unit -triple x86_64-pc-windows-msvc -fsanitize=cfi-vcall -fsanitize-trap=cfi-vcall -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=CFI --check-prefix=CFI-VT --check-prefix=MS --check-prefix=MS-TYPEMETADATA --check-prefix=TC-MS %s @@ -178,6 +181,7 @@ void af(A *a) { // TT-ITANIUM-HIDDEN: 
[[P:%[^ ]*]] = call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1A") // TT-ITANIUM-DEFAULT: [[P:%[^ ]*]] = call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1A") // TT-MS: [[P:%[^ ]*]] = call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata !"?AUA@@") + // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: [[P:%[^ ]*]] = call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1A") // TC-ITANIUM: [[PAIR:%[^ ]*]] = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata !"_ZTS1A") // TC-ITANIUM-RV: [[PAIR:%[^ ]*]] = call { ptr, i1 } @llvm.type.checked.load.relative(ptr {{%[^ ]*}}, i32 0, metadata !"_ZTS1A") // TC-MS: [[PAIR:%[^ ]*]] = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata !"?AUA@@") @@ -212,6 +216,7 @@ void df1(D *d) { // TT-ITANIUM-HIDDEN: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE:[0-9]+]]) // TT-ITANIUM-DEFAULT: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE:[0-9]+]]) // TT-MS: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata !"?AUA@@") + // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE:[0-9]+]]) // TC-ITANIUM: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata ![[DTYPE:[0-9]+]]) // TC-ITANIUM-RV: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load.relative(ptr {{%[^ ]*}}, i32 0, metadata ![[DTYPE:[0-9]+]]) // TC-MS: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata !"?AUA@@") @@ -224,6 +229,7 @@ void dg1(D *d) { // TT-ITANIUM-HIDDEN: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata !"_ZTS1B") // TT-ITANIUM-DEFAULT: {{%[^ ]*}} = call i1 @llvm.public.type.test(ptr {{%[^ ]*}}, metadata !"_ZTS1B") // TT-MS: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata !"?AUB@@") + // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: {{%[^ ]*}} = call i1 @llvm.public.type.test(ptr 
{{%[^ ]*}}, metadata !"_ZTS1B") // TC-ITANIUM: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 8, metadata !"_ZTS1B") // TC-ITANIUM-RV: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load.relative(ptr {{%[^ ]*}}, i32 4, metadata !"_ZTS1B") // TC-MS: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata !"?AUB@@") @@ -236,6 +242,7 @@ void dh1(D *d) { // TT-ITANIUM-HIDDEN: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE]]) // TT-ITANIUM-DEFAULT: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE]]) // TT-MS: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE:[0-9]+]]) + // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE]]) // TC-ITANIUM: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 16, metadata ![[DTYPE]]) // TC-ITANIUM-RV: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load.relative(ptr {{%[^ ]*}}, i32 8, metadata ![[DTYPE]]) // TC-MS: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 8, metadata ![[DTYPE:[0-9]+]]) @@ -297,6 +304,7 @@ void f(D *d) { // TT-ITANIUM-HIDDEN: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata !"_ZTSN5test21DE") // TT-ITANIUM-DEFAULT: {{%[^ ]*}} = call i1 @llvm.public.type.test(ptr {{%[^ ]*}}, metadata !"_ZTSN5test21DE") // TT-MS: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata !"?AUA@test2@@") + // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: {{%[^ ]*}} = call i1 @llvm.public.type.test(ptr {{%[^ ]*}}, metadata !"_ZTSN5test21DE") // TC-ITANIUM: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 8, metadata !"_ZTSN5test21DE") // TC-ITANIUM-RV: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load.relative(ptr {{%[^ ]*}}, i32 4, metadata !"_ZTSN5test21DE") // TC-MS: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata 
!"?AUA@test2@@") diff --git a/clang/test/Driver/clang_f_opts.c b/clang/test/Driver/clang_f_opts.c index eb3994ddabcd3..1e6e1253a7a09 100644 --- a/clang/test/Driver/clang_f_opts.c +++ b/clang/test/Driver/clang_f_opts.c @@ -371,7 +371,6 @@ // RUN: -ftree-ter \ // RUN: -ftree-vrp \ // RUN: -fno-devirtualize \ -// RUN: -fno-devirtualize-speculatively \ // RUN: -fslp-vectorize-aggressive \ // RUN: -fno-slp-vectorize-aggressive \ // RUN: %s 2>&1 | FileCheck --check-prefix=CHECK-WARNING %s @@ -430,7 +429,6 @@ // CHECK-WARNING-DAG: optimization flag '-ftree-ter' is not supported // CHECK-WARNING-DAG: optimization flag '-ftree-vrp' is not supported // CHECK-WARNING-DAG: optimization flag '-fno-devirtualize' is not supported -// CHECK-WARNING-DAG: optimization flag '-fno-devirtualize-speculatively' is not supported // CHECK-WARNING-DAG: the flag '-fslp-vectorize-aggressive' has been deprecated and will be ignored // CHECK-WARNING-DAG: the flag '-fno-slp-vectorize-aggressive' has been deprecated and will be ignored diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h index 2742ec1b71b7e..c1fcde24cc420 100644 --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -102,6 +102,10 @@ class PipelineTuningOptions { // analyses after various module->function or cgscc->function adaptors in the // default pipelines. bool EagerlyInvalidateAnalyses; + + // Tuning option to enable/disable speculative devirtualization. + // Its default value is false. + bool DevirtualizeSpeculatively; }; /// This class provides access to building LLVM's passes. 
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 79642e650ac83..c8303b6f18a6b 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -322,6 +322,7 @@ PipelineTuningOptions::PipelineTuningOptions() { MergeFunctions = EnableMergeFunctions; InlinerThreshold = -1; EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses; + DevirtualizeSpeculatively = false; } namespace llvm { @@ -1635,6 +1636,23 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, if (!LTOPreLink) MPM.addPass(RelLookupTableConverterPass()); + if (PTO.DevirtualizeSpeculatively && LTOPhase == ThinOrFullLTOPhase::None) { + MPM.addPass(WholeProgramDevirtPass(/*ExportSummary*/ nullptr, + /*ImportSummary*/ nullptr, + /*DevirtSpeculatively*/ PTO.DevirtualizeSpeculatively)); + MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, + lowertypetests::DropTestKind::Assume)); + if (EnableModuleInliner) { + MPM.addPass(ModuleInlinerPass(getInlineParamsFromOptLevel(Level), + UseInlineAdvisor, + ThinOrFullLTOPhase::None)); + } else { + MPM.addPass(ModuleInlinerWrapperPass( + getInlineParamsFromOptLevel(Level), + /* MandatoryFirst */ true, + InlineContext{ThinOrFullLTOPhase::None, InlinePass::CGSCCInliner})); + } + } return MPM; } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
