https://github.com/hassnaaHamdi updated https://github.com/llvm/llvm-project/pull/159685
>From ab5380147909df71338a0f1e8ae20d432dd2eb70 Mon Sep 17 00:00:00 2001 From: Hassnaa Hamdi <[email protected]> Date: Tue, 23 Sep 2025 20:47:55 +0000 Subject: [PATCH 1/9] [WPD]: Add devirtualization pass to the pass pipeline. - Build ExportSummary locally when they are not given. --- llvm/include/llvm/Passes/PassBuilder.h | 4 ++ .../llvm/Transforms/IPO/WholeProgramDevirt.h | 7 ++- llvm/lib/Passes/PassBuilderPipelines.cpp | 19 ++++++++ .../lib/Transforms/IPO/WholeProgramDevirt.cpp | 46 +++++++++++++------ 4 files changed, 59 insertions(+), 17 deletions(-) diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h index 8fa21f2cb2dd6..00d4874d5109b 100644 --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -99,6 +99,10 @@ class PipelineTuningOptions { // analyses after various module->function or cgscc->function adaptors in the // default pipelines. bool EagerlyInvalidateAnalyses; + + // Tuning option to enable/disable speculative devirtualization. + // Its default value is false. + bool DevirtualizeSpeculatively; }; /// This class provides access to building LLVM's passes. 
diff --git a/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h b/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h index 7a03405b4f462..2e33a4098be1b 100644 --- a/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h +++ b/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h @@ -226,11 +226,14 @@ struct WholeProgramDevirtPass : public PassInfoMixin<WholeProgramDevirtPass> { ModuleSummaryIndex *ExportSummary; const ModuleSummaryIndex *ImportSummary; bool UseCommandLine = false; + bool DevirtSpeculatively = false; WholeProgramDevirtPass() : ExportSummary(nullptr), ImportSummary(nullptr), UseCommandLine(true) {} WholeProgramDevirtPass(ModuleSummaryIndex *ExportSummary, - const ModuleSummaryIndex *ImportSummary) - : ExportSummary(ExportSummary), ImportSummary(ImportSummary) { + const ModuleSummaryIndex *ImportSummary, + bool DevirtSpeculatively = false) + : ExportSummary(ExportSummary), ImportSummary(ImportSummary), + DevirtSpeculatively(DevirtSpeculatively) { assert(!(ExportSummary && ImportSummary)); } LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &); diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index c6beb3fdf09bd..2bb7ec20a8bb7 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -326,6 +326,7 @@ PipelineTuningOptions::PipelineTuningOptions() { MergeFunctions = EnableMergeFunctions; InlinerThreshold = -1; EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses; + DevirtualizeSpeculatively = false; } namespace llvm { @@ -1655,6 +1656,24 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, if (!LTOPreLink) MPM.addPass(RelLookupTableConverterPass()); + if (PTO.DevirtualizeSpeculatively && LTOPhase == ThinOrFullLTOPhase::None) { + MPM.addPass(WholeProgramDevirtPass( + /*ExportSummary*/ nullptr, + /*ImportSummary*/ nullptr, + /*DevirtSpeculatively*/ PTO.DevirtualizeSpeculatively)); + 
MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, + lowertypetests::DropTestKind::Assume)); + if (EnableModuleInliner) { + MPM.addPass(ModuleInlinerPass(getInlineParamsFromOptLevel(Level), + UseInlineAdvisor, + ThinOrFullLTOPhase::None)); + } else { + MPM.addPass(ModuleInlinerWrapperPass( + getInlineParamsFromOptLevel(Level), + /* MandatoryFirst */ true, + InlineContext{ThinOrFullLTOPhase::None, InlinePass::CGSCCInliner})); + } + } return MPM; } diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp index 4642da0abdc13..7aa90eefd0d96 100644 --- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -628,9 +628,11 @@ struct DevirtModule { std::map<CallInst *, unsigned> NumUnsafeUsesForTypeTest; PatternList FunctionsToSkip; + const bool DevirtSpeculatively; DevirtModule(Module &M, ModuleAnalysisManager &MAM, ModuleSummaryIndex *ExportSummary, - const ModuleSummaryIndex *ImportSummary) + const ModuleSummaryIndex *ImportSummary, + bool DevirtSpeculatively) : M(M), MAM(MAM), FAM(MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()), ExportSummary(ExportSummary), ImportSummary(ImportSummary), @@ -643,7 +645,8 @@ struct DevirtModule { RemarksEnabled(areRemarksEnabled()), OREGetter([&](Function &F) -> OptimizationRemarkEmitter & { return FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); - }) { + }), + DevirtSpeculatively(DevirtSpeculatively) { assert(!(ExportSummary && ImportSummary)); FunctionsToSkip.init(SkipFunctionNames); } @@ -757,7 +760,8 @@ struct DevirtModule { // Lower the module using the action and summary passed as command line // arguments. For testing purposes only. 
- static bool runForTesting(Module &M, ModuleAnalysisManager &MAM); + static bool runForTesting(Module &M, ModuleAnalysisManager &MAM, + bool DevirtSpeculatively); }; struct DevirtIndex { @@ -800,11 +804,22 @@ struct DevirtIndex { PreservedAnalyses WholeProgramDevirtPass::run(Module &M, ModuleAnalysisManager &MAM) { if (UseCommandLine) { - if (!DevirtModule::runForTesting(M, MAM)) + if (!DevirtModule::runForTesting(M, MAM, ClDevirtualizeSpeculatively)) return PreservedAnalyses::all(); return PreservedAnalyses::none(); } - if (!DevirtModule(M, MAM, ExportSummary, ImportSummary).run()) + + std::optional<ModuleSummaryIndex> Index; + if (!ExportSummary && !ImportSummary && DevirtSpeculatively) { + // Build the ExportSummary from the module. + assert(!ExportSummary && + "ExportSummary is expected to be empty in non-LTO mode"); + ProfileSummaryInfo PSI(M); + Index.emplace(buildModuleSummaryIndex(M, nullptr, &PSI)); + ExportSummary = Index.has_value() ? &Index.value() : nullptr; + } + if (!DevirtModule(M, MAM, ExportSummary, ImportSummary, DevirtSpeculatively) + .run()) return PreservedAnalyses::all(); return PreservedAnalyses::none(); } @@ -1002,7 +1017,8 @@ static Error checkCombinedSummaryForTesting(ModuleSummaryIndex *Summary) { return ErrorSuccess(); } -bool DevirtModule::runForTesting(Module &M, ModuleAnalysisManager &MAM) { +bool DevirtModule::runForTesting(Module &M, ModuleAnalysisManager &MAM, + bool DevirtSpeculatively) { std::unique_ptr<ModuleSummaryIndex> Summary = std::make_unique<ModuleSummaryIndex>(/*HaveGVs=*/false); @@ -1031,7 +1047,8 @@ bool DevirtModule::runForTesting(Module &M, ModuleAnalysisManager &MAM) { ClSummaryAction == PassSummaryAction::Export ? Summary.get() : nullptr, ClSummaryAction == PassSummaryAction::Import ? 
Summary.get() - : nullptr) + : nullptr, + DevirtSpeculatively) .run(); if (!ClWriteSummary.empty()) { @@ -1095,10 +1112,10 @@ bool DevirtModule::tryFindVirtualCallTargets( if (!TM.Bits->GV->isConstant()) return false; - // Without ClDevirtualizeSpeculatively, we cannot perform whole program + // Without DevirtSpeculatively, we cannot perform whole program // devirtualization analysis on a vtable with public LTO visibility. - if (!ClDevirtualizeSpeculatively && TM.Bits->GV->getVCallVisibility() == - GlobalObject::VCallVisibilityPublic) + if (!DevirtSpeculatively && TM.Bits->GV->getVCallVisibility() == + GlobalObject::VCallVisibilityPublic) return false; Function *Fn = nullptr; @@ -1119,7 +1136,7 @@ bool DevirtModule::tryFindVirtualCallTargets( // In most cases empty functions will be overridden by the // implementation of the derived class, so we can skip them. - if (ClDevirtualizeSpeculatively && Fn->getReturnType()->isVoidTy() && + if (DevirtSpeculatively && Fn->getReturnType()->isVoidTy() && Fn->getInstructionCount() <= 1) continue; @@ -1240,8 +1257,7 @@ void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo, // add support to compare the virtual function pointer to the // devirtualized target. In case of a mismatch, fall back to indirect // call. - if (DevirtCheckMode == WPDCheckMode::Fallback || - ClDevirtualizeSpeculatively) { + if (DevirtCheckMode == WPDCheckMode::Fallback || DevirtSpeculatively) { MDNode *Weights = MDBuilder(M.getContext()).createLikelyBranchWeights(); // Version the indirect call site. If the called value is equal to the // given callee, 'NewInst' will be executed, otherwise the original call @@ -2365,7 +2381,7 @@ bool DevirtModule::run() { Function *PublicTypeTestFunc = nullptr; // If we are in speculative devirtualization mode, we can work on the public // type test intrinsics. 
- if (ClDevirtualizeSpeculatively) + if (DevirtSpeculatively) PublicTypeTestFunc = Intrinsic::getDeclarationIfExists(&M, Intrinsic::public_type_test); Function *TypeTestFunc = @@ -2501,7 +2517,7 @@ bool DevirtModule::run() { // Out of speculative devirtualization mode, Try to apply virtual constant // propagation or branch funneling. // TODO: This should eventually be enabled for non-public type tests. - if (!SingleImplDevirt && !ClDevirtualizeSpeculatively) { + if (!SingleImplDevirt && !DevirtSpeculatively) { DidVirtualConstProp |= tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first); >From 70c4b6f7eecfc9ec9e7249a0d2388e533538fa89 Mon Sep 17 00:00:00 2001 From: Hassnaa Hamdi <[email protected]> Date: Wed, 24 Sep 2025 07:44:55 +0000 Subject: [PATCH 2/9] [Clang]: Enable speculative devirtualization --- clang/docs/UsersManual.rst | 9 +++++++++ clang/include/clang/Basic/CodeGenOptions.def | 2 ++ clang/include/clang/Options/Options.td | 12 +++++++++--- clang/lib/CodeGen/BackendUtil.cpp | 1 + clang/lib/CodeGen/CGClass.cpp | 14 ++++++++------ clang/lib/CodeGen/CGVTables.cpp | 6 ++++-- clang/lib/CodeGen/ItaniumCXXABI.cpp | 13 ++++++++----- clang/lib/Driver/ToolChains/Clang.cpp | 7 +++++++ clang/test/CodeGenCXX/type-metadata.cpp | 8 ++++++++ clang/test/Driver/clang_f_opts.c | 2 -- 10 files changed, 56 insertions(+), 18 deletions(-) diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index 42665da413660..eed2fe4676c30 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -2328,6 +2328,13 @@ are listed below. This enables better devirtualization. Turned off by default, because it is still experimental. +.. option:: -fdevirtualize-speculatively + + Enable speculative devirtualization optimization, such as single-implementation + devirtualization. This optimization is used out of LTO mode for now. + Turned off by default. + TODO: Enable for LTO mode. + .. 
option:: -fwhole-program-vtables Enable whole-program vtable optimizations, such as single-implementation @@ -5216,6 +5223,8 @@ Execute ``clang-cl /?`` to see a list of supported options: -fstandalone-debug Emit full debug info for all types used by the program -fstrict-aliasing Enable optimizations based on strict aliasing rules -fsyntax-only Run the preprocessor, parser and semantic analysis stages + -fdevirtualize-speculatively + Enables speculative devirtualization optimization. -fwhole-program-vtables Enables whole-program vtable optimization. Requires -flto -gcodeview-ghash Emit type record hashes in a .debug$H section -gcodeview Generate CodeView debug information diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index 76a6463881c6f..a059803c433e3 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -364,6 +364,8 @@ VALUE_CODEGENOPT(WarnStackSize , 32, UINT_MAX, Benign) ///< Set via -fwarn-s CODEGENOPT(NoStackArgProbe, 1, 0, Benign) ///< Set when -mno-stack-arg-probe is used CODEGENOPT(EmitLLVMUseLists, 1, 0, Benign) ///< Control whether to serialize use-lists. +CODEGENOPT(DevirtualizeSpeculatively, 1, 0, Benign) ///< Whether to apply the speculative + /// devirtualization optimization. CODEGENOPT(WholeProgramVTables, 1, 0, Benign) ///< Whether to apply whole-program /// vtable optimization. diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td index d31bd7d6be322..df238a8087a46 100644 --- a/clang/include/clang/Options/Options.td +++ b/clang/include/clang/Options/Options.td @@ -4512,6 +4512,13 @@ defm new_infallible : BoolFOption<"new-infallible", BothFlags<[], [ClangOption, CC1Option], " treating throwing global C++ operator new as always returning valid memory " "(annotates with __attribute__((returns_nonnull)) and throw()). 
This is detectable in source.">>; +defm devirtualize_speculatively + : BoolFOption<"devirtualize-speculatively", + CodeGenOpts<"DevirtualizeSpeculatively">, DefaultFalse, + PosFlag<SetTrue, [], [], + "Enables speculative devirtualization optimization.">, + NegFlag<SetFalse>, + BothFlags<[], [ClangOption, CLOption, CC1Option]>>; defm whole_program_vtables : BoolFOption<"whole-program-vtables", CodeGenOpts<"WholeProgramVTables">, DefaultFalse, PosFlag<SetTrue, [], [ClangOption, CC1Option], @@ -7122,9 +7129,8 @@ defm variable_expansion_in_unroller : BooleanFFlag<"variable-expansion-in-unroll Group<clang_ignored_gcc_optimization_f_Group>; defm web : BooleanFFlag<"web">, Group<clang_ignored_gcc_optimization_f_Group>; defm whole_program : BooleanFFlag<"whole-program">, Group<clang_ignored_gcc_optimization_f_Group>; -defm devirtualize : BooleanFFlag<"devirtualize">, Group<clang_ignored_gcc_optimization_f_Group>; -defm devirtualize_speculatively : BooleanFFlag<"devirtualize-speculatively">, - Group<clang_ignored_gcc_optimization_f_Group>; +defm devirtualize : BooleanFFlag<"devirtualize">, + Group<clang_ignored_gcc_optimization_f_Group>; // Generic gfortran options. def A_DASH : Joined<["-"], "A-">, Group<gfortran_Group>; diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index ec609db8d3a3c..df716e5bce23f 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -940,6 +940,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline( // non-integrated assemblers don't recognize .cgprofile section. 
PTO.CallGraphProfile = !CodeGenOpts.DisableIntegratedAS; PTO.UnifiedLTO = CodeGenOpts.UnifiedLTO; + PTO.DevirtualizeSpeculatively = CodeGenOpts.DevirtualizeSpeculatively; LoopAnalysisManager LAM; FunctionAnalysisManager FAM; diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp index f782b0cd17da4..6736126f7d316 100644 --- a/clang/lib/CodeGen/CGClass.cpp +++ b/clang/lib/CodeGen/CGClass.cpp @@ -2827,10 +2827,11 @@ void CodeGenFunction::EmitTypeMetadataCodeForVCall(const CXXRecordDecl *RD, SourceLocation Loc) { if (SanOpts.has(SanitizerKind::CFIVCall)) EmitVTablePtrCheckForCall(RD, VTable, CodeGenFunction::CFITCK_VCall, Loc); - else if (CGM.getCodeGenOpts().WholeProgramVTables && - // Don't insert type test assumes if we are forcing public - // visibility. - !CGM.AlwaysHasLTOVisibilityPublic(RD)) { + else if ((CGM.getCodeGenOpts().WholeProgramVTables && + // Don't insert type test assumes if we are forcing public + // visibility. + !CGM.AlwaysHasLTOVisibilityPublic(RD)) || + CGM.getCodeGenOpts().DevirtualizeSpeculatively) { CanQualType Ty = CGM.getContext().getCanonicalTagType(RD); llvm::Metadata *MD = CGM.CreateMetadataIdentifierForType(Ty); llvm::Value *TypeId = @@ -2988,8 +2989,9 @@ void CodeGenFunction::EmitVTablePtrCheck(const CXXRecordDecl *RD, } bool CodeGenFunction::ShouldEmitVTableTypeCheckedLoad(const CXXRecordDecl *RD) { - if (!CGM.getCodeGenOpts().WholeProgramVTables || - !CGM.HasHiddenLTOVisibility(RD)) + if ((!CGM.getCodeGenOpts().WholeProgramVTables || + !CGM.HasHiddenLTOVisibility(RD)) && + !CGM.getCodeGenOpts().DevirtualizeSpeculatively) return false; if (CGM.getCodeGenOpts().VirtualFunctionElimination) diff --git a/clang/lib/CodeGen/CGVTables.cpp b/clang/lib/CodeGen/CGVTables.cpp index 3fbac308a9178..91550d0d31d83 100644 --- a/clang/lib/CodeGen/CGVTables.cpp +++ b/clang/lib/CodeGen/CGVTables.cpp @@ -1363,10 +1363,12 @@ llvm::GlobalObject::VCallVisibility CodeGenModule::GetVCallVisibilityLevel( void 
CodeGenModule::EmitVTableTypeMetadata(const CXXRecordDecl *RD, llvm::GlobalVariable *VTable, const VTableLayout &VTLayout) { - // Emit type metadata on vtables with LTO or IR instrumentation. + // Emit type metadata on vtables with LTO or IR instrumentation or + // speculative devirtualization. // In IR instrumentation, the type metadata is used to find out vtable // definitions (for type profiling) among all global variables. - if (!getCodeGenOpts().LTOUnit && !getCodeGenOpts().hasProfileIRInstr()) + if (!getCodeGenOpts().LTOUnit && !getCodeGenOpts().hasProfileIRInstr() && + !getCodeGenOpts().DevirtualizeSpeculatively) return; CharUnits ComponentWidth = GetTargetTypeStoreSize(getVTableComponentType()); diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index 65c47633bc5c4..41aa84fa8c07d 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -717,9 +717,10 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( bool ShouldEmitVFEInfo = CGM.getCodeGenOpts().VirtualFunctionElimination && CGM.HasHiddenLTOVisibility(RD); bool ShouldEmitWPDInfo = - CGM.getCodeGenOpts().WholeProgramVTables && - // Don't insert type tests if we are forcing public visibility. - !CGM.AlwaysHasLTOVisibilityPublic(RD); + (CGM.getCodeGenOpts().WholeProgramVTables && + // Don't insert type tests if we are forcing public visibility. + !CGM.AlwaysHasLTOVisibilityPublic(RD)) || + CGM.getCodeGenOpts().DevirtualizeSpeculatively; llvm::Value *VirtualFn = nullptr; { @@ -2114,13 +2115,15 @@ void ItaniumCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT, // definitions to ensure we associate derived classes with base classes // defined in headers but with a strong definition only in a shared library. 
if (!VTable->isDeclarationForLinker() || - CGM.getCodeGenOpts().WholeProgramVTables) { + CGM.getCodeGenOpts().WholeProgramVTables || + CGM.getCodeGenOpts().DevirtualizeSpeculatively) { CGM.EmitVTableTypeMetadata(RD, VTable, VTLayout); // For available_externally definitions, add the vtable to // @llvm.compiler.used so that it isn't deleted before whole program // analysis. if (VTable->isDeclarationForLinker()) { - assert(CGM.getCodeGenOpts().WholeProgramVTables); + assert(CGM.getCodeGenOpts().WholeProgramVTables || + CGM.getCodeGenOpts().DevirtualizeSpeculatively); CGM.addCompilerUsedGlobal(VTable); } } diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 0380568412e62..01d8180faf93e 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -7745,6 +7745,13 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, addOpenMPHostOffloadingArgs(C, JA, Args, CmdArgs); + // Temporarily disable this for LTO if it's not explicitly enabled. + // TODO: enable it by default for LTO also. 
+ if (Args.hasFlag(options::OPT_fdevirtualize_speculatively, + options::OPT_fno_devirtualize_speculatively, + /*Default value*/ false)) + CmdArgs.push_back("-fdevirtualize-speculatively"); + bool VirtualFunctionElimination = Args.hasFlag(options::OPT_fvirtual_function_elimination, options::OPT_fno_virtual_function_elimination, false); diff --git a/clang/test/CodeGenCXX/type-metadata.cpp b/clang/test/CodeGenCXX/type-metadata.cpp index 1cb2fed8db3e6..61d36204942dc 100644 --- a/clang/test/CodeGenCXX/type-metadata.cpp +++ b/clang/test/CodeGenCXX/type-metadata.cpp @@ -14,6 +14,9 @@ // RUN: %clang_cc1 -O2 -flto -flto-unit -triple x86_64-unknown-linux -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=ITANIUM-OPT --check-prefix=ITANIUM-OPT-LAYOUT %s // RUN: %clang_cc1 -flto -flto-unit -triple x86_64-pc-windows-msvc -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=VTABLE-OPT --check-prefix=MS --check-prefix=MS-TYPEMETADATA --check-prefix=TT-MS %s +// Test for the speculative devirtualization feature in nonlto mode: +// RUN: %clang_cc1 -triple x86_64-unknown-linux -fdevirtualize-speculatively -emit-llvm -o - %s | FileCheck --check-prefix=VTABLE-OPT --check-prefix=TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT %s + // Tests for cfi + whole-program-vtables: // RUN: %clang_cc1 -flto -flto-unit -triple x86_64-unknown-linux -fvisibility=hidden -fsanitize=cfi-vcall -fsanitize-trap=cfi-vcall -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=CFI --check-prefix=CFI-VT --check-prefix=ITANIUM-HIDDEN --check-prefix=ITANIUM-COMMON-MD --check-prefix=TC-ITANIUM --check-prefix=ITANIUM-NO-RV-MD %s // RUN: %clang_cc1 -flto -flto-unit -triple x86_64-pc-windows-msvc -fsanitize=cfi-vcall -fsanitize-trap=cfi-vcall -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=CFI --check-prefix=CFI-VT --check-prefix=MS --check-prefix=MS-TYPEMETADATA --check-prefix=TC-MS %s @@ -178,6 +181,7 @@ void af(A *a) { // TT-ITANIUM-HIDDEN: 
[[P:%[^ ]*]] = call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1A") // TT-ITANIUM-DEFAULT: [[P:%[^ ]*]] = call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1A") // TT-MS: [[P:%[^ ]*]] = call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata !"?AUA@@") + // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: [[P:%[^ ]*]] = call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1A") // TC-ITANIUM: [[PAIR:%[^ ]*]] = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata !"_ZTS1A") // TC-ITANIUM-RV: [[PAIR:%[^ ]*]] = call { ptr, i1 } @llvm.type.checked.load.relative(ptr {{%[^ ]*}}, i32 0, metadata !"_ZTS1A") // TC-MS: [[PAIR:%[^ ]*]] = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata !"?AUA@@") @@ -212,6 +216,7 @@ void df1(D *d) { // TT-ITANIUM-HIDDEN: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE:[0-9]+]]) // TT-ITANIUM-DEFAULT: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE:[0-9]+]]) // TT-MS: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata !"?AUA@@") + // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE:[0-9]+]]) // TC-ITANIUM: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata ![[DTYPE:[0-9]+]]) // TC-ITANIUM-RV: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load.relative(ptr {{%[^ ]*}}, i32 0, metadata ![[DTYPE:[0-9]+]]) // TC-MS: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata !"?AUA@@") @@ -224,6 +229,7 @@ void dg1(D *d) { // TT-ITANIUM-HIDDEN: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata !"_ZTS1B") // TT-ITANIUM-DEFAULT: {{%[^ ]*}} = call i1 @llvm.public.type.test(ptr {{%[^ ]*}}, metadata !"_ZTS1B") // TT-MS: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata !"?AUB@@") + // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: {{%[^ ]*}} = call i1 @llvm.public.type.test(ptr 
{{%[^ ]*}}, metadata !"_ZTS1B") // TC-ITANIUM: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 8, metadata !"_ZTS1B") // TC-ITANIUM-RV: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load.relative(ptr {{%[^ ]*}}, i32 4, metadata !"_ZTS1B") // TC-MS: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata !"?AUB@@") @@ -236,6 +242,7 @@ void dh1(D *d) { // TT-ITANIUM-HIDDEN: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE]]) // TT-ITANIUM-DEFAULT: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE]]) // TT-MS: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE:[0-9]+]]) + // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE]]) // TC-ITANIUM: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 16, metadata ![[DTYPE]]) // TC-ITANIUM-RV: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load.relative(ptr {{%[^ ]*}}, i32 8, metadata ![[DTYPE]]) // TC-MS: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 8, metadata ![[DTYPE:[0-9]+]]) @@ -297,6 +304,7 @@ void f(D *d) { // TT-ITANIUM-HIDDEN: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata !"_ZTSN5test21DE") // TT-ITANIUM-DEFAULT: {{%[^ ]*}} = call i1 @llvm.public.type.test(ptr {{%[^ ]*}}, metadata !"_ZTSN5test21DE") // TT-MS: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata !"?AUA@test2@@") + // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: {{%[^ ]*}} = call i1 @llvm.public.type.test(ptr {{%[^ ]*}}, metadata !"_ZTSN5test21DE") // TC-ITANIUM: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 8, metadata !"_ZTSN5test21DE") // TC-ITANIUM-RV: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load.relative(ptr {{%[^ ]*}}, i32 4, metadata !"_ZTSN5test21DE") // TC-MS: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata 
!"?AUA@test2@@") diff --git a/clang/test/Driver/clang_f_opts.c b/clang/test/Driver/clang_f_opts.c index 765f9d6ae3212..e5a23270ea732 100644 --- a/clang/test/Driver/clang_f_opts.c +++ b/clang/test/Driver/clang_f_opts.c @@ -377,7 +377,6 @@ // RUN: -ftree-ter \ // RUN: -ftree-vrp \ // RUN: -fno-devirtualize \ -// RUN: -fno-devirtualize-speculatively \ // RUN: -fslp-vectorize-aggressive \ // RUN: -fno-slp-vectorize-aggressive \ // RUN: %s 2>&1 | FileCheck --check-prefix=CHECK-WARNING %s @@ -436,7 +435,6 @@ // CHECK-WARNING-DAG: optimization flag '-ftree-ter' is not supported // CHECK-WARNING-DAG: optimization flag '-ftree-vrp' is not supported // CHECK-WARNING-DAG: optimization flag '-fno-devirtualize' is not supported -// CHECK-WARNING-DAG: optimization flag '-fno-devirtualize-speculatively' is not supported // CHECK-WARNING-DAG: the flag '-fslp-vectorize-aggressive' has been deprecated and will be ignored // CHECK-WARNING-DAG: the flag '-fno-slp-vectorize-aggressive' has been deprecated and will be ignored >From 33509ca16b64de52c3970210f20f6139e2d4d65a Mon Sep 17 00:00:00 2001 From: Hassnaa Hamdi <[email protected]> Date: Wed, 19 Nov 2025 03:49:34 +0000 Subject: [PATCH 3/9] Resolve review comments: - Improve documentation and comments - Update release notes. 
--- clang/docs/ReleaseNotes.rst | 1 + clang/docs/UsersManual.rst | 53 ++++++++++++++++++++---- clang/lib/CodeGen/CGClass.cpp | 4 +- clang/lib/CodeGen/ItaniumCXXABI.cpp | 10 +++-- clang/lib/Driver/ToolChains/Clang.cpp | 2 - clang/test/CodeGenCXX/type-metadata.cpp | 8 ---- llvm/lib/Passes/PassBuilderPipelines.cpp | 8 ++++ 7 files changed, 64 insertions(+), 22 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 6838e926f4c9d..6cd3030a4c8c7 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -343,6 +343,7 @@ Modified Compiler Flags ----------------------- - The `-gkey-instructions` compiler flag is now enabled by default when DWARF is emitted for plain C/C++ and optimizations are enabled. (#GH149509) - The `-fconstexpr-steps` compiler flag now accepts value `0` to opt out of this limit. (#GH160440) +- The `-fdevirtualize-speculatively` compiler flag is now supported to enable speculative devirtualization of virtual function calls. It is disabled by default. (#GH159685) Removed Compiler Flags ------------------------- diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index eed2fe4676c30..acdd17cfea69d 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -2328,13 +2328,6 @@ are listed below. This enables better devirtualization. Turned off by default, because it is still experimental. -.. option:: -fdevirtualize-speculatively - - Enable speculative devirtualization optimization, such as single-implementation - devirtualization. This optimization is used out of LTO mode for now. - Turned off by default. - TODO: Enable for LTO mode. - .. option:: -fwhole-program-vtables Enable whole-program vtable optimizations, such as single-implementation @@ -2359,6 +2352,52 @@ are listed below. pure ThinLTO, as all split regular LTO modules are merged and LTO linked with regular LTO. +.. 
option:: -fdevirtualize-speculatively + + Enable speculative devirtualization optimization where a virtual call + can be transformed into a direct call under the assumption that its + object is of a particular type. A runtime check is inserted to validate + the assumption before making the direct call, and if the check fails, + the original virtual call is made instead. This optimization can enable + more inlining opportunities and better optimization of the direct call. + This is different from other whole program devirtualization optimizations + that rely on global analysis and hidden visibility of the objects to prove + that the object is always of a particular type at a virtual call site. + This optimization doesn't require global analysis or hidden visibility. + This optimization doesn't devirtualize all virtual calls, but only + when there's a single implementation of the virtual function. + There could be a single implementation of the virtual function + either because the function is not overridden in any derived class, + or because there is a single instantiated object that is using the function. + + Example IR before the optimization: + .. code-block:: llvm + %vtable = load ptr, ptr %BV, align 8, !tbaa !6 + %0 = tail call i1 @llvm.public.type.test(ptr %vtable, metadata !"_ZTS4Base") + tail call void @llvm.assume(i1 %0) + %1 = load ptr, ptr %vtable, align 8 + tail call void %1(ptr noundef nonnull align 8 dereferenceable(8) %BV) + ret void + + IR after the optimization: + .. 
code-block:: llvm + %vtable = load ptr, ptr %BV, align 8, !tbaa !12 + %0 = load ptr, ptr %vtable, align 8 + %1 = icmp eq ptr %0, @_ZN4Base17virtual_function1Ev + br i1 %1, label %if.true.direct_targ, label %if.false.orig_indirect, !prof !15 + if.true.direct_targ: ; preds = %entry + tail call void @_ZN4Base17virtual_function1Ev(ptr noundef nonnull align 8 dereferenceable(8) %BV) + br label %if.end.icp + if.false.orig_indirect: ; preds = %entry + tail call void %0(ptr noundef nonnull align 8 dereferenceable(8) %BV) + br label %if.end.icp + + if.end.icp: ; preds = %if.false.orig_indirect, %if.true.direct_targ + ret void + This feature is temporarily ignored at the LLVM side when LTO is enabled. + TODO: Update the comment when the LLVM side supports it. + This feature is turned off by default. + .. option:: -f[no-]unique-source-file-names When enabled, allows the compiler to assume that each object file diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp index 6736126f7d316..5fd240fa1d115 100644 --- a/clang/lib/CodeGen/CGClass.cpp +++ b/clang/lib/CodeGen/CGClass.cpp @@ -2827,9 +2827,9 @@ void CodeGenFunction::EmitTypeMetadataCodeForVCall(const CXXRecordDecl *RD, SourceLocation Loc) { if (SanOpts.has(SanitizerKind::CFIVCall)) EmitVTablePtrCheckForCall(RD, VTable, CodeGenFunction::CFITCK_VCall, Loc); + // Emit the type test assumes for the features of WPD (only when LTO + // visibility is NOT public) and speculative devirtualization. else if ((CGM.getCodeGenOpts().WholeProgramVTables && - // Don't insert type test assumes if we are forcing public - // visibility. 
!CGM.AlwaysHasLTOVisibilityPublic(RD)) || CGM.getCodeGenOpts().DevirtualizeSpeculatively) { CanQualType Ty = CGM.getContext().getCanonicalTagType(RD); diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index 41aa84fa8c07d..24ff12c18d69a 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -716,6 +716,9 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( bool ShouldEmitVFEInfo = CGM.getCodeGenOpts().VirtualFunctionElimination && CGM.HasHiddenLTOVisibility(RD); + // TODO: Update this name not to be restricted to WPD only + // as we now emit the vtable info for speculative devirtualization as + // well. bool ShouldEmitWPDInfo = (CGM.getCodeGenOpts().WholeProgramVTables && // Don't insert type tests if we are forcing public visibility. @@ -2111,9 +2114,10 @@ void ItaniumCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT, // Always emit type metadata on non-available_externally definitions, and on // available_externally definitions if we are performing whole program - // devirtualization. For WPD we need the type metadata on all vtable - // definitions to ensure we associate derived classes with base classes - // defined in headers but with a strong definition only in a shared library. + // devirtualization or speculative devirtualization. We need the type metadata + // on all vtable definitions to ensure we associate derived classes with base + // classes defined in headers but with a strong definition only in a shared + // library. 
if (!VTable->isDeclarationForLinker() || CGM.getCodeGenOpts().WholeProgramVTables || CGM.getCodeGenOpts().DevirtualizeSpeculatively) { diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 01d8180faf93e..7187d1a158e01 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -7745,8 +7745,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, addOpenMPHostOffloadingArgs(C, JA, Args, CmdArgs); - // Temporarily disable this for LTO if it's not explicitly enabled. - // TODO: enable it by default for LTO also. if (Args.hasFlag(options::OPT_fdevirtualize_speculatively, options::OPT_fno_devirtualize_speculatively, /*Default value*/ false)) diff --git a/clang/test/CodeGenCXX/type-metadata.cpp b/clang/test/CodeGenCXX/type-metadata.cpp index 61d36204942dc..1cb2fed8db3e6 100644 --- a/clang/test/CodeGenCXX/type-metadata.cpp +++ b/clang/test/CodeGenCXX/type-metadata.cpp @@ -14,9 +14,6 @@ // RUN: %clang_cc1 -O2 -flto -flto-unit -triple x86_64-unknown-linux -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=ITANIUM-OPT --check-prefix=ITANIUM-OPT-LAYOUT %s // RUN: %clang_cc1 -flto -flto-unit -triple x86_64-pc-windows-msvc -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=VTABLE-OPT --check-prefix=MS --check-prefix=MS-TYPEMETADATA --check-prefix=TT-MS %s -// Test for the speculative devirtualization feature in nonlto mode: -// RUN: %clang_cc1 -triple x86_64-unknown-linux -fdevirtualize-speculatively -emit-llvm -o - %s | FileCheck --check-prefix=VTABLE-OPT --check-prefix=TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT %s - // Tests for cfi + whole-program-vtables: // RUN: %clang_cc1 -flto -flto-unit -triple x86_64-unknown-linux -fvisibility=hidden -fsanitize=cfi-vcall -fsanitize-trap=cfi-vcall -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=CFI --check-prefix=CFI-VT --check-prefix=ITANIUM-HIDDEN --check-prefix=ITANIUM-COMMON-MD 
--check-prefix=TC-ITANIUM --check-prefix=ITANIUM-NO-RV-MD %s // RUN: %clang_cc1 -flto -flto-unit -triple x86_64-pc-windows-msvc -fsanitize=cfi-vcall -fsanitize-trap=cfi-vcall -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=CFI --check-prefix=CFI-VT --check-prefix=MS --check-prefix=MS-TYPEMETADATA --check-prefix=TC-MS %s @@ -181,7 +178,6 @@ void af(A *a) { // TT-ITANIUM-HIDDEN: [[P:%[^ ]*]] = call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1A") // TT-ITANIUM-DEFAULT: [[P:%[^ ]*]] = call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1A") // TT-MS: [[P:%[^ ]*]] = call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata !"?AUA@@") - // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: [[P:%[^ ]*]] = call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1A") // TC-ITANIUM: [[PAIR:%[^ ]*]] = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata !"_ZTS1A") // TC-ITANIUM-RV: [[PAIR:%[^ ]*]] = call { ptr, i1 } @llvm.type.checked.load.relative(ptr {{%[^ ]*}}, i32 0, metadata !"_ZTS1A") // TC-MS: [[PAIR:%[^ ]*]] = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata !"?AUA@@") @@ -216,7 +212,6 @@ void df1(D *d) { // TT-ITANIUM-HIDDEN: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE:[0-9]+]]) // TT-ITANIUM-DEFAULT: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE:[0-9]+]]) // TT-MS: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata !"?AUA@@") - // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE:[0-9]+]]) // TC-ITANIUM: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata ![[DTYPE:[0-9]+]]) // TC-ITANIUM-RV: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load.relative(ptr {{%[^ ]*}}, i32 0, metadata ![[DTYPE:[0-9]+]]) // TC-MS: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata !"?AUA@@") @@ -229,7 
+224,6 @@ void dg1(D *d) { // TT-ITANIUM-HIDDEN: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata !"_ZTS1B") // TT-ITANIUM-DEFAULT: {{%[^ ]*}} = call i1 @llvm.public.type.test(ptr {{%[^ ]*}}, metadata !"_ZTS1B") // TT-MS: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata !"?AUB@@") - // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: {{%[^ ]*}} = call i1 @llvm.public.type.test(ptr {{%[^ ]*}}, metadata !"_ZTS1B") // TC-ITANIUM: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 8, metadata !"_ZTS1B") // TC-ITANIUM-RV: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load.relative(ptr {{%[^ ]*}}, i32 4, metadata !"_ZTS1B") // TC-MS: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata !"?AUB@@") @@ -242,7 +236,6 @@ void dh1(D *d) { // TT-ITANIUM-HIDDEN: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE]]) // TT-ITANIUM-DEFAULT: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE]]) // TT-MS: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE:[0-9]+]]) - // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE]]) // TC-ITANIUM: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 16, metadata ![[DTYPE]]) // TC-ITANIUM-RV: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load.relative(ptr {{%[^ ]*}}, i32 8, metadata ![[DTYPE]]) // TC-MS: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 8, metadata ![[DTYPE:[0-9]+]]) @@ -304,7 +297,6 @@ void f(D *d) { // TT-ITANIUM-HIDDEN: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata !"_ZTSN5test21DE") // TT-ITANIUM-DEFAULT: {{%[^ ]*}} = call i1 @llvm.public.type.test(ptr {{%[^ ]*}}, metadata !"_ZTSN5test21DE") // TT-MS: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata !"?AUA@test2@@") - // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: {{%[^ ]*}} = call i1 
@llvm.public.type.test(ptr {{%[^ ]*}}, metadata !"_ZTSN5test21DE") // TC-ITANIUM: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 8, metadata !"_ZTSN5test21DE") // TC-ITANIUM-RV: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load.relative(ptr {{%[^ ]*}}, i32 4, metadata !"_ZTSN5test21DE") // TC-MS: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata !"?AUA@test2@@") diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 2bb7ec20a8bb7..9a55e7ae6cd39 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -1656,6 +1656,8 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, if (!LTOPreLink) MPM.addPass(RelLookupTableConverterPass()); + // Add devirtualization pass only when LTO is not enabled, as otherwise + // the pass is already enabled in the LTO pipeline. if (PTO.DevirtualizeSpeculatively && LTOPhase == ThinOrFullLTOPhase::None) { MPM.addPass(WholeProgramDevirtPass( /*ExportSummary*/ nullptr, @@ -1663,6 +1665,12 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, /*DevirtSpeculatively*/ PTO.DevirtualizeSpeculatively)); MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, lowertypetests::DropTestKind::Assume)); + // Given that the devirtualization creates more opportunities for inlining, + // we run the Inliner again here to maximize the optimization gain we + // get from devirtualization. + // Also, we can't run devirtualization before inlining because the + // devirtualization depends on the passes optimizing/eliminating vtable GVs + // and those passes are only effective after inlining. 
if (EnableModuleInliner) { MPM.addPass(ModuleInlinerPass(getInlineParamsFromOptLevel(Level), UseInlineAdvisor, >From 3d6997c1f00ff1eb4a99f9bb4b77f37d8ed0d444 Mon Sep 17 00:00:00 2001 From: Hassnaa Hamdi <[email protected]> Date: Wed, 19 Nov 2025 04:11:44 +0000 Subject: [PATCH 4/9] Add tests for emiting MD and ensuring that MD is not dropped on the way to the WPD at backend --- .../speculative-devirt-metadata.cpp | 77 ++++++++++++++++++ clang/test/CodeGenCXX/speculative-devirt.cpp | 78 +++++++++++++++++++ 2 files changed, 155 insertions(+) create mode 100644 clang/test/CodeGenCXX/speculative-devirt-metadata.cpp create mode 100644 clang/test/CodeGenCXX/speculative-devirt.cpp diff --git a/clang/test/CodeGenCXX/speculative-devirt-metadata.cpp b/clang/test/CodeGenCXX/speculative-devirt-metadata.cpp new file mode 100644 index 0000000000000..a20d71e086ed3 --- /dev/null +++ b/clang/test/CodeGenCXX/speculative-devirt-metadata.cpp @@ -0,0 +1,77 @@ +// Test that Clang emits vtable metadata when speculative devirtualization is enabled. 
+// RUN: %clang_cc1 -triple x86_64-unknown-linux -fdevirtualize-speculatively -emit-llvm -o - %s | FileCheck --check-prefix=VTABLE-OPT --check-prefix=TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT %s + +struct A { + A(); + virtual void f(); +}; + +struct B : virtual A { + B(); + virtual void g(); + virtual void h(); +}; + +namespace { + +struct D : B { + D(); + virtual void f(); + virtual void h(); +}; + +} + +A::A() {} +B::B() {} +D::D() {} + +void A::f() { +} + +void B::g() { +} + +void D::f() { +} + +void D::h() { +} + +void af(A *a) { + // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: [[P:%[^ ]*]] = call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1A") + // VTABLE-OPT: call void @llvm.assume(i1 [[P]]) + a->f(); +} + +void dg1(D *d) { + // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: [[P:%[^ ]*]] = call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1B") + // VTABLE-OPT: call void @llvm.assume(i1 [[P]]) + d->g(); +} +void df1(D *d) { + // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: [[P:%[^ ]*]] = call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata !11) + // VTABLE-OPT: call void @llvm.assume(i1 [[P]]) + d->f(); +} + +void dh1(D *d) { + // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: [[P:%[^ ]*]] = call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata !11) + // VTABLE-OPT: call void @llvm.assume(i1 [[P]]) + d->h(); +} + + +D d; + +void foo() { + dg1(&d); + df1(&d); + dh1(&d); + + + struct FA : A { + void f() {} + } fa; + af(&fa); +} diff --git a/clang/test/CodeGenCXX/speculative-devirt.cpp b/clang/test/CodeGenCXX/speculative-devirt.cpp new file mode 100644 index 0000000000000..f1a69fd90573a --- /dev/null +++ b/clang/test/CodeGenCXX/speculative-devirt.cpp @@ -0,0 +1,78 @@ +// Test that the vtable metadata that are emitted by Clang when speculative devirtualization +// is enabled can be used by the WholeProgramDevirt pass without being dropped on the way. 
+// RUN: %clang_cc1 -O3 -triple x86_64-unknown-linux -fdevirtualize-speculatively -mllvm -print-before=wholeprogramdevirt -S %s 2>&1 | FileCheck --check-prefix=VTABLE-OPT --check-prefix=TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT %s + +struct A { + A(); + virtual void f(); +}; + +struct B : virtual A { + B(); + virtual void g(); + virtual void h(); +}; + +namespace { + +struct D : B { + D(); + virtual void f(); + virtual void h(); +}; + +} + +A::A() {} +B::B() {} +D::D() {} + +void A::f() { +} + +void B::g() { +} + +void D::f() { +} + +void D::h() { +} + +void af(A *a) { + // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: [[P:%[^ ]*]] = tail call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1A") + // VTABLE-OPT: call void @llvm.assume(i1 [[P]]) + a->f(); +} + +void dg1(D *d) { + // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: [[P:%[^ ]*]] = tail call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1B") + // VTABLE-OPT: call void @llvm.assume(i1 [[P]]) + d->g(); +} +void df1(D *d) { + // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: [[P:%[^ ]*]] = tail call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata !11) + // VTABLE-OPT: call void @llvm.assume(i1 [[P]]) + d->f(); +} + +void dh1(D *d) { + // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: [[P:%[^ ]*]] = tail call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata !11) + // VTABLE-OPT: call void @llvm.assume(i1 [[P]]) + d->h(); +} + + +D d; + +void foo() { + dg1(&d); + df1(&d); + dh1(&d); + + + struct FA : A { + void f() {} + } fa; + af(&fa); +} >From d8863df53a4f44f38e66e8a9042a7f4de5411eeb Mon Sep 17 00:00:00 2001 From: Hassnaa Hamdi <[email protected]> Date: Thu, 27 Nov 2025 01:46:24 +0000 Subject: [PATCH 5/9] resolve review comments --- clang/docs/UsersManual.rst | 10 +-- .../speculative-devirt-metadata.cpp | 19 ++--- clang/test/CodeGenCXX/speculative-devirt.cpp | 78 ------------------- .../WholeProgramDevirt/devirt-metadata.ll | 64 +++++++++++++++ 4 files changed, 79 insertions(+), 92 
deletions(-) delete mode 100644 clang/test/CodeGenCXX/speculative-devirt.cpp create mode 100644 llvm/test/Transforms/WholeProgramDevirt/devirt-metadata.ll diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index acdd17cfea69d..07148afedad99 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -2360,15 +2360,15 @@ are listed below. the assumption before making the direct call, and if the check fails, the original virtual call is made instead. This optimization can enable more inlining opportunities and better optimization of the direct call. - This is different from other whole program devirtualization optimizations + This is different from whole program devirtualization optimization that rely on global analysis and hidden visibility of the objects to prove that the object is always of a particular type at a virtual call site. This optimization doesn't require global analysis or hidden visibility. This optimization doesn't devirtualize all virtual calls, but only - when there's a single implementation of the virtual function. - There could be a single implementaiton of the virtual function + when there's a single implementation of the virtual function in the module. + There could be a single implementation of the virtual function either because the function is not overridden in any derived class, - or because there is a sinlge instantiated object that is using the funciton. + or because there is a single instantiated object that is using the function. Ex of IR before the optimization: .. code-block:: llvm @@ -2395,7 +2395,7 @@ are listed below. if.end.icp: ; preds = %if.false.orig_indirect, %if.true.direct_targ ret void This feature is temporarily ignored at the LLVM side when LTO is enabled. - TODO: Update the comment when the LLVM side supports it. + TODO: Update the comment when the LLVM side supports this feature for LTO. This feature is turned off by default. .. 
option:: -f[no-]unique-source-file-names diff --git a/clang/test/CodeGenCXX/speculative-devirt-metadata.cpp b/clang/test/CodeGenCXX/speculative-devirt-metadata.cpp index a20d71e086ed3..20d2ab9f46fe5 100644 --- a/clang/test/CodeGenCXX/speculative-devirt-metadata.cpp +++ b/clang/test/CodeGenCXX/speculative-devirt-metadata.cpp @@ -1,5 +1,5 @@ // Test that Clang emits vtable metadata when speculative devirtualization is enabled. -// RUN: %clang_cc1 -triple x86_64-unknown-linux -fdevirtualize-speculatively -emit-llvm -o - %s | FileCheck --check-prefix=VTABLE-OPT --check-prefix=TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux -fdevirtualize-speculatively -emit-llvm -o - %s | FileCheck --check-prefix=CHECK %s struct A { A(); @@ -39,25 +39,26 @@ void D::h() { } void af(A *a) { - // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: [[P:%[^ ]*]] = call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1A") - // VTABLE-OPT: call void @llvm.assume(i1 [[P]]) + // CHECK: [[P:%[^ ]*]] = call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1A") + // CHECK-NEXT: call void @llvm.assume(i1 [[P]]) a->f(); } void dg1(D *d) { - // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: [[P:%[^ ]*]] = call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1B") - // VTABLE-OPT: call void @llvm.assume(i1 [[P]]) + // CHECK: [[P:%[^ ]*]] = call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1B") + // CHECK-NEXT: call void @llvm.assume(i1 [[P]]) d->g(); } + void df1(D *d) { - // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: [[P:%[^ ]*]] = call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata !11) - // VTABLE-OPT: call void @llvm.assume(i1 [[P]]) + // CHECK: [[P:%[^ ]*]] = call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata !11) + // CHECK-NEXT: call void @llvm.assume(i1 [[P]]) d->f(); } void dh1(D *d) { - // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: [[P:%[^ ]*]] = call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata 
!11) - // VTABLE-OPT: call void @llvm.assume(i1 [[P]]) + // CHECK: [[P:%[^ ]*]] = call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata !11) + // CHECK-NEXT: call void @llvm.assume(i1 [[P]]) d->h(); } diff --git a/clang/test/CodeGenCXX/speculative-devirt.cpp b/clang/test/CodeGenCXX/speculative-devirt.cpp deleted file mode 100644 index f1a69fd90573a..0000000000000 --- a/clang/test/CodeGenCXX/speculative-devirt.cpp +++ /dev/null @@ -1,78 +0,0 @@ -// Test that the vtable metadata that are emitted by Clang when speculative devirtualization -// is enabled can be used by the WholeProgramDevirt pass without being dropped on the way. -// RUN: %clang_cc1 -O3 -triple x86_64-unknown-linux -fdevirtualize-speculatively -mllvm -print-before=wholeprogramdevirt -S %s 2>&1 | FileCheck --check-prefix=VTABLE-OPT --check-prefix=TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT %s - -struct A { - A(); - virtual void f(); -}; - -struct B : virtual A { - B(); - virtual void g(); - virtual void h(); -}; - -namespace { - -struct D : B { - D(); - virtual void f(); - virtual void h(); -}; - -} - -A::A() {} -B::B() {} -D::D() {} - -void A::f() { -} - -void B::g() { -} - -void D::f() { -} - -void D::h() { -} - -void af(A *a) { - // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: [[P:%[^ ]*]] = tail call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1A") - // VTABLE-OPT: call void @llvm.assume(i1 [[P]]) - a->f(); -} - -void dg1(D *d) { - // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: [[P:%[^ ]*]] = tail call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1B") - // VTABLE-OPT: call void @llvm.assume(i1 [[P]]) - d->g(); -} -void df1(D *d) { - // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: [[P:%[^ ]*]] = tail call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata !11) - // VTABLE-OPT: call void @llvm.assume(i1 [[P]]) - d->f(); -} - -void dh1(D *d) { - // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: [[P:%[^ ]*]] = tail call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata !11) - // 
VTABLE-OPT: call void @llvm.assume(i1 [[P]]) - d->h(); -} - - -D d; - -void foo() { - dg1(&d); - df1(&d); - dh1(&d); - - - struct FA : A { - void f() {} - } fa; - af(&fa); -} diff --git a/llvm/test/Transforms/WholeProgramDevirt/devirt-metadata.ll b/llvm/test/Transforms/WholeProgramDevirt/devirt-metadata.ll new file mode 100644 index 0000000000000..d8781d5686b53 --- /dev/null +++ b/llvm/test/Transforms/WholeProgramDevirt/devirt-metadata.ll @@ -0,0 +1,64 @@ +; Test that the needed intrinsics for devirtualization are preserved and not dropped by other +; optimizations. + +; RUN: opt -S -O3 %s 2>&1 | FileCheck %s + +target datalayout = "e-p:64:64" +target triple = "x86_64-unknown-linux-gnu" + +@vt1 = constant [1 x ptr] [ptr @vf], !type !8 +@vt2 = constant [1 x ptr] [ptr @vf2], !type !12 + +define i1 @vf(ptr %this) #0 !dbg !7 { + ret i1 true +} + +define i1 @vf2(ptr %this) !dbg !11 { + ret i1 false +} + +define void @call(ptr %obj) #1 !dbg !5 { + %vtable = load ptr, ptr %obj + ; CHECK: [[P:%[^ ]*]] = tail call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], metadata !"typeid") + ; CHECK-NEXT: call void @llvm.assume(i1 [[P]]) + %p = call i1 @llvm.public.type.test(ptr %vtable, metadata !"typeid") + call void @llvm.assume(i1 %p) + %fptr = load ptr, ptr %vtable + call i1 %fptr(ptr %obj), !dbg !6 + ret void +} + +define void @call1(ptr %obj) #1 !dbg !9 { + %vtable = load ptr, ptr %obj + ; CHECK: [[P:%[^ ]*]] = tail call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata !"typeid1") + ; CHECK-NEXT: call void @llvm.assume(i1 [[P]]) + %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid1") + call void @llvm.assume(i1 %p) + %fptr = load ptr, ptr %vtable, align 8 + %1 = call i1 %fptr(ptr %obj), !dbg !10 + ret void +} + +declare i1 @llvm.type.test(ptr, metadata) +declare i1 @llvm.public.type.test(ptr, metadata) +declare void @llvm.assume(i1) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3} +!llvm.ident = !{!4} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, 
file: !1, producer: "clang version 4.0.0 (trunk 278098)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug) +!1 = !DIFile(filename: "devirt-single.cc", directory: ".") +!2 = !{i32 2, !"Dwarf Version", i32 4} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{!"clang version 4.0.0 (trunk 278098)"} +!5 = distinct !DISubprogram(name: "call", linkageName: "_Z4callPv", scope: !1, file: !1, line: 29, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!6 = !DILocation(line: 30, column: 32, scope: !5) +!7 = distinct !DISubprogram(name: "vf", linkageName: "_ZN3vt12vfEb", scope: !1, file: !1, line: 13, isLocal: false, isDefinition: true, scopeLine: 13, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!8 = !{i32 0, !"typeid"} + +!9 = distinct !DISubprogram(name: "call1", linkageName: "_Z5call1Pv", scope: !1, file: !1, line: 31, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!10 = !DILocation(line: 35, column: 32, scope: !9) +!11 = distinct !DISubprogram(name: "vf2", linkageName: "_ZN3vt13vf2Eb", scope: !1, file: !1, line: 23, isLocal: false, isDefinition: true, scopeLine: 23, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!12 = !{i32 0, !"typeid1"} + >From f64ee2c312b9a730e1b3cde4b08ad32e3d1a374f Mon Sep 17 00:00:00 2001 From: Hassnaa Hamdi <[email protected]> Date: Mon, 1 Dec 2025 23:38:03 +0000 Subject: [PATCH 6/9] add PhaseOrdering test --- llvm/lib/Passes/PassBuilderPipelines.cpp | 9 ++- .../speculative-devirt-then-inliner.ll | 60 +++++++++++++++++++ 2 files changed, 68 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/PhaseOrdering/speculative-devirt-then-inliner.ll diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 9a55e7ae6cd39..66c7e73c95dd5 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -305,6 
+305,13 @@ static cl::opt<std::string> InstrumentColdFuncOnlyPath( "with --pgo-instrument-cold-function-only)"), cl::Hidden); +// TODO: There is a similar flag in WPD pass, we should consolidate them by +// parsing the option only once in PassBuilder and share it across both places. +static cl::opt<bool> EnableDevirtualizeSpeculatively( + "enable-devirtualize-speculatively", + cl::desc("Enable speculative devirtualization optimization"), + cl::init(false)); + extern cl::opt<std::string> UseCtxProfile; extern cl::opt<bool> PGOInstrumentColdFunctionOnly; @@ -326,7 +333,7 @@ PipelineTuningOptions::PipelineTuningOptions() { MergeFunctions = EnableMergeFunctions; InlinerThreshold = -1; EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses; - DevirtualizeSpeculatively = false; + DevirtualizeSpeculatively = EnableDevirtualizeSpeculatively; } namespace llvm { diff --git a/llvm/test/Transforms/PhaseOrdering/speculative-devirt-then-inliner.ll b/llvm/test/Transforms/PhaseOrdering/speculative-devirt-then-inliner.ll new file mode 100644 index 0000000000000..98df729696de9 --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/speculative-devirt-then-inliner.ll @@ -0,0 +1,60 @@ +; RUN: opt -S -O3 -enable-devirtualize-speculatively %s 2>&1 | FileCheck %s + +; Test that the devirtualized calls are inlined. 
+ +@vt1 = constant [1 x ptr] [ptr @vf], !type !0 +@vt2 = constant [1 x ptr] [ptr @vf2], !type !1 + + +define i1 @vf(ptr %this) { + ret i1 true +} + +define i1 @vf2(ptr %this) { + ret i1 false +} + +; CHECK: define i1 @call +define i1 @call(ptr %obj) #1 { + %vtable = load ptr, ptr %obj + %p = call i1 @llvm.public.type.test(ptr %vtable, metadata !"typeid") + call void @llvm.assume(i1 %p) + %fptr = load ptr, ptr %vtable + ; if.true.direct_targ: ; preds = %0 + ; br label %if.end.icp + ; if.false.orig_indirect: ; preds = %0 + ; %res = tail call i1 %fptr(ptr nonnull %obj) + ; br label %if.end.icp + ; if.end.icp: ; preds = %if.false.orig_indirect, %if.true.direct_targ + ; %2 = phi i1 [ %res, %if.false.orig_indirect ], [ true, %if.true.direct_targ ] + ; ret i1 %2 + %res = call i1 %fptr(ptr %obj) + ret i1 %res +} + + +; CHECK: define i1 @call1 +define i1 @call1(ptr %obj) #1 { + %vtable = load ptr, ptr %obj + %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid1") + call void @llvm.assume(i1 %p) + %fptr = load ptr, ptr %vtable, align 8 + ; if.true.direct_targ: ; preds = %0 + ; br label %if.end.icp + ; if.false.orig_indirect: ; preds = %0 + ; %res = tail call i1 %fptr(ptr nonnull %obj) + ; br label %if.end.icp + ; if.end.icp: ; preds = %if.false.orig_indirect, %if.true.direct_targ + ; %2 = phi i1 [ %res, %if.false.orig_indirect ], [ false, %if.true.direct_targ ] + ; ret i1 %2 + %res = call i1 %fptr(ptr %obj) + ret i1 %res +} + + +declare i1 @llvm.type.test(ptr, metadata) +declare i1 @llvm.public.type.test(ptr, metadata) +declare void @llvm.assume(i1) + +!0 = !{i32 0, !"typeid"} +!1 = !{i32 0, !"typeid1"} >From eac8e32fcd1044ee9dbb3b87017ba04f091dda12 Mon Sep 17 00:00:00 2001 From: Hassnaa Hamdi <[email protected]> Date: Tue, 2 Dec 2025 13:21:52 +0000 Subject: [PATCH 7/9] update code comments --- clang/lib/CodeGen/CGClass.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp index 
5fd240fa1d115..1df16c2a68c17 100644 --- a/clang/lib/CodeGen/CGClass.cpp +++ b/clang/lib/CodeGen/CGClass.cpp @@ -2827,8 +2827,9 @@ void CodeGenFunction::EmitTypeMetadataCodeForVCall(const CXXRecordDecl *RD, SourceLocation Loc) { if (SanOpts.has(SanitizerKind::CFIVCall)) EmitVTablePtrCheckForCall(RD, VTable, CodeGenFunction::CFITCK_VCall, Loc); - // Emit the type test assumes for the features of WPD (only when LTO - // visibility is NOT public) and speculative devirtualization. + // Emit the intrinsics of (type_test and assume) for the features of WPD and + // speculative devirtualization. For WPD, emit the intrinsics only for the + // case of non_public LTO visibility. else if ((CGM.getCodeGenOpts().WholeProgramVTables && !CGM.AlwaysHasLTOVisibilityPublic(RD)) || CGM.getCodeGenOpts().DevirtualizeSpeculatively) { >From 2b1d72e7306f494ad03304ce7ba3a581233aa277 Mon Sep 17 00:00:00 2001 From: Hassnaa Hamdi <[email protected]> Date: Wed, 3 Dec 2025 12:22:13 +0000 Subject: [PATCH 8/9] Add TODOs and update comments --- clang/docs/UsersManual.rst | 2 +- clang/lib/CodeGen/CGClass.cpp | 3 +++ llvm/lib/Passes/PassBuilderPipelines.cpp | 2 ++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index 07148afedad99..5f0a39b9f761f 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -2368,7 +2368,7 @@ are listed below. when there's a single implementation of the virtual function in the module. There could be a single implementation of the virtual function either because the function is not overridden in any derived class, - or because there is a single instantiated object that is using the function. + or because all objects are instances of the same class/type. Ex of IR before the optimization: .. 
code-block:: llvm diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp index 1df16c2a68c17..96fde10f24f32 100644 --- a/clang/lib/CodeGen/CGClass.cpp +++ b/clang/lib/CodeGen/CGClass.cpp @@ -2830,6 +2830,9 @@ void CodeGenFunction::EmitTypeMetadataCodeForVCall(const CXXRecordDecl *RD, // Emit the intrinsics of (type_test and assume) for the features of WPD and // speculative devirtualization. For WPD, emit the intrinsics only for the // case of non_public LTO visibility. + // TODO: refactor this condition and similar ones into a function (e.g., + // ShouldEmitDevirtualizationMD) to encapsulate the details of the different + // types of devirtualization. else if ((CGM.getCodeGenOpts().WholeProgramVTables && !CGM.AlwaysHasLTOVisibilityPublic(RD)) || CGM.getCodeGenOpts().DevirtualizeSpeculatively) { diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 66c7e73c95dd5..4de527d9ef85e 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -1666,6 +1666,8 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, // Add devirtualization pass only when LTO is not enabled, as otherwise // the pass is already enabled in the LTO pipeline. if (PTO.DevirtualizeSpeculatively && LTOPhase == ThinOrFullLTOPhase::None) { + // TODO: explore a better pipeline configuration that can improve + // compilation time overhead. 
MPM.addPass(WholeProgramDevirtPass( /*ExportSummary*/ nullptr, /*ImportSummary*/ nullptr, >From 76ac87d9dce3284a03517413daee05625f837b36 Mon Sep 17 00:00:00 2001 From: Hassnaa Hamdi <[email protected]> Date: Wed, 3 Dec 2025 23:51:49 +0000 Subject: [PATCH 9/9] Adjust identation in UsersManual.rst --- clang/docs/UsersManual.rst | 54 ++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index 5f0a39b9f761f..7b1cf12d7648c 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -2371,32 +2371,34 @@ are listed below. or because all objects are instances of the same class/type. Ex of IR before the optimization: - .. code-block:: llvm - %vtable = load ptr, ptr %BV, align 8, !tbaa !6 - %0 = tail call i1 @llvm.public.type.test(ptr %vtable, metadata !"_ZTS4Base") - tail call void @llvm.assume(i1 %0) - %0 = load ptr, ptr %vtable, align 8 - tail call void %0(ptr noundef nonnull align 8 dereferenceable(8) %BV) - ret void - - IR after the optimization: - .. code-block:: llvm - %vtable = load ptr, ptr %BV, align 8, !tbaa !12 - %0 = load ptr, ptr %vtable, align 8 - %1 = icmp eq ptr %0, @_ZN4Base17virtual_function1Ev - br i1 %1, label %if.true.direct_targ, label %if.false.orig_indirect, !prof !15 - if.true.direct_targ: ; preds = %entry - tail call void @_ZN4Base17virtual_function1Ev(ptr noundef nonnull align 8 dereferenceable(8) %BV) - br label %if.end.icp - if.false.orig_indirect: ; preds = %entry - tail call void %0(ptr noundef nonnull align 8 dereferenceable(8) %BV) - br label %if.end.icp - - if.end.icp: ; preds = %if.false.orig_indirect, %if.true.direct_targ - ret void - This feature is temporarily ignored at the LLVM side when LTO is enabled. - TODO: Update the comment when the LLVM side supports this feature for LTO. - This feature is turned off by default. + + .. 
code-block:: llvm + %vtable = load ptr, ptr %BV, align 8, !tbaa !6 + %0 = tail call i1 @llvm.public.type.test(ptr %vtable, metadata !"_ZTS4Base") + tail call void @llvm.assume(i1 %0) + %1 = load ptr, ptr %vtable, align 8 + tail call void %1(ptr noundef nonnull align 8 dereferenceable(8) %BV) + ret void + + IR after the optimization: + + .. code-block:: llvm + %vtable = load ptr, ptr %BV, align 8, !tbaa !12 + %0 = load ptr, ptr %vtable, align 8 + %1 = icmp eq ptr %0, @_ZN4Base17virtual_function1Ev + br i1 %1, label %if.true.direct_targ, label %if.false.orig_indirect, !prof !15 + if.true.direct_targ: ; preds = %entry + tail call void @_ZN4Base17virtual_function1Ev(ptr noundef nonnull align 8 dereferenceable(8) %BV) + br label %if.end.icp + if.false.orig_indirect: ; preds = %entry + tail call void %0(ptr noundef nonnull align 8 dereferenceable(8) %BV) + br label %if.end.icp + if.end.icp: ; preds = %if.false.orig_indirect, %if.true.direct_targ + ret void + + This feature is temporarily ignored at the LLVM side when LTO is enabled. + TODO: Update the comment when the LLVM side supports this feature for LTO. + This feature is turned off by default. .. option:: -f[no-]unique-source-file-names _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
