https://github.com/minglotus-6 updated https://github.com/llvm/llvm-project/pull/75954
>From 4936920fbbe5e70a47be35b057200de3b07a087f Mon Sep 17 00:00:00 2001 From: mingmingl <mingmi...@google.com> Date: Mon, 18 Dec 2023 20:21:40 -0800 Subject: [PATCH 1/2] Reapply "Reland "[PGO][GlobalValue][LTO]In GlobalValues::getGlobalIdentifier, use semicolon as delimiter for local-linkage varibles. "" (#75888) This reverts commit 6ce23ea0ab6370c944f5e426a20217f93f41aa15. --- compiler-rt/test/profile/CMakeLists.txt | 2 +- ...trprof-thinlto-indirect-call-promotion.cpp | 115 ++++++++++++++++++ llvm/include/llvm/IR/GlobalValue.h | 4 + llvm/include/llvm/ProfileData/InstrProf.h | 26 ++-- llvm/lib/IR/Globals.cpp | 12 +- llvm/lib/ProfileData/InstrProf.cpp | 36 ++++-- llvm/lib/ProfileData/InstrProfReader.cpp | 9 +- .../thinlto-function-summary-originalnames.ll | 10 +- llvm/test/ThinLTO/X86/memprof-basic.ll | 26 ++-- .../X86/memprof-duplicate-context-ids.ll | 10 +- .../ThinLTO/X86/memprof-funcassigncloning.ll | 6 +- llvm/test/ThinLTO/X86/memprof-indirectcall.ll | 32 ++--- llvm/test/ThinLTO/X86/memprof-inlined.ll | 14 +-- .../Inputs/thinlto_indirect_call_promotion.ll | 16 --- .../thinlto_indirect_call_promotion.profraw | Bin 0 -> 528 bytes ..._thinlto_indirect_call_promotion_inputs.sh | 62 ++++++++++ .../thinlto_indirect_call_promotion.ll | 105 +++++++++++----- llvm/unittests/ProfileData/InstrProfTest.cpp | 4 +- 18 files changed, 362 insertions(+), 127 deletions(-) create mode 100644 compiler-rt/test/profile/instrprof-thinlto-indirect-call-promotion.cpp delete mode 100644 llvm/test/Transforms/PGOProfile/Inputs/thinlto_indirect_call_promotion.ll create mode 100644 llvm/test/Transforms/PGOProfile/Inputs/thinlto_indirect_call_promotion.profraw create mode 100755 llvm/test/Transforms/PGOProfile/Inputs/update_thinlto_indirect_call_promotion_inputs.sh diff --git a/compiler-rt/test/profile/CMakeLists.txt b/compiler-rt/test/profile/CMakeLists.txt index 975e4c42f4b640..eebe0469efebe0 100644 --- a/compiler-rt/test/profile/CMakeLists.txt +++ b/compiler-rt/test/profile/CMakeLists.txt 
@@ -6,7 +6,7 @@ set(PROFILE_TESTSUITES) set(PROFILE_TEST_DEPS ${SANITIZER_COMMON_LIT_TEST_DEPS} compiler-rt-headers) list(APPEND PROFILE_TEST_DEPS profile) if(NOT COMPILER_RT_STANDALONE_BUILD) - list(APPEND PROFILE_TEST_DEPS llvm-profdata llvm-cov) + list(APPEND PROFILE_TEST_DEPS llvm-cov llvm-dis llvm-lto llvm-profdata opt) if(NOT APPLE AND COMPILER_RT_HAS_LLD AND "lld" IN_LIST LLVM_ENABLE_PROJECTS) list(APPEND PROFILE_TEST_DEPS lld) endif() diff --git a/compiler-rt/test/profile/instrprof-thinlto-indirect-call-promotion.cpp b/compiler-rt/test/profile/instrprof-thinlto-indirect-call-promotion.cpp new file mode 100644 index 00000000000000..82ca1cd7d0a564 --- /dev/null +++ b/compiler-rt/test/profile/instrprof-thinlto-indirect-call-promotion.cpp @@ -0,0 +1,115 @@ +// This is a regression test for ThinLTO indirect-call-promotion when candidate +// callees need to be imported from another IR module. In the C++ test case, +// `main` calls `global_func` which is defined in another module. `global_func` +// has two indirect callees, one has external linkage and one has local linkage. +// All three functions should be imported into the IR module of main. + +// What the test does: +// - Generate raw profiles from executables and convert it to indexed profiles. +// During the conversion, a profiled callee address in raw profiles will be +// converted to function hash in indexed profiles. +// - Run IRPGO profile use and ThinLTO prelink pipeline and get LLVM bitcodes +// for both cpp files in the C++ test case. +// - Generate ThinLTO summary file with LLVM bitcodes, and run `function-import` pass. +// - Run `pgo-icall-prom` pass for the IR module which needs to import callees. + +// Use lld as linker for more robust test. We need to REQUIRE LLVMgold.so for +// LTO if default linker is GNU ld or gold anyway. +// REQUIRES: lld-available + +// Test should fail where linkage-name and mangled-name diverge (see issue https://github.com/llvm/llvm-project/issues/74565). 
+// Currently, this name divergence happens on Mach-O object file format, or on +// many (but not all) 32-bit Windows systems. +// +// XFAIL: system-darwin +// +// Mark 32-bit Windows as UNSUPPORTED for now as opposed to XFAIL. This test +// should fail on many (but not all) 32-bit Windows systems and succeed on the +// rest. The flexibility in triple string parsing makes it tricky to capture +// both sets accurately. i[3-9]86 specifies arch as Triple::ArchType::x86, (win32|windows) +// specifies OS as Triple::OS::Win32 +// +// UNSUPPORTED: target={{i.86.*windows.*}} + +// RUN: rm -rf %t && split-file %s %t && cd %t + +// Do setup work for all below tests. +// Generate raw profiles from real programs and convert it into indexed profiles. +// Use clangxx_pgogen for IR level instrumentation for C++. +// RUN: %clangxx_pgogen -fuse-ld=lld -O2 lib.cpp main.cpp -o main +// RUN: env LLVM_PROFILE_FILE=main.profraw %run ./main +// RUN: llvm-profdata merge main.profraw -o main.profdata + +// Use profile on lib and get bitcode, test that local function callee0 has +// expected !PGOFuncName metadata and external function callee1 doesn't have +// !PGOFuncName metadata. Explicitly skip ICP pass to test ICP happens as +// expected in the IR module that imports functions from lib. +// RUN: %clang -mllvm -disable-icp -fprofile-use=main.profdata -flto=thin -O2 -c lib.cpp -o lib.bc +// RUN: llvm-dis lib.bc -o - | FileCheck %s --check-prefix=PGOName + +// Use profile on main and get bitcode. +// RUN: %clang -fprofile-use=main.profdata -flto=thin -O2 -c main.cpp -o main.bc + +// Run llvm-lto to get summary file. +// RUN: llvm-lto -thinlto -o summary main.bc lib.bc + +// Test the imports of functions. Default import thresholds would work but do +// explicit override to be more futureproof. Note all functions have one basic +// block with a function-entry-count of one, so they are actually hot functions +// per default profile summary hotness cutoff. 
+// RUN: opt -passes=function-import -import-instr-limit=100 -import-cold-multiplier=1 -summary-file summary.thinlto.bc main.bc -o main.import.bc -print-imports 2>&1 | FileCheck %s --check-prefix=IMPORTS +// Test that '_Z11global_funcv' has indirect calls annotated with value profiles. +// RUN: llvm-dis main.import.bc -o - | FileCheck %s --check-prefix=IR + +// Test that both candidates are ICP'ed and there is no `!VP` in the IR. +// RUN: opt main.import.bc -icp-lto -passes=pgo-icall-prom -S -pass-remarks=pgo-icall-prom 2>&1 | FileCheck %s --check-prefixes=ICP-IR,ICP-REMARK --implicit-check-not="!VP" + +// IMPORTS: main.cpp: Import _Z7callee1v +// IMPORTS: main.cpp: Import _ZL7callee0v.llvm.[[#]] +// IMPORTS: main.cpp: Import _Z11global_funcv + +// PGOName: define {{(dso_local )?}}void @_Z7callee1v() #[[#]] !prof ![[#]] { +// PGOName: define internal void @_ZL7callee0v() #[[#]] !prof ![[#]] !PGOFuncName ![[#MD:]] { +// PGOName: ![[#MD]] = !{!"{{.*}}lib.cpp;_ZL7callee0v"} + +// IR-LABEL: define available_externally {{.*}} void @_Z11global_funcv() {{.*}} !prof ![[#]] { +// IR-NEXT: entry: +// IR-NEXT: %0 = load ptr, ptr @calleeAddrs +// IR-NEXT: tail call void %0(), !prof ![[#PROF1:]] +// IR-NEXT: %1 = load ptr, ptr getelementptr inbounds ([2 x ptr], ptr @calleeAddrs, +// IR-NEXT: tail call void %1(), !prof ![[#PROF2:]] + +// The GUID of indirect callee is the MD5 hash of `/path/to/lib.cpp;_ZL7callee0v` +// that depends on the directory. Use [[#]] for its MD5 hash. +// Use {{.*}} for integer types so the test works on 32-bit and 64-bit systems. 
+// IR: ![[#PROF1]] = !{!"VP", i32 0, {{.*}} 1, {{.*}} [[#]], {{.*}} 1} +// IR: ![[#PROF2]] = !{!"VP", i32 0, {{.*}} 1, {{.*}} -3993653843325621743, {{.*}} 1} + +// ICP-REMARK: Promote indirect call to _ZL7callee0v.llvm.[[#]] with count 1 out of 1 +// ICP-REMARK: Promote indirect call to _Z7callee1v with count 1 out of 1 + +// ICP-IR: br i1 %[[#]], label %if.true.direct_targ, label %if.false.orig_indirect, !prof ![[#BRANCH_WEIGHT1:]] +// ICP-IR: br i1 %[[#]], label %if.true.direct_targ1, label %if.false.orig_indirect2, !prof ![[#BRANCH_WEIGHT1]] +// ICP-IR: ![[#BRANCH_WEIGHT1]] = !{!"branch_weights", i32 1, i32 0} + +//--- lib.h +void global_func(); + +//--- lib.cpp +#include "lib.h" +static void callee0() {} +void callee1() {} +typedef void (*FPT)(); +FPT calleeAddrs[] = {callee0, callee1}; +// `global_func` might call one of two indirect callees. callee0 has internal +// linkage and callee1 has external linkage. +void global_func() { + FPT fp = calleeAddrs[0]; + fp(); + fp = calleeAddrs[1]; + fp(); +} + +//--- main.cpp +#include "lib.h" +int main() { global_func(); } diff --git a/llvm/include/llvm/IR/GlobalValue.h b/llvm/include/llvm/IR/GlobalValue.h index d1891c157099d4..e97a7f2b963606 100644 --- a/llvm/include/llvm/IR/GlobalValue.h +++ b/llvm/include/llvm/IR/GlobalValue.h @@ -41,6 +41,10 @@ namespace Intrinsic { typedef unsigned ID; } // end namespace Intrinsic +// Choose ';' as the delimiter. ':' was used once but it doesn't work well for +// Objective-C functions which commonly have :'s in their names. +inline constexpr char kGlobalIdentifierDelimiter = ';'; + class GlobalValue : public Constant { public: /// An enumeration for the kinds of linkage for global values. 
diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h index 288dc71d756aee..36be2e7d869e7b 100644 --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -171,6 +171,8 @@ inline StringRef getInstrProfCounterBiasVarName() { /// Return the marker used to separate PGO names during serialization. inline StringRef getInstrProfNameSeparator() { return "\01"; } +/// Please use getIRPGOFuncName for LLVM IR instrumentation. This function is +/// for front-end (Clang, etc) instrumentation. /// Return the modified name for function \c F suitable to be /// used the key for profile lookup. Variable \c InLTO indicates if this /// is called in LTO optimization passes. @@ -196,20 +198,22 @@ std::string getIRPGOFuncName(const Function &F, bool InLTO = false); std::pair<StringRef, StringRef> getParsedIRPGOFuncName(StringRef IRPGOFuncName); /// Return the name of the global variable used to store a function -/// name in PGO instrumentation. \c FuncName is the name of the function -/// returned by the \c getPGOFuncName call. +/// name in PGO instrumentation. \c FuncName is the IRPGO function name +/// (returned by \c getIRPGOFuncName) for LLVM IR instrumentation and PGO +/// function name (returned by \c getPGOFuncName) for front-end instrumentation. std::string getPGOFuncNameVarName(StringRef FuncName, GlobalValue::LinkageTypes Linkage); /// Create and return the global variable for function name used in PGO -/// instrumentation. \c FuncName is the name of the function returned -/// by \c getPGOFuncName call. +/// instrumentation. \c FuncName is the IRPGO function name (returned by +/// \c getIRPGOFuncName) for LLVM IR instrumentation and PGO function name +/// (returned by \c getPGOFuncName) for front-end instrumentation. GlobalVariable *createPGOFuncNameVar(Function &F, StringRef PGOFuncName); /// Create and return the global variable for function name used in PGO -/// instrumentation. 
/// \c FuncName is the name of the function -/// returned by \c getPGOFuncName call, \c M is the owning module, -/// and \c Linkage is the linkage of the instrumented function. +/// instrumentation. \c FuncName is the IRPGO function name (returned by +/// \c getIRPGOFuncName) for LLVM IR instrumentation and PGO function name +/// (returned by \c getPGOFuncName) for front-end instrumentation. GlobalVariable *createPGOFuncNameVar(Module &M, GlobalValue::LinkageTypes Linkage, StringRef PGOFuncName); @@ -417,11 +421,11 @@ uint64_t ComputeHash(StringRef K); } // end namespace IndexedInstrProf -/// A symbol table used for function PGO name look-up with keys +/// A symbol table used for function [IR]PGO name look-up with keys /// (such as pointers, md5hash values) to the function. A function's -/// PGO name or name's md5hash are used in retrieving the profile -/// data of the function. See \c getPGOFuncName() method for details -/// on how PGO name is formed. +/// [IR]PGO name or name's md5hash are used in retrieving the profile +/// data of the function. See \c getIRPGOFuncName() and \c getPGOFuncName +/// methods for details how [IR]PGO name is formed. class InstrProfSymtab { public: using AddrHashMap = std::vector<std::pair<uint64_t, uint64_t>>; diff --git a/llvm/lib/IR/Globals.cpp b/llvm/lib/IR/Globals.cpp index 51bdbeb0abf2c4..239acd2181e854 100644 --- a/llvm/lib/IR/Globals.cpp +++ b/llvm/lib/IR/Globals.cpp @@ -144,25 +144,27 @@ void GlobalObject::copyAttributesFrom(const GlobalObject *Src) { std::string GlobalValue::getGlobalIdentifier(StringRef Name, GlobalValue::LinkageTypes Linkage, StringRef FileName) { - // Value names may be prefixed with a binary '1' to indicate // that the backend should not modify the symbols due to any platform // naming convention. Do not include that '1' in the PGO profile name. 
if (Name[0] == '\1') Name = Name.substr(1); - std::string NewName = std::string(Name); + std::string GlobalName; if (llvm::GlobalValue::isLocalLinkage(Linkage)) { // For local symbols, prepend the main file name to distinguish them. // Do not include the full path in the file name since there's no guarantee // that it will stay the same, e.g., if the files are checked out from // version control in different locations. if (FileName.empty()) - NewName = NewName.insert(0, "<unknown>:"); + GlobalName += "<unknown>"; else - NewName = NewName.insert(0, FileName.str() + ":"); + GlobalName += FileName; + + GlobalName += kGlobalIdentifierDelimiter; } - return NewName; + GlobalName += Name; + return GlobalName; } std::string GlobalValue::getGlobalIdentifier() const { diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp index 649d814cfd9de0..134a400e639c4b 100644 --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -246,11 +246,27 @@ std::string InstrProfError::message() const { char InstrProfError::ID = 0; -std::string getPGOFuncName(StringRef RawFuncName, - GlobalValue::LinkageTypes Linkage, +std::string getPGOFuncName(StringRef Name, GlobalValue::LinkageTypes Linkage, StringRef FileName, uint64_t Version LLVM_ATTRIBUTE_UNUSED) { - return GlobalValue::getGlobalIdentifier(RawFuncName, Linkage, FileName); + // Value names may be prefixed with a binary '1' to indicate + // that the backend should not modify the symbols due to any platform + // naming convention. Do not include that '1' in the PGO profile name. + if (Name[0] == '\1') + Name = Name.substr(1); + + std::string NewName = std::string(Name); + if (llvm::GlobalValue::isLocalLinkage(Linkage)) { + // For local symbols, prepend the main file name to distinguish them. 
+ // Do not include the full path in the file name since there's no guarantee + // that it will stay the same, e.g., if the files are checked out from + // version control in different locations. + if (FileName.empty()) + NewName = NewName.insert(0, "<unknown>:"); + else + NewName = NewName.insert(0, FileName.str() + ":"); + } + return NewName; } // Strip NumPrefix level of directory name from PathNameStr. If the number of @@ -300,12 +316,10 @@ getIRPGONameForGlobalObject(const GlobalObject &GO, GlobalValue::LinkageTypes Linkage, StringRef FileName) { SmallString<64> Name; - if (llvm::GlobalValue::isLocalLinkage(Linkage)) { - Name.append(FileName.empty() ? "<unknown>" : FileName); - Name.append(";"); - } + // FIXME: Mangler's handling is kept outside of `getGlobalIdentifier` for now. + // For more details please check issue #74565. Mangler().getNameWithPrefix(Name, &GO, /*CannotUsePrivateLabel=*/true); - return Name.str().str(); + return GlobalValue::getGlobalIdentifier(Name, Linkage, FileName); } static std::optional<std::string> lookupPGONameFromMetadata(MDNode *MD) { @@ -352,6 +366,9 @@ std::string getIRPGOFuncName(const Function &F, bool InLTO) { return getIRPGOObjectName(F, InLTO, getPGOFuncNameMetadata(F)); } +// Please use getIRPGOFuncName for LLVM IR instrumentation. This function is +// for front-end (Clang, etc) instrumentation. +// The implementation is kept for profile matching from older profiles. // This is similar to `getIRPGOFuncName` except that this function calls // 'getPGOFuncName' to get a name and `getIRPGOFuncName` calls // 'getIRPGONameForGlobalObject'. See the difference between two callees in the @@ -384,7 +401,8 @@ getParsedIRPGOFuncName(StringRef IRPGOFuncName) { StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName, StringRef FileName) { if (FileName.empty()) return PGOFuncName; - // Drop the file name including ':'. See also getPGOFuncName. + // Drop the file name including ':' or ';'. See getIRPGONameForGlobalObject as + // well. 
if (PGOFuncName.starts_with(FileName)) PGOFuncName = PGOFuncName.drop_front(FileName.size() + 1); return PGOFuncName; diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp index 068922d421f8b9..8f62df79d5b7e8 100644 --- a/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/llvm/lib/ProfileData/InstrProfReader.cpp @@ -1008,12 +1008,13 @@ class llvm::InstrProfReaderItaniumRemapper /// Extract the original function name from a PGO function name. static StringRef extractName(StringRef Name) { - // We can have multiple :-separated pieces; there can be pieces both - // before and after the mangled name. Find the first part that starts - // with '_Z'; we'll assume that's the mangled name we want. + // We can have multiple pieces separated by kGlobalIdentifierDelimiter ( + // semicolon now and colon in older profiles); there can be pieces both + // before and after the mangled name. Find the first part that starts with + // '_Z'; we'll assume that's the mangled name we want. 
std::pair<StringRef, StringRef> Parts = {StringRef(), Name}; while (true) { - Parts = Parts.second.split(':'); + Parts = Parts.second.split(kGlobalIdentifierDelimiter); if (Parts.first.starts_with("_Z")) return Parts.first; if (Parts.second.empty()) diff --git a/llvm/test/Bitcode/thinlto-function-summary-originalnames.ll b/llvm/test/Bitcode/thinlto-function-summary-originalnames.ll index 7cc9654c8c7b12..0139f00b4aa3f4 100644 --- a/llvm/test/Bitcode/thinlto-function-summary-originalnames.ll +++ b/llvm/test/Bitcode/thinlto-function-summary-originalnames.ll @@ -6,13 +6,13 @@ ; COMBINED: <GLOBALVAL_SUMMARY_BLOCK ; COMBINED-NEXT: <VERSION ; COMBINED-NEXT: <FLAGS -; COMBINED-NEXT: <VALUE_GUID {{.*}} op1=4947176790635855146/> -; COMBINED-NEXT: <VALUE_GUID {{.*}} op1=-6591587165810580810/> -; COMBINED-NEXT: <VALUE_GUID {{.*}} op1=-4377693495213223786/> +; COMBINED-NEXT: <VALUE_GUID {{.*}} op1=686735765308251824/> +; COMBINED-NEXT: <VALUE_GUID {{.*}} op1=4507502870619175775/> +; COMBINED-NEXT: <VALUE_GUID {{.*}} op1=-8118561185538785069/> ; COMBINED-DAG: <COMBINED_PROFILE{{ }} -; COMBINED-DAG: <COMBINED_ORIGINAL_NAME op0=6699318081062747564/> -; COMBINED-DAG: <COMBINED_GLOBALVAR_INIT_REFS ; COMBINED-DAG: <COMBINED_ORIGINAL_NAME op0=-2012135647395072713/> +; COMBINED-DAG: <COMBINED_GLOBALVAR_INIT_REFS +; COMBINED-DAG: <COMBINED_ORIGINAL_NAME op0=6699318081062747564/> ; COMBINED-DAG: <COMBINED_ALIAS ; COMBINED-DAG: <COMBINED_ORIGINAL_NAME op0=-4170563161550796836/> ; COMBINED-NEXT: </GLOBALVAL_SUMMARY_BLOCK> diff --git a/llvm/test/ThinLTO/X86/memprof-basic.ll b/llvm/test/ThinLTO/X86/memprof-basic.ll index 0d466830ba57d6..54e01e5fcdf955 100644 --- a/llvm/test/ThinLTO/X86/memprof-basic.ll +++ b/llvm/test/ThinLTO/X86/memprof-basic.ll @@ -148,7 +148,7 @@ attributes #0 = { noinline optnone } ; DUMP: Edge from Callee [[BAR]] to Caller: [[BAZ:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 ; DUMP: Node [[BAZ]] -; DUMP: Callee: 9832687305761716512 (_Z3barv) Clones: 0 StackIds: 
2 (clone 0) +; DUMP: Callee: 11481133863268513686 (_Z3barv) Clones: 0 StackIds: 2 (clone 0) ; DUMP: AllocTypes: NotColdCold ; DUMP: ContextIds: 1 2 ; DUMP: CalleeEdges: @@ -157,7 +157,7 @@ attributes #0 = { noinline optnone } ; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 ; DUMP: Node [[FOO]] -; DUMP: Callee: 5878270615442837395 (_Z3bazv) Clones: 0 StackIds: 3 (clone 0) +; DUMP: Callee: 1807954217441101578 (_Z3bazv) Clones: 0 StackIds: 3 (clone 0) ; DUMP: AllocTypes: NotColdCold ; DUMP: ContextIds: 1 2 ; DUMP: CalleeEdges: @@ -167,7 +167,7 @@ attributes #0 = { noinline optnone } ; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2 ; DUMP: Node [[MAIN1]] -; DUMP: Callee: 6731117468105397038 (_Z3foov) Clones: 0 StackIds: 0 (clone 0) +; DUMP: Callee: 8107868197919466657 (_Z3foov) Clones: 0 StackIds: 0 (clone 0) ; DUMP: AllocTypes: NotCold ; DUMP: ContextIds: 1 ; DUMP: CalleeEdges: @@ -175,7 +175,7 @@ attributes #0 = { noinline optnone } ; DUMP: CallerEdges: ; DUMP: Node [[MAIN2]] -; DUMP: Callee: 6731117468105397038 (_Z3foov) Clones: 0 StackIds: 1 (clone 0) +; DUMP: Callee: 8107868197919466657 (_Z3foov) Clones: 0 StackIds: 1 (clone 0) ; DUMP: AllocTypes: Cold ; DUMP: ContextIds: 2 ; DUMP: CalleeEdges: @@ -197,7 +197,7 @@ attributes #0 = { noinline optnone } ; DUMP: Clones: [[BAR2:0x[a-z0-9]+]] ; DUMP: Node [[BAZ]] -; DUMP: Callee: 9832687305761716512 (_Z3barv) Clones: 0 StackIds: 2 (clone 0) +; DUMP: Callee: 11481133863268513686 (_Z3barv) Clones: 0 StackIds: 2 (clone 0) ; DUMP: AllocTypes: NotCold ; DUMP: ContextIds: 1 ; DUMP: CalleeEdges: @@ -207,7 +207,7 @@ attributes #0 = { noinline optnone } ; DUMP: Clones: [[BAZ2:0x[a-z0-9]+]] ; DUMP: Node [[FOO]] -; DUMP: Callee: 5878270615442837395 (_Z3bazv) Clones: 0 StackIds: 3 (clone 0) +; DUMP: Callee: 1807954217441101578 (_Z3bazv) Clones: 0 StackIds: 3 (clone 0) ; DUMP: AllocTypes: NotCold ; DUMP: ContextIds: 1 ; DUMP: 
CalleeEdges: @@ -217,7 +217,7 @@ attributes #0 = { noinline optnone } ; DUMP: Clones: [[FOO2:0x[a-z0-9]+]] ; DUMP: Node [[MAIN1]] -; DUMP: Callee: 6731117468105397038 (_Z3foov) Clones: 0 StackIds: 0 (clone 0) +; DUMP: Callee: 8107868197919466657 (_Z3foov) Clones: 0 StackIds: 0 (clone 0) ; DUMP: AllocTypes: NotCold ; DUMP: ContextIds: 1 ; DUMP: CalleeEdges: @@ -225,7 +225,7 @@ attributes #0 = { noinline optnone } ; DUMP: CallerEdges: ; DUMP: Node [[MAIN2]] -; DUMP: Callee: 6731117468105397038 (_Z3foov) Clones: 0 StackIds: 1 (clone 0) +; DUMP: Callee: 8107868197919466657 (_Z3foov) Clones: 0 StackIds: 1 (clone 0) ; DUMP: AllocTypes: Cold ; DUMP: ContextIds: 2 ; DUMP: CalleeEdges: @@ -233,7 +233,7 @@ attributes #0 = { noinline optnone } ; DUMP: CallerEdges: ; DUMP: Node [[FOO2]] -; DUMP: Callee: 5878270615442837395 (_Z3bazv) Clones: 0 StackIds: 3 (clone 0) +; DUMP: Callee: 1807954217441101578 (_Z3bazv) Clones: 0 StackIds: 3 (clone 0) ; DUMP: AllocTypes: Cold ; DUMP: ContextIds: 2 ; DUMP: CalleeEdges: @@ -243,7 +243,7 @@ attributes #0 = { noinline optnone } ; DUMP: Clone of [[FOO]] ; DUMP: Node [[BAZ2]] -; DUMP: Callee: 9832687305761716512 (_Z3barv) Clones: 0 StackIds: 2 (clone 0) +; DUMP: Callee: 11481133863268513686 (_Z3barv) Clones: 0 StackIds: 2 (clone 0) ; DUMP: AllocTypes: Cold ; DUMP: ContextIds: 2 ; DUMP: CalleeEdges: @@ -344,7 +344,7 @@ attributes #0 = { noinline optnone } ; DOTCLONED: } -; DISTRIB: ^[[BAZ:[0-9]+]] = gv: (guid: 5878270615442837395, {{.*}} callsites: ((callee: ^[[BAR:[0-9]+]], clones: (0, 1) -; DISTRIB: ^[[FOO:[0-9]+]] = gv: (guid: 6731117468105397038, {{.*}} callsites: ((callee: ^[[BAZ]], clones: (0, 1) -; DISTRIB: ^[[BAR]] = gv: (guid: 9832687305761716512, {{.*}} allocs: ((versions: (notcold, cold) +; DISTRIB: ^[[BAZ:[0-9]+]] = gv: (guid: 1807954217441101578, {{.*}} callsites: ((callee: ^[[BAR:[0-9]+]], clones: (0, 1) +; DISTRIB: ^[[FOO:[0-9]+]] = gv: (guid: 8107868197919466657, {{.*}} callsites: ((callee: ^[[BAZ]], clones: (0, 1) +; DISTRIB: 
^[[BAR]] = gv: (guid: 11481133863268513686, {{.*}} allocs: ((versions: (notcold, cold) ; DISTRIB: ^[[MAIN:[0-9]+]] = gv: (guid: 15822663052811949562, {{.*}} callsites: ((callee: ^[[FOO]], clones: (0), {{.*}} (callee: ^[[FOO]], clones: (1) diff --git a/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll b/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll index f7ba0d27dca78a..65d794e9cba87c 100644 --- a/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll +++ b/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll @@ -260,8 +260,8 @@ attributes #0 = { noinline optnone} ; STATS-BE: 1 memprof-context-disambiguation - Number of original (not cloned) allocations with memprof profiles during ThinLTO backend -; DISTRIB: ^[[C:[0-9]+]] = gv: (guid: 1643923691937891493, {{.*}} callsites: ((callee: ^[[D:[0-9]+]], clones: (1) -; DISTRIB: ^[[D]] = gv: (guid: 4881081444663423788, {{.*}} allocs: ((versions: (notcold, cold) -; DISTRIB: ^[[B:[0-9]+]] = gv: (guid: 14590037969532473829, {{.*}} callsites: ((callee: ^[[D]], clones: (1) -; DISTRIB: ^[[F:[0-9]+]] = gv: (guid: 17035303613541779335, {{.*}} callsites: ((callee: ^[[D]], clones: (0) -; DISTRIB: ^[[E:[0-9]+]] = gv: (guid: 17820708772846654376, {{.*}} callsites: ((callee: ^[[D]], clones: (1) +; DISTRIB: ^[[E:[0-9]+]] = gv: (guid: 331966645857188136, {{.*}} callsites: ((callee: ^[[D:[0-9]+]], clones: (1) +; DISTRIB: ^[[D]] = gv: (guid: 11079124245221721799, {{.*}} allocs: ((versions: (notcold, cold) +; DISTRIB: ^[[F:[0-9]+]] = gv: (guid: 11254287701717398916, {{.*}} callsites: ((callee: ^[[D]], clones: (0) +; DISTRIB: ^[[B:[0-9]+]] = gv: (guid: 13579056193435805313, {{.*}} callsites: ((callee: ^[[D]], clones: (1) +; DISTRIB: ^[[C:[0-9]+]] = gv: (guid: 15101436305866936160, {{.*}} callsites: ((callee: ^[[D:[0-9]+]], clones: (1) diff --git a/llvm/test/ThinLTO/X86/memprof-funcassigncloning.ll b/llvm/test/ThinLTO/X86/memprof-funcassigncloning.ll index 9a72ae43b2f1e4..f1a494d077fefc 100644 --- 
a/llvm/test/ThinLTO/X86/memprof-funcassigncloning.ll +++ b/llvm/test/ThinLTO/X86/memprof-funcassigncloning.ll @@ -176,7 +176,7 @@ attributes #0 = { noinline optnone } ; DUMP: Clones: [[ENEW1CLONE:0x[a-z0-9]+]] ; DUMP: Node [[D:0x[a-z0-9]+]] -; DUMP: Callee: 10758063066234039248 (_Z1EPPcS0_) Clones: 0 StackIds: 0 (clone 0) +; DUMP: Callee: 16147627620923572899 (_Z1EPPcS0_) Clones: 0 StackIds: 0 (clone 0) ; DUMP: AllocTypes: NotColdCold ; DUMP: ContextIds: 1 6 ; DUMP: CalleeEdges: @@ -185,7 +185,7 @@ attributes #0 = { noinline optnone } ; DUMP: CallerEdges: ; DUMP: Node [[C]] -; DUMP: Callee: 10758063066234039248 (_Z1EPPcS0_) Clones: 0 StackIds: 1 (clone 0) +; DUMP: Callee: 16147627620923572899 (_Z1EPPcS0_) Clones: 0 StackIds: 1 (clone 0) ; DUMP: AllocTypes: NotColdCold ; DUMP: ContextIds: 2 5 ; DUMP: CalleeEdges: @@ -194,7 +194,7 @@ attributes #0 = { noinline optnone } ; DUMP: CallerEdges: ; DUMP: Node [[B]] -; DUMP: Callee: 10758063066234039248 (_Z1EPPcS0_) Clones: 0 StackIds: 2 (clone 0) +; DUMP: Callee: 16147627620923572899 (_Z1EPPcS0_) Clones: 0 StackIds: 2 (clone 0) ; DUMP: AllocTypes: NotCold ; DUMP: ContextIds: 3 4 ; DUMP: CalleeEdges: diff --git a/llvm/test/ThinLTO/X86/memprof-indirectcall.ll b/llvm/test/ThinLTO/X86/memprof-indirectcall.ll index 76273959f4f4ac..07a52f441ca278 100644 --- a/llvm/test/ThinLTO/X86/memprof-indirectcall.ll +++ b/llvm/test/ThinLTO/X86/memprof-indirectcall.ll @@ -202,7 +202,7 @@ attributes #0 = { noinline optnone } ; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 6 ; DUMP: Node [[AX]] -; DUMP: Callee: 12914368124089294956 (_Z3foov) Clones: 0 StackIds: 6 (clone 0) +; DUMP: Callee: 15844184524768596045 (_Z3foov) Clones: 0 StackIds: 6 (clone 0) ; DUMP: AllocTypes: NotColdCold ; DUMP: ContextIds: 1 2 ; DUMP: CalleeEdges: @@ -225,7 +225,7 @@ attributes #0 = { noinline optnone } ; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN6:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 5 ; DUMP: Node 
[[MAIN3]] -; DUMP: Callee: 4095956691517954349 (_Z3barP1A) Clones: 0 StackIds: 4 (clone 0) +; DUMP: Callee: 2040285415115148168 (_Z3barP1A) Clones: 0 StackIds: 4 (clone 0) ; DUMP: AllocTypes: NotCold ; DUMP: ContextIds: 1 ; DUMP: CalleeEdges: @@ -233,7 +233,7 @@ attributes #0 = { noinline optnone } ; DUMP: CallerEdges: ; DUMP: Node [[MAIN4]] -; DUMP: Callee: 4095956691517954349 (_Z3barP1A) Clones: 0 StackIds: 5 (clone 0) +; DUMP: Callee: 2040285415115148168 (_Z3barP1A) Clones: 0 StackIds: 5 (clone 0) ; DUMP: AllocTypes: Cold ; DUMP: ContextIds: 2 ; DUMP: CalleeEdges: @@ -241,7 +241,7 @@ attributes #0 = { noinline optnone } ; DUMP: CallerEdges: ; DUMP: Node [[MAIN1]] -; DUMP: Callee: 12914368124089294956 (_Z3foov) Clones: 0 StackIds: 0 (clone 0) +; DUMP: Callee: 15844184524768596045 (_Z3foov) Clones: 0 StackIds: 0 (clone 0) ; DUMP: AllocTypes: NotCold ; DUMP: ContextIds: 3 ; DUMP: CalleeEdges: @@ -249,7 +249,7 @@ attributes #0 = { noinline optnone } ; DUMP: CallerEdges: ; DUMP: Node [[BX]] -; DUMP: Callee: 12914368124089294956 (_Z3foov) Clones: 0 StackIds: 7 (clone 0) +; DUMP: Callee: 15844184524768596045 (_Z3foov) Clones: 0 StackIds: 7 (clone 0) ; DUMP: AllocTypes: NotColdCold ; DUMP: ContextIds: 4 5 ; DUMP: CalleeEdges: @@ -258,7 +258,7 @@ attributes #0 = { noinline optnone } ; DUMP: Edge from Callee [[BX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 4 5 ; DUMP: Node [[MAIN5]] -; DUMP: Callee: 4095956691517954349 (_Z3barP1A) Clones: 0 StackIds: 2 (clone 0) +; DUMP: Callee: 2040285415115148168 (_Z3barP1A) Clones: 0 StackIds: 2 (clone 0) ; DUMP: AllocTypes: Cold ; DUMP: ContextIds: 4 ; DUMP: CalleeEdges: @@ -266,7 +266,7 @@ attributes #0 = { noinline optnone } ; DUMP: CallerEdges: ; DUMP: Node [[MAIN6]] -; DUMP: Callee: 4095956691517954349 (_Z3barP1A) Clones: 0 StackIds: 3 (clone 0) +; DUMP: Callee: 2040285415115148168 (_Z3barP1A) Clones: 0 StackIds: 3 (clone 0) ; DUMP: AllocTypes: NotCold ; DUMP: ContextIds: 5 ; DUMP: CalleeEdges: @@ -274,7 +274,7 @@ 
attributes #0 = { noinline optnone } ; DUMP: CallerEdges: ; DUMP: Node [[MAIN2]] -; DUMP: Callee: 12914368124089294956 (_Z3foov) Clones: 0 StackIds: 1 (clone 0) +; DUMP: Callee: 15844184524768596045 (_Z3foov) Clones: 0 StackIds: 1 (clone 0) ; DUMP: AllocTypes: Cold ; DUMP: ContextIds: 6 ; DUMP: CalleeEdges: @@ -302,7 +302,7 @@ attributes #0 = { noinline optnone } ; DUMP: Clones: [[FOO2:0x[a-z0-9]+]] ; DUMP: Node [[AX]] -; DUMP: Callee: 12914368124089294956 (_Z3foov) Clones: 0 StackIds: 6 (clone 0) +; DUMP: Callee: 15844184524768596045 (_Z3foov) Clones: 0 StackIds: 6 (clone 0) ; DUMP: AllocTypes: NotColdCold ; DUMP: ContextIds: 1 2 ; DUMP: CalleeEdges: @@ -324,7 +324,7 @@ attributes #0 = { noinline optnone } ; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN6]] AllocTypes: NotCold ContextIds: 5 ; DUMP: Node [[MAIN3]] -; DUMP: Callee: 4095956691517954349 (_Z3barP1A) Clones: 0 StackIds: 4 (clone 0) +; DUMP: Callee: 2040285415115148168 (_Z3barP1A) Clones: 0 StackIds: 4 (clone 0) ; DUMP: AllocTypes: NotCold ; DUMP: ContextIds: 1 ; DUMP: CalleeEdges: @@ -332,7 +332,7 @@ attributes #0 = { noinline optnone } ; DUMP: CallerEdges: ; DUMP: Node [[MAIN4]] -; DUMP: Callee: 4095956691517954349 (_Z3barP1A) Clones: 0 StackIds: 5 (clone 0) +; DUMP: Callee: 2040285415115148168 (_Z3barP1A) Clones: 0 StackIds: 5 (clone 0) ; DUMP: AllocTypes: Cold ; DUMP: ContextIds: 2 ; DUMP: CalleeEdges: @@ -340,7 +340,7 @@ attributes #0 = { noinline optnone } ; DUMP: CallerEdges: ; DUMP: Node [[MAIN1]] -; DUMP: Callee: 12914368124089294956 (_Z3foov) Clones: 0 StackIds: 0 (clone 0) +; DUMP: Callee: 15844184524768596045 (_Z3foov) Clones: 0 StackIds: 0 (clone 0) ; DUMP: AllocTypes: NotCold ; DUMP: ContextIds: 3 ; DUMP: CalleeEdges: @@ -348,7 +348,7 @@ attributes #0 = { noinline optnone } ; DUMP: CallerEdges: ; DUMP: Node [[BX]] -; DUMP: Callee: 12914368124089294956 (_Z3foov) Clones: 0 StackIds: 7 (clone 0) +; DUMP: Callee: 15844184524768596045 (_Z3foov) Clones: 0 StackIds: 7 (clone 0) ; DUMP: 
AllocTypes: NotColdCold ; DUMP: ContextIds: 4 5 ; DUMP: CalleeEdges: @@ -357,7 +357,7 @@ attributes #0 = { noinline optnone } ; DUMP: Edge from Callee [[BX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 4 5 ; DUMP: Node [[MAIN5]] -; DUMP: Callee: 4095956691517954349 (_Z3barP1A) Clones: 0 StackIds: 2 (clone 0) +; DUMP: Callee: 2040285415115148168 (_Z3barP1A) Clones: 0 StackIds: 2 (clone 0) ; DUMP: AllocTypes: Cold ; DUMP: ContextIds: 4 ; DUMP: CalleeEdges: @@ -365,7 +365,7 @@ attributes #0 = { noinline optnone } ; DUMP: CallerEdges: ; DUMP: Node [[MAIN6]] -; DUMP: Callee: 4095956691517954349 (_Z3barP1A) Clones: 0 StackIds: 3 (clone 0) +; DUMP: Callee: 2040285415115148168 (_Z3barP1A) Clones: 0 StackIds: 3 (clone 0) ; DUMP: AllocTypes: NotCold ; DUMP: ContextIds: 5 ; DUMP: CalleeEdges: @@ -373,7 +373,7 @@ attributes #0 = { noinline optnone } ; DUMP: CallerEdges: ; DUMP: Node [[MAIN2]] -; DUMP: Callee: 12914368124089294956 (_Z3foov) Clones: 0 StackIds: 1 (clone 0) +; DUMP: Callee: 15844184524768596045 (_Z3foov) Clones: 0 StackIds: 1 (clone 0) ; DUMP: AllocTypes: Cold ; DUMP: ContextIds: 6 ; DUMP: CalleeEdges: diff --git a/llvm/test/ThinLTO/X86/memprof-inlined.ll b/llvm/test/ThinLTO/X86/memprof-inlined.ll index feb9c94344223c..89df345b220423 100644 --- a/llvm/test/ThinLTO/X86/memprof-inlined.ll +++ b/llvm/test/ThinLTO/X86/memprof-inlined.ll @@ -170,7 +170,7 @@ attributes #0 = { noinline optnone } ; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2 ; DUMP: Node [[MAIN1]] -; DUMP: Callee: 2229562716906371625 (_Z3foov) Clones: 0 StackIds: 2 (clone 0) +; DUMP: Callee: 644169328058379925 (_Z3foov) Clones: 0 StackIds: 2 (clone 0) ; DUMP: AllocTypes: NotCold ; DUMP: ContextIds: 1 3 ; DUMP: CalleeEdges: @@ -179,7 +179,7 @@ attributes #0 = { noinline optnone } ; DUMP: CallerEdges: ; DUMP: Node [[MAIN2]] -; DUMP: Callee: 2229562716906371625 (_Z3foov) Clones: 0 StackIds: 3 (clone 0) +; DUMP: Callee: 644169328058379925 
(_Z3foov) Clones: 0 StackIds: 3 (clone 0) ; DUMP: AllocTypes: Cold ; DUMP: ContextIds: 2 4 ; DUMP: CalleeEdges: @@ -201,7 +201,7 @@ attributes #0 = { noinline optnone } ;; This is the node synthesized for the call to bar in foo that was created ;; by inlining baz into foo. ; DUMP: Node [[FOO]] -; DUMP: Callee: 16064618363798697104 (_Z3barv) Clones: 0 StackIds: 0, 1 (clone 0) +; DUMP: Callee: 10349908617508457487 (_Z3barv) Clones: 0 StackIds: 0, 1 (clone 0) ; DUMP: AllocTypes: NotColdCold ; DUMP: ContextIds: 3 4 ; DUMP: CalleeEdges: @@ -234,7 +234,7 @@ attributes #0 = { noinline optnone } ; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2 ; DUMP: Node [[MAIN1]] -; DUMP: Callee: 2229562716906371625 (_Z3foov) Clones: 0 StackIds: 2 (clone 0) +; DUMP: Callee: 644169328058379925 (_Z3foov) Clones: 0 StackIds: 2 (clone 0) ; DUMP: AllocTypes: NotCold ; DUMP: ContextIds: 1 3 ; DUMP: CalleeEdges: @@ -243,7 +243,7 @@ attributes #0 = { noinline optnone } ; DUMP: CallerEdges: ; DUMP: Node [[MAIN2]] -; DUMP: Callee: 2229562716906371625 (_Z3foov) Clones: 0 StackIds: 3 (clone 0) +; DUMP: Callee: 644169328058379925 (_Z3foov) Clones: 0 StackIds: 3 (clone 0) ; DUMP: AllocTypes: Cold ; DUMP: ContextIds: 2 4 ; DUMP: CalleeEdges: @@ -264,7 +264,7 @@ attributes #0 = { noinline optnone } ; DUMP: Clones: [[BAR2:0x[a-z0-9]+]] ; DUMP: Node [[FOO]] -; DUMP: Callee: 16064618363798697104 (_Z3barv) Clones: 0 StackIds: 0, 1 (clone 0) +; DUMP: Callee: 10349908617508457487 (_Z3barv) Clones: 0 StackIds: 0, 1 (clone 0) ; DUMP: AllocTypes: NotCold ; DUMP: ContextIds: 3 ; DUMP: CalleeEdges: @@ -274,7 +274,7 @@ attributes #0 = { noinline optnone } ; DUMP: Clones: [[FOO3]] ; DUMP: Node [[FOO3]] -; DUMP: Callee: 16064618363798697104 (_Z3barv) Clones: 0 StackIds: 0, 1 (clone 0) +; DUMP: Callee: 10349908617508457487 (_Z3barv) Clones: 0 StackIds: 0, 1 (clone 0) ; DUMP: AllocTypes: Cold ; DUMP: ContextIds: 4 ; DUMP: CalleeEdges: diff --git 
a/llvm/test/Transforms/PGOProfile/Inputs/thinlto_indirect_call_promotion.ll b/llvm/test/Transforms/PGOProfile/Inputs/thinlto_indirect_call_promotion.ll deleted file mode 100644 index 7412120bb52cf5..00000000000000 --- a/llvm/test/Transforms/PGOProfile/Inputs/thinlto_indirect_call_promotion.ll +++ /dev/null @@ -1,16 +0,0 @@ -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -source_filename = "thinlto_indirect_call_promotion.c" - -define void @a() { -entry: - ret void -} - -define internal void @c() !PGOFuncName !1 { -entry: - ret void -} - -!1 = !{!"thinlto_indirect_call_promotion.c:c"} diff --git a/llvm/test/Transforms/PGOProfile/Inputs/thinlto_indirect_call_promotion.profraw b/llvm/test/Transforms/PGOProfile/Inputs/thinlto_indirect_call_promotion.profraw new file mode 100644 index 0000000000000000000000000000000000000000..5efda10bb98a941c04b6846db05d3691bc36aac0 GIT binary patch literal 528 zcmZoHO3N=Q$obF700xW@ih+Rz#(>i3d^BkWXQ;q~{}ABueD@>eRsa8Q&U2Q%6Ux8< zGg$D|W`(~SCZ7PRdViVAmkXvIW}d)Ih<X?wrVqxCo@t!pb5*AXrhf(0KA5}$0sU81 zHLMa3<=qEq6WVr<y#P(W0aX7ENf)@gnP7$k>0eiSpPQvjy$#d`cE1Ih{ssc}!~BDb zHZZQZ)!lF8?|0^;r?06_@|lx9m%RP9_4R*h?2?>fc+6c=YT5HA>Z;F<ET8&Jbvh$M re~rLy7M_Y*r+v?yVP;^FW?Tk#tp$*T*#UAtBa{ORADA30ykUF*Dr$#p literal 0 HcmV?d00001 diff --git a/llvm/test/Transforms/PGOProfile/Inputs/update_thinlto_indirect_call_promotion_inputs.sh b/llvm/test/Transforms/PGOProfile/Inputs/update_thinlto_indirect_call_promotion_inputs.sh new file mode 100755 index 00000000000000..9676b042a641ca --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/Inputs/update_thinlto_indirect_call_promotion_inputs.sh @@ -0,0 +1,62 @@ +#!/bin/bash + +if [ $# -lt 1 ]; then + echo "Path to clang required!" + echo "Usage: update_thinlto_indirect_call_promotion_inputs.sh /path/to/updated/clang" + exit 1 +else + CLANG=$1 +fi + +# Remember current directory. 
+CURDIR=$PWD + +# Allows the script to be invoked from other directories. +OUTDIR=$(dirname $(realpath -s $0)) +cd $OUTDIR + +# Creates trivial header file to expose `global_func`. +cat > lib.h << EOF +void global_func(); +EOF + +# Creates lib.cc. `global_func` might call one of two indirect callees. One +# callee has internal linkage and the other has external linkage. +cat > lib.cc << EOF +#include "lib.h" +static void callee0() {} +void callee1() {} +typedef void (*FPT)(); +FPT calleeAddrs[] = {callee0, callee1}; +void global_func() { + FPT fp = nullptr; + fp = calleeAddrs[0]; + fp(); + fp = calleeAddrs[1]; + fp(); +} +EOF + +# Create main.cc. Function `main` calls `global_func`. +cat > main.cc << EOF +#include "lib.h" +int main() { + global_func(); +} +EOF + +# Clean up temporary files on exit and return to original directory. +cleanup() { + rm -f $OUTDIR/lib.h + rm -f $OUTDIR/lib.cc + rm -f $OUTDIR/main.cc + rm -f $OUTDIR/lib.h.pch + rm -f $OUTDIR/a.out + cd $CURDIR +} +trap cleanup EXIT + +# Generate instrumented binary +${CLANG} -fuse-ld=lld -O2 -fprofile-generate=. 
lib.h lib.cc main.cc +# Create raw profiles +env LLVM_PROFILE_FILE=thinlto_indirect_call_promotion.profraw ./a.out diff --git a/llvm/test/Transforms/PGOProfile/thinlto_indirect_call_promotion.ll b/llvm/test/Transforms/PGOProfile/thinlto_indirect_call_promotion.ll index 173296f223e56a..63e2a7a904a1bd 100644 --- a/llvm/test/Transforms/PGOProfile/thinlto_indirect_call_promotion.ll +++ b/llvm/test/Transforms/PGOProfile/thinlto_indirect_call_promotion.ll @@ -1,39 +1,84 @@ -; Do setup work for all below tests: generate bitcode and combined index -; RUN: opt -module-summary %s -o %t.bc -; RUN: opt -module-summary %p/Inputs/thinlto_indirect_call_promotion.ll -o %t2.bc -; RUN: llvm-lto -thinlto -o %t3 %t.bc %t2.bc +; The raw profiles (and reduced IR if needed) could be re-generated (e.g., when +; there is a profile version bump) from script +; Inputs/update_thinlto_indirect_call_promotion_inputs.sh +; +; The script generates raw profiles. This regression test will convert it to +; indexed profiles. This way the test exercises code path where a profiled +; callee address in raw profiles is converted to function hash in index profiles. -; RUN: opt -passes=function-import -summary-file %t3.thinlto.bc %t.bc -o %t4.bc -print-imports 2>&1 | FileCheck %s --check-prefix=IMPORTS -; IMPORTS-DAG: Import a -; IMPORTS-DAG: Import c +; The raw profiles store compressed function names, so profile reader should +; be built with zlib support to decompress them. 
+; REQUIRES: zlib -; RUN: opt %t4.bc -icp-lto -passes=pgo-icall-prom -S | FileCheck %s --check-prefix=ICALL-PROM -; RUN: opt %t4.bc -icp-lto -passes=pgo-icall-prom -S -pass-remarks=pgo-icall-prom 2>&1 | FileCheck %s --check-prefix=PASS-REMARK -; PASS-REMARK: Promote indirect call to a with count 1 out of 1 -; PASS-REMARK: Promote indirect call to c.llvm.0 with count 1 out of 1 +; RUN: rm -rf %t && split-file %s %t && cd %t -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" +; Do setup work for all below tests: convert raw profiles to indexed profiles, +; run profile-use pass, generate bitcode and combined ThinLTO index. +; Note `pgo-instr-use` pass runs without `pgo-icall-prom` pass. As a result ICP +; transformation won't happen at test setup time. +; RUN: llvm-profdata merge %p/Inputs/thinlto_indirect_call_promotion.profraw -o icp.profdata +; RUN: opt -passes=pgo-instr-use -pgo-test-profile-file=icp.profdata -module-summary main.ll -o main.bc +; RUN: opt -passes=pgo-instr-use -pgo-test-profile-file=icp.profdata -module-summary lib.ll -o lib.bc +; RUN: llvm-lto -thinlto -o summary main.bc lib.bc -@foo = external local_unnamed_addr global ptr, align 8 -@bar = external local_unnamed_addr global ptr, align 8 +; Test that callee with local linkage has `PGOFuncName` metadata while callee with external doesn't have it. +; RUN: llvm-dis lib.bc -o - | FileCheck %s --check-prefix=PGOName +; PGOName: define void @_Z7callee1v() {{.*}} !prof ![[#]] { +; PGOName: define internal void @_ZL7callee0v() {{.*}} !prof ![[#]] !PGOFuncName ![[#MD:]] { +; The source filename of `lib.ll` is specified as "lib.cc" (i.e., the name does +; not change with the directory), so match the full name here. 
+; PGOName: ![[#MD]] = !{!"lib.cc;_ZL7callee0v"} -define i32 @main() local_unnamed_addr { -entry: - %0 = load ptr, ptr @foo, align 8 -; ICALL-PROM: br i1 %{{[0-9]+}}, label %if.true.direct_targ, label %if.false.orig_indirect, !prof [[BRANCH_WEIGHT:![0-9]+]] - tail call void %0(), !prof !1 - %1 = load ptr, ptr @bar, align 8 -; ICALL-PROM: br i1 %{{[0-9]+}}, label %if.true.direct_targ1, label %if.false.orig_indirect2, !prof [[BRANCH_WEIGHT:![0-9]+]] - tail call void %1(), !prof !2 +; Tests that both external and internal callees are correctly imported. +; RUN: opt -passes=function-import -summary-file summary.thinlto.bc main.bc -o main.import.bc -print-imports 2>&1 | FileCheck %s --check-prefix=IMPORTS +; IMPORTS: Import _Z7callee1v +; IMPORTS: Import _ZL7callee0v.llvm.[[#]] +; IMPORTS: Import _Z11global_funcv + +; Tests that ICP transformations happen. +; Both candidates are ICP'ed, check there is no `!VP` in the IR. +; RUN: opt main.import.bc -icp-lto -passes=pgo-icall-prom -S | FileCheck %s --check-prefix=ICALL-PROM --implicit-check-not="!VP" +; RUN: opt main.import.bc -icp-lto -passes=pgo-icall-prom -S -pass-remarks=pgo-icall-prom 2>&1 | FileCheck %s --check-prefix=PASS-REMARK + +; PASS-REMARK: Promote indirect call to _ZL7callee0v.llvm.[[#]] with count 1 out of 1 +; PASS-REMARK: Promote indirect call to _Z7callee1v with count 1 out of 1 + +; ICALL-PROM: br i1 %[[#]], label %if.true.direct_targ, label %if.false.orig_indirect, !prof ![[#BRANCH_WEIGHT1:]] +; ICALL-PROM: br i1 %[[#]], label %if.true.direct_targ1, label %if.false.orig_indirect2, !prof ![[#BRANCH_WEIGHT1]] + +; ICALL-PROM: ![[#BRANCH_WEIGHT1]] = !{!"branch_weights", i32 1, i32 0} + +;--- main.ll +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @main() { + call void @_Z11global_funcv() ret i32 0 } -!1 = !{!"VP", i32 0, i64 1, i64 -6289574019528802036, i64 1} -!2 = !{!"VP", i32 0, i64 1, i64 
591260329866125152, i64 1} +declare void @_Z11global_funcv() -; Should not have a VP annotation on new indirect call (check before and after -; branch_weights annotation). -; ICALL-PROM-NOT: !"VP" -; ICALL-PROM: [[BRANCH_WEIGHT]] = !{!"branch_weights", i32 1, i32 0} -; ICALL-PROM-NOT: !"VP" +;--- lib.ll +source_filename = "lib.cc" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@calleeAddrs = global [2 x ptr] [ptr @_ZL7callee0v, ptr @_Z7callee1v] + +define void @_Z7callee1v() { + ret void +} + +define internal void @_ZL7callee0v() { + ret void +} + +define void @_Z11global_funcv() { +entry: + %0 = load ptr, ptr @calleeAddrs + call void %0() + %1 = load ptr, ptr getelementptr inbounds ([2 x ptr], ptr @calleeAddrs, i64 0, i64 1) + call void %1() + ret void +} diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp index e6613a90dc7c53..6a71a975fbb12d 100644 --- a/llvm/unittests/ProfileData/InstrProfTest.cpp +++ b/llvm/unittests/ProfileData/InstrProfTest.cpp @@ -1379,7 +1379,7 @@ TEST(SymtabTest, instr_prof_symtab_compression_test) { TEST_P(MaybeSparseInstrProfTest, remapping_test) { Writer.addRecord({"_Z3fooi", 0x1234, {1, 2, 3, 4}}, Err); - Writer.addRecord({"file:_Z3barf", 0x567, {5, 6, 7}}, Err); + Writer.addRecord({"file;_Z3barf", 0x567, {5, 6, 7}}, Err); auto Profile = Writer.writeBuffer(); readProfile(std::move(Profile), llvm::MemoryBuffer::getMemBuffer(R"( type i l @@ -1397,7 +1397,7 @@ TEST_P(MaybeSparseInstrProfTest, remapping_test) { EXPECT_EQ(4u, Counts[3]); } - for (StringRef BarName : {"file:_Z3barf", "file:_Z4quuxf"}) { + for (StringRef BarName : {"file;_Z3barf", "file;_Z4quuxf"}) { EXPECT_THAT_ERROR(Reader->getFunctionCounts(BarName, 0x567, Counts), Succeeded()); ASSERT_EQ(3u, Counts.size()); >From cca510e569e9325ec089974e006eb2a8e2d0a2a7 Mon Sep 17 00:00:00 2001 From: mingmingl <mingmi...@google.com> 
Date: Mon, 18 Dec 2023 23:03:11 -0800 Subject: [PATCH 2/2] Simplify tests and relax restrictive checks. --- compiler-rt/test/profile/CMakeLists.txt | 2 +- ...trprof-thinlto-indirect-call-promotion.cpp | 45 +++++++------------ .../thinlto_indirect_call_promotion.ll | 10 ++--- 3 files changed, 22 insertions(+), 35 deletions(-) diff --git a/compiler-rt/test/profile/CMakeLists.txt b/compiler-rt/test/profile/CMakeLists.txt index eebe0469efebe0..e7494f520aa23a 100644 --- a/compiler-rt/test/profile/CMakeLists.txt +++ b/compiler-rt/test/profile/CMakeLists.txt @@ -6,7 +6,7 @@ set(PROFILE_TESTSUITES) set(PROFILE_TEST_DEPS ${SANITIZER_COMMON_LIT_TEST_DEPS} compiler-rt-headers) list(APPEND PROFILE_TEST_DEPS profile) if(NOT COMPILER_RT_STANDALONE_BUILD) - list(APPEND PROFILE_TEST_DEPS llvm-cov llvm-dis llvm-lto llvm-profdata opt) + list(APPEND PROFILE_TEST_DEPS llvm-cov llvm-lto llvm-profdata opt) if(NOT APPLE AND COMPILER_RT_HAS_LLD AND "lld" IN_LIST LLVM_ENABLE_PROJECTS) list(APPEND PROFILE_TEST_DEPS lld) endif() diff --git a/compiler-rt/test/profile/instrprof-thinlto-indirect-call-promotion.cpp b/compiler-rt/test/profile/instrprof-thinlto-indirect-call-promotion.cpp index 82ca1cd7d0a564..ed7faada0b8630 100644 --- a/compiler-rt/test/profile/instrprof-thinlto-indirect-call-promotion.cpp +++ b/compiler-rt/test/profile/instrprof-thinlto-indirect-call-promotion.cpp @@ -13,6 +13,11 @@ // - Generate ThinLTO summary file with LLVM bitcodes, and run `function-import` pass. // - Run `pgo-icall-prom` pass for the IR module which needs to import callees. +// This test and IR test llvm/test/Transforms/PGOProfile/thinlto_indirect_call_promotion.ll +// are complementary to each other; a compiler-rt test has better test coverage +// on different platforms, and the IR test is less restrictive in terms of +// running environment and could be executed more widely. + // Use lld as linker for more robust test. We need to REQUIRE LLVMgold.so for // LTO if default linker is GNU ld or gold anyway. 
// REQUIRES: lld-available @@ -40,12 +45,9 @@ // RUN: env LLVM_PROFILE_FILE=main.profraw %run ./main // RUN: llvm-profdata merge main.profraw -o main.profdata -// Use profile on lib and get bitcode, test that local function callee0 has -// expected !PGOFuncName metadata and external function callee1 doesn't have -// !PGOFuncName metadata. Explicitly skip ICP pass to test ICP happens as +// Use profile on lib and get bitcode. Explicitly skip ICP pass to test ICP happens as // expected in the IR module that imports functions from lib. // RUN: %clang -mllvm -disable-icp -fprofile-use=main.profdata -flto=thin -O2 -c lib.cpp -o lib.bc -// RUN: llvm-dis lib.bc -o - | FileCheck %s --check-prefix=PGOName // Use profile on main and get bitcode. // RUN: %clang -fprofile-use=main.profdata -flto=thin -O2 -c main.cpp -o main.bc @@ -58,35 +60,20 @@ // block with a function-entry-count of one, so they are actually hot functions // per default profile summary hotness cutoff. // RUN: opt -passes=function-import -import-instr-limit=100 -import-cold-multiplier=1 -summary-file summary.thinlto.bc main.bc -o main.import.bc -print-imports 2>&1 | FileCheck %s --check-prefix=IMPORTS -// Test that '_Z11global_funcv' has indirect calls annotated with value profiles. -// RUN: llvm-dis main.import.bc -o - | FileCheck %s --check-prefix=IR // Test that both candidates are ICP'ed and there is no `!VP` in the IR. 
// RUN: opt main.import.bc -icp-lto -passes=pgo-icall-prom -S -pass-remarks=pgo-icall-prom 2>&1 | FileCheck %s --check-prefixes=ICP-IR,ICP-REMARK --implicit-check-not="!VP" -// IMPORTS: main.cpp: Import _Z7callee1v -// IMPORTS: main.cpp: Import _ZL7callee0v.llvm.[[#]] -// IMPORTS: main.cpp: Import _Z11global_funcv - -// PGOName: define {{(dso_local )?}}void @_Z7callee1v() #[[#]] !prof ![[#]] { -// PGOName: define internal void @_ZL7callee0v() #[[#]] !prof ![[#]] !PGOFuncName ![[#MD:]] { -// PGOName: ![[#MD]] = !{!"{{.*}}lib.cpp;_ZL7callee0v"} - -// IR-LABEL: define available_externally {{.*}} void @_Z11global_funcv() {{.*}} !prof ![[#]] { -// IR-NEXT: entry: -// IR-NEXT: %0 = load ptr, ptr @calleeAddrs -// IR-NEXT: tail call void %0(), !prof ![[#PROF1:]] -// IR-NEXT: %1 = load ptr, ptr getelementptr inbounds ([2 x ptr], ptr @calleeAddrs, -// IR-NEXT: tail call void %1(), !prof ![[#PROF2:]] - -// The GUID of indirect callee is the MD5 hash of `/path/to/lib.cpp;_ZL7callee0v` -// that depends on the directory. Use [[#]] for its MD5 hash. -// Use {{.*}} for integer types so the test works on 32-bit and 64-bit systems. 
-// IR: ![[#PROF1]] = !{!"VP", i32 0, {{.*}} 1, {{.*}} [[#]], {{.*}} 1} -// IR: ![[#PROF2]] = !{!"VP", i32 0, {{.*}} 1, {{.*}} -3993653843325621743, {{.*}} 1} - -// ICP-REMARK: Promote indirect call to _ZL7callee0v.llvm.[[#]] with count 1 out of 1 -// ICP-REMARK: Promote indirect call to _Z7callee1v with count 1 out of 1 +// IMPORTS-DAG: main.cpp: Import {{.*}}callee1{{.*}} +// IMPORTS-DAG: main.cpp: Import {{.*}}callee0{{.*}}llvm.[[#]] +// IMPORTS-DAG: main.cpp: Import {{.*}}global_func{{.*}} + +// PGOName-DAG: define {{.*}}callee1{{.*}} !prof ![[#]] { +// PGOName-DAG: define internal {{.*}}callee0{{.*}} !prof ![[#]] !PGOFuncName ![[#MD:]] { +// PGOName-DAG: ![[#MD]] = !{!"{{.*}}lib.cpp;{{.*}}callee0{{.*}}"} + +// ICP-REMARK: Promote indirect call to {{.*}}callee0{{.*}}llvm.[[#]] with count 1 out of 1 +// ICP-REMARK: Promote indirect call to {{.*}}callee1{{.*}} with count 1 out of 1 // ICP-IR: br i1 %[[#]], label %if.true.direct_targ, label %if.false.orig_indirect, !prof ![[#BRANCH_WEIGHT1:]] // ICP-IR: br i1 %[[#]], label %if.true.direct_targ1, label %if.false.orig_indirect2, !prof ![[#BRANCH_WEIGHT1]] diff --git a/llvm/test/Transforms/PGOProfile/thinlto_indirect_call_promotion.ll b/llvm/test/Transforms/PGOProfile/thinlto_indirect_call_promotion.ll index 63e2a7a904a1bd..d2f4696ccf41d7 100644 --- a/llvm/test/Transforms/PGOProfile/thinlto_indirect_call_promotion.ll +++ b/llvm/test/Transforms/PGOProfile/thinlto_indirect_call_promotion.ll @@ -23,17 +23,17 @@ ; Test that callee with local linkage has `PGOFuncName` metadata while callee with external doesn't have it. 
; RUN: llvm-dis lib.bc -o - | FileCheck %s --check-prefix=PGOName -; PGOName: define void @_Z7callee1v() {{.*}} !prof ![[#]] { -; PGOName: define internal void @_ZL7callee0v() {{.*}} !prof ![[#]] !PGOFuncName ![[#MD:]] { +; PGOName-DAG: define void @_Z7callee1v() {{.*}} !prof ![[#]] { +; PGOName-DAG: define internal void @_ZL7callee0v() {{.*}} !prof ![[#]] !PGOFuncName ![[#MD:]] { ; The source filename of `lib.ll` is specified as "lib.cc" (i.e., the name does ; not change with the directory), so match the full name here. ; PGOName: ![[#MD]] = !{!"lib.cc;_ZL7callee0v"} ; Tests that both external and internal callees are correctly imported. ; RUN: opt -passes=function-import -summary-file summary.thinlto.bc main.bc -o main.import.bc -print-imports 2>&1 | FileCheck %s --check-prefix=IMPORTS -; IMPORTS: Import _Z7callee1v -; IMPORTS: Import _ZL7callee0v.llvm.[[#]] -; IMPORTS: Import _Z11global_funcv +; IMPORTS-DAG: Import _Z7callee1v +; IMPORTS-DAG: Import _ZL7callee0v.llvm.[[#]] +; IMPORTS-DAG: Import _Z11global_funcv ; Tests that ICP transformations happen. ; Both candidates are ICP'ed, check there is no `!VP` in the IR. _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits