https://github.com/ElvisWang123 updated https://github.com/llvm/llvm-project/pull/75021
>From f15627239b333d729fe796e6bbfe9232df30a92e Mon Sep 17 00:00:00 2001 From: Elvis Wang <elvis.w...@sifive.com> Date: Sun, 10 Dec 2023 18:34:37 -0800 Subject: [PATCH] [PGO][nfc] Add `-fdiagnostics-show-profile-count` option to show real loop count from instr-profile The original `-fdiagnostics-show-hotness` option show the relative number of the loop count which is calculate by the `function_entry_count` and `branch_frequency`. We want to know the real loop iteration count in the remark which is collect in the instrument profile, adding a new option to expose the new feature. - Add a new metadata `MD_prof_count` which contains the runtime loop iterations count. For example: ``` loop.header: ... br i1 %0, label %true, label %false, !prof.count !0 ... !0 = !{!"profile_count", !i64 0} ``` - If option `-fdiagnostics-show-profile-count` is set we will append the `MD_prof_count` metadata at the branch instruction at the header of loops. - Show the profile count like hotness with remark. For example: ``` remark: the cost-model indicates that interleaving is not beneficial (ProfileCount: 20) [-Rpass-analysis=loop-vectorize] 38 | for(int i = 0; i < argc % 20; i++){ | ^ ``` --- clang/docs/UsersManual.rst | 24 ++++++ .../clang/Basic/DiagnosticDriverKinds.td | 3 + clang/include/clang/Driver/Options.td | 3 + clang/lib/CodeGen/CGStmt.cpp | 27 ++++++- clang/lib/CodeGen/CodeGenAction.cpp | 4 + clang/lib/CodeGen/CodeGenFunction.h | 1 + clang/lib/CodeGen/CodeGenPGO.cpp | 12 +++ clang/lib/Driver/ToolChains/Clang.cpp | 5 ++ clang/lib/Frontend/CompilerInvocation.cpp | 6 ++ ...ization-remark-with-profile-count.proftext | 9 +++ ...ization-remark-with-profile-count-new-pm.c | 41 +++++++++++ .../Inputs/c-profile-count-metadata.proftext | 32 ++++++++ clang/test/Profile/c-profile-count-metadata.c | 73 +++++++++++++++++++ llvm/docs/LangRef.rst | 22 ++++++ .../llvm/Analysis/OptimizationRemarkEmitter.h | 7 ++ llvm/include/llvm/IR/DiagnosticInfo.h | 6 ++ 
llvm/include/llvm/IR/FixedMetadataKinds.def | 1 + llvm/include/llvm/IR/MDBuilder.h | 3 + llvm/include/llvm/Remarks/Remark.h | 4 + .../Analysis/OptimizationRemarkEmitter.cpp | 22 ++++++ llvm/lib/IR/LLVMRemarkStreamer.cpp | 1 + llvm/lib/IR/MDBuilder.cpp | 10 +++ llvm/lib/Remarks/Remark.cpp | 2 + llvm/lib/Remarks/YAMLRemarkSerializer.cpp | 7 +- 24 files changed, 319 insertions(+), 6 deletions(-) create mode 100644 clang/test/Frontend/Inputs/optimization-remark-with-profile-count.proftext create mode 100644 clang/test/Frontend/optimization-remark-with-profile-count-new-pm.c create mode 100644 clang/test/Profile/Inputs/c-profile-count-metadata.proftext create mode 100644 clang/test/Profile/c-profile-count-metadata.c diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index f1b344ef5109b..e3e4b585713df 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -425,6 +425,30 @@ output format of the diagnostics that it generates. If this option is not used, all the passes are included in the optimization record. +.. option:: -fdiagnostics-show-profile-count + + Enable profile loop count information in diagnostic line. + + This option controls whether Clang prints the profile loop count associated + with diagnostics in the presence of profile-guided optimization information. + This is currently supported with optimization remarks (see + :ref:`Options to Emit Optimization Reports <rpass>`). The profile count information + allows users to focus on the hot optimization remarks that are likely to be + more relevant for run-time performance. The main difference between profile count + and hotness is that the profile count is the real count from the runtime + profile, whereas hotness is a relative number calculated from the function entry count and + branch frequency. 
+ + For example, in this output, the body of the loop at line 38 was + executed 20 times according to the profile data: + + :: + + s.c:38:3: remark: the cost-model indicates that interleaving is not beneficial (ProfileCount: 20) [-Rpass-analysis=loop-vectorize] + for(int i = 0; i < 20; i++){ + ^ + + .. _opt_fdiagnostics-show-hotness: .. option:: -f[no-]diagnostics-show-hotness diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 676f1a62b49dd..47ad1e058a1d8 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -420,6 +420,9 @@ def warn_drv_empty_joined_argument : Warning< def warn_drv_diagnostics_hotness_requires_pgo : Warning< "argument '%0' requires profile-guided optimization information">, InGroup<UnusedCommandLineArgument>; +def warn_drv_diagnostics_profile_count_requires_pgo : Warning< + "argument '%0' requires profile-guided optimization information">, + InGroup<UnusedCommandLineArgument>; def warn_drv_diagnostics_misexpect_requires_pgo : Warning< "argument '%0' requires profile-guided optimization information">, InGroup<UnusedCommandLineArgument>; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index b959fd20fe413..78914e88350a5 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1933,6 +1933,9 @@ defm diagnostics_show_hotness : BoolFOption<"diagnostics-show-hotness", PosFlag<SetTrue, [], [ClangOption, CC1Option], "Enable profile hotness information in diagnostic line">, NegFlag<SetFalse>>; +def fdiagnostics_show_profile_count : Flag<["-"], "fdiagnostics-show-profile-count">, + Group<f_clang_Group>, Visibility<[ClangOption, CC1Option]>, + HelpText<"Show the real loop counts from the runtime profile">; def fdiagnostics_hotness_threshold_EQ : Joined<["-"], "fdiagnostics-hotness-threshold=">, Group<f_Group>, 
Visibility<[ClangOption, CC1Option]>, MetaVarName<"<value>">, diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index a5cb80640641b..7cbb6e10cb138 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -32,6 +32,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/Support/SaveAndRestore.h" #include <optional> @@ -923,7 +924,12 @@ void CodeGenFunction::EmitWhileStmt(const WhileStmt &S, if (!Weights && CGM.getCodeGenOpts().OptimizationLevel) BoolCondVal = emitCondLikelihoodViaExpectIntrinsic( BoolCondVal, Stmt::getLikelihood(S.getBody())); - Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock, Weights); + auto *Branch = + Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock, Weights); + // Appending the profile count metadata on the Branch instruction for the + // profile count + Branch->setMetadata(llvm::LLVMContext::MD_prof_count, + createProfileCount(getProfileCount(S.getBody()))); if (ExitBlock != LoopExit.getBlock()) { EmitBlock(ExitBlock); @@ -1014,9 +1020,13 @@ void CodeGenFunction::EmitDoStmt(const DoStmt &S, // As long as the condition is true, iterate the loop. 
if (EmitBoolCondBranch) { uint64_t BackedgeCount = getProfileCount(S.getBody()) - ParentCount; - Builder.CreateCondBr( + auto *Branch = Builder.CreateCondBr( BoolCondVal, LoopBody, LoopExit.getBlock(), createProfileWeightsForLoop(S.getCond(), BackedgeCount)); + // Appending the profile count metadata on the Branch instruction for the + // profile count + Branch->setMetadata(llvm::LLVMContext::MD_prof_count, + createProfileCount(getProfileCount(S.getBody()))); } LoopStack.pop(); @@ -1104,7 +1114,12 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S, BoolCondVal = emitCondLikelihoodViaExpectIntrinsic( BoolCondVal, Stmt::getLikelihood(S.getBody())); - Builder.CreateCondBr(BoolCondVal, ForBody, ExitBlock, Weights); + auto *Branch = + Builder.CreateCondBr(BoolCondVal, ForBody, ExitBlock, Weights); + // Appending the profile count metadata on the Branch instruction for the + // profile count + Branch->setMetadata(llvm::LLVMContext::MD_prof_count, + createProfileCount(getProfileCount(S.getBody()))); if (ExitBlock != LoopExit.getBlock()) { EmitBlock(ExitBlock); @@ -1188,7 +1203,11 @@ CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S, if (!Weights && CGM.getCodeGenOpts().OptimizationLevel) BoolCondVal = emitCondLikelihoodViaExpectIntrinsic( BoolCondVal, Stmt::getLikelihood(S.getBody())); - Builder.CreateCondBr(BoolCondVal, ForBody, ExitBlock, Weights); + auto *Branch = Builder.CreateCondBr(BoolCondVal, ForBody, ExitBlock, Weights); + // Appending the profile count metadata on the Branch instruction for the + // profile count + Branch->setMetadata(llvm::LLVMContext::MD_prof_count, + createProfileCount(getProfileCount(S.getBody()))); if (ExitBlock != LoopExit.getBlock()) { EmitBlock(ExitBlock); diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp index bb6b1a3bc228c..fbbd5bf25898e 100644 --- a/clang/lib/CodeGen/CodeGenAction.cpp +++ b/clang/lib/CodeGen/CodeGenAction.cpp @@ -716,6 +716,10 @@ void 
BackendConsumer::EmitOptimizationMessage( if (D.getHotness()) MsgStream << " (hotness: " << *D.getHotness() << ")"; + if (D.getProfileCount()) { + MsgStream << " (ProfileCount: " << *D.getProfileCount() << ")"; + } + Diags.Report(Loc, DiagID) << AddFlagValue(D.getPassName()) << MsgStream.str(); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 618e78809db40..12cb0645f2f57 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -1527,6 +1527,7 @@ class CodeGenFunction : public CodeGenTypeCache { llvm::MDNode *createProfileWeights(ArrayRef<uint64_t> Weights) const; llvm::MDNode *createProfileWeightsForLoop(const Stmt *Cond, uint64_t LoopCount) const; + llvm::MDNode *createProfileCount(uint64_t Count) const; public: /// Increment the profiler's counter for the given statement by \p StepV. diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 81bf8ea696b16..8752f8e8269b8 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -23,6 +23,11 @@ #include "llvm/Support/MD5.h" #include <optional> +static llvm::cl::opt<bool> ClEnableProfileCountMetadata( + "enable-profile-count-metadata", + llvm::cl::desc("Appending real executation count of loops from runtime"), + llvm::cl::Hidden, llvm::cl::init(false)); + static llvm::cl::opt<bool> EnableValueProfiling("enable-value-profiling", llvm::cl::desc("Enable value profiling"), @@ -1122,3 +1127,10 @@ CodeGenFunction::createProfileWeightsForLoop(const Stmt *Cond, return createProfileWeights(LoopCount, std::max(*CondCount, LoopCount) - LoopCount); } + +llvm::MDNode *CodeGenFunction::createProfileCount(uint64_t Count) const { + if (!PGO.haveRegionCounts() || !ClEnableProfileCountMetadata) + return nullptr; + llvm::MDBuilder MDHelper(CGM.getLLVMContext()); + return MDHelper.createProfileCount(Count); +} diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp 
index eb26bfade47b7..0dfa40cf92f28 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -4065,6 +4065,11 @@ static void RenderDiagnosticsOptions(const Driver &D, const ArgList &Args, Args.addOptOutFlag(CmdArgs, options::OPT_fspell_checking, options::OPT_fno_spell_checking); + + // Show iteration counts of loops by runtime profile. + if (Args.hasArg(options::OPT_fdiagnostics_show_profile_count)) + CmdArgs.append({"-fdiagnostics-show-profile-count", "-mllvm", + "-enable-profile-count-metadata"}); } DwarfFissionKind tools::getDebugFissionKind(const Driver &D, diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index b33bdad2ad81b..11568eb2bdbd4 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -2091,6 +2091,12 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, bool UsingProfile = UsingSampleProfile || !Opts.ProfileInstrumentUsePath.empty(); + if (Args.hasArg(options::OPT_fdiagnostics_show_profile_count) && + !UsingProfile && IK.getLanguage() != Language::LLVM_IR) { + Diags.Report(diag::warn_drv_diagnostics_profile_count_requires_pgo) + << "-fdiagnostics-show-profile-count"; + } + if (Opts.DiagnosticsWithHotness && !UsingProfile && // An IR file will contain PGO as metadata IK.getLanguage() != Language::LLVM_IR) diff --git a/clang/test/Frontend/Inputs/optimization-remark-with-profile-count.proftext b/clang/test/Frontend/Inputs/optimization-remark-with-profile-count.proftext new file mode 100644 index 0000000000000..1885f75326578 --- /dev/null +++ b/clang/test/Frontend/Inputs/optimization-remark-with-profile-count.proftext @@ -0,0 +1,9 @@ +main +# Func Hash: +1160280 +# Num Counters: +2 +# Counter Values: +1 +20 + diff --git a/clang/test/Frontend/optimization-remark-with-profile-count-new-pm.c b/clang/test/Frontend/optimization-remark-with-profile-count-new-pm.c new file mode 100644 index 
0000000000000..0cfe95a1a90d4 --- /dev/null +++ b/clang/test/Frontend/optimization-remark-with-profile-count-new-pm.c @@ -0,0 +1,41 @@ +// Testing the remark output of the `-fdiagnostics-show-profile-count`. + +// Generate instrumentation and sampling profile data. +// RUN: llvm-profdata merge \ +// RUN: %S/Inputs/optimization-remark-with-profile-count.proftext \ +// RUN: -o %t.profdata +// +// RUN: %clang -fprofile-instr-use=%t.profdata \ +// RUN: -O2 -Rpass=loop-vectorize -Rpass-analysis=loop-vectorize \ +// RUN: -Rpass-missed=loop-vecotrize \ +// RUN: -fdiagnostics-show-profile-count \ +// RUN: 2>&1 %s\ +// RUN: | FileCheck -check-prefix=SHOW_PROFILE_COUNT %s +// RUN: %clang -fprofile-instr-use=%t.profdata \ +// RUN: -O2 -Rpass=loop-vectorize -Rpass-analysis=loop-vectorize \ +// RUN: -Rpass-missed=loop-vecotrize \ +// RUN: -fdiagnostics-show-profile-count -fdiagnostics-show-hotness \ +// RUN: 2>&1 %s\ +// RUN: | FileCheck -check-prefix=SHOW_PROFILE_COUNT_AND_HOTNESS %s +// RUN: %clang \ +// RUN: -O2 -Rpass=loop-vectorize -Rpass-analysis=loop-vectorize \ +// RUN: -Rpass-missed=loop-vecotrize \ +// RUN: -fdiagnostics-show-profile-count \ +// RUN: 2>&1 %s\ +// RUN: | FileCheck -check-prefix=NO_PGO %s + +int sum = 0; +int x[20] = {0, 112, 32, 11, 99, 88, 99, 88,34, 342, 85,99, 43, 75, 71, 871, 84, 65, 37, 98}; + +// SHOW_PROFILE_COUNT_AND_HOTNESS: hotness: {{[0-9]+}} +// SHOW_PROFILE_COUNT_AND_HOTNESS: ProfileCount: {{[0-9]+}} +// SHOW_PROFILE_COUNT: ProfileCount: {{[0-9]+}} +// NO_PGO: argument '-fdiagnostics-show-profile-count' requires profile-guided optimization information +int main(int argc, const char *argv[]) { +#pragma clang loop vectorize(enable) + for(int i = 0; i < argc % 20; i++){ + sum += x[i]; + sum += argc; + } + return sum; +} diff --git a/clang/test/Profile/Inputs/c-profile-count-metadata.proftext b/clang/test/Profile/Inputs/c-profile-count-metadata.proftext new file mode 100644 index 0000000000000..d880663fed32d --- /dev/null +++ 
b/clang/test/Profile/Inputs/c-profile-count-metadata.proftext @@ -0,0 +1,32 @@ +never_called +6820425066224770721 +9 +0 +0 +0 +0 +0 +0 +0 +0 +0 + +main +24 +1 +1 + +dead_code +5254464978620792806 +10 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 + diff --git a/clang/test/Profile/c-profile-count-metadata.c b/clang/test/Profile/c-profile-count-metadata.c new file mode 100644 index 0000000000000..99089cd5da360 --- /dev/null +++ b/clang/test/Profile/c-profile-count-metadata.c @@ -0,0 +1,73 @@ +// Copied from c-unprofiled-blocks.c, but testing `MD_prof_count`, which +// will be generated whether or not the code is dead. + +// RUN: llvm-profdata merge %S/Inputs/c-profile-count-metadata.proftext -o %t.profdata +// RUN: %clang_cc1 -mllvm -enable-profile-count-metadata -triple x86_64-apple-macosx10.9 \ +// RUN: -main-file-name c-profile-count-metadata.c %s -o - \ +// RUN: -emit-llvm -fprofile-instrument-use-path=%t.profdata | FileCheck -check-prefix=PGOUSE %s + +// PGOUSE-LABEL: @never_called(i32 noundef %i) +int never_called(int i) { + // PGOUSE: br i1 %{{[^,]*}}, label %{{[^,]*}}, label %{{[^,]*}}{{$}} + if (i) {} + + // PGOUSE: br i1 %{{[^,]*}}, label %{{[^,]*}}, label %{{[^,]*}}, !prof.count !{{[0-9]+}}{{$}} + for (i = 0; i < 100; ++i) { + } + + // PGOUSE: br i1 %{{[^,]*}}, label %{{[^,]*}}, label %{{[^,]*}}, !prof.count !{{[0-9]+}}{{$}} + while (--i) {} + + // PGOUSE: br i1 %{{[^,]*}}, label %{{[^,]*}}, label %{{[^,]*}}, !llvm.loop [[LOOP1:!.*]], !prof.count !{{[0-9]+}} + do {} while (i++ < 75); + + // PGOUSE: switch {{.*}} [ + // PGOUSE-NEXT: i32 12 + // PGOUSE-NEXT: i32 82 + // PGOUSE-NEXT: ]{{$}} + switch (i) { + case 12: return 3; + case 82: return 0; + default: return 89; + } +} + +// PGOUSE-LABEL: @dead_code(i32 noundef %i) +int dead_code(int i) { + // PGOUSE: br {{.*}}, !prof !{{[0-9]+}} + if (i) { + // This branch is never reached. 
+ + // PGOUSE: br i1 %{{[^,]*}}, label %{{[^,]*}}, label %{{[^,]*}}{{$}} + if (!i) {} + + // PGOUSE: br i1 %{{[^,]*}}, label %{{[^,]*}}, label %{{[^,]*}}, !prof.count !{{[0-9]+}}{{$}} + for (i = 0; i < 100; ++i) { + } + + // PGOUSE: br i1 %{{[^,]*}}, label %{{[^,]*}}, label %{{[^,]*}}, !prof.count !{{[0-9]+}}{{$}} + while (--i) {} + + // PGOUSE: br i1 %{{[^,]*}}, label %{{[^,]*}}, label %{{[^,]*}}, !llvm.loop [[LOOP2:!.*]], !prof.count !{{[0-9]+}} + do {} while (i++ < 75); + + // PGOUSE: switch {{.*}} [ + // PGOUSE-NEXT: i32 12 + // PGOUSE-NEXT: i32 82 + // PGOUSE-NEXT: ]{{$}} + switch (i) { + case 12: return 3; + case 82: return 0; + default: return 89; + } + } + return 2; +} + +// PGOUSE-LABEL: @main(i32 noundef %argc, ptr noundef %argv) +int main(int argc, const char *argv[]) { + dead_code(0); + return 0; +} + +// PGOUSE: !{{[0-9]+}} = !{!"profile_count", i64 0} diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index adda52b33c789..7bf7f0728f187 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -7649,6 +7649,28 @@ allocation itself) to the outermost callsite context required for uniquely identifying the described profile behavior (note this may not be the top of the profiled call stack). +.. _md_prof_count: + +'``prof.count``' Metadata +^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``prof.count`` metadata is used to record the runtime execution count of a +loop. Unlike ``prof`` metadata, this metadata records +the real execution count from the runtime profile. + +Example: + +.. code-block:: text + + for.header: + ... + br i1 %1, label %for.body, label %for.exit, !prof.count !0 + !0 = !{!"profile_count", i64 100} + +Each ``prof.count`` metadata node contains two elements: the first element is the +string ``profile_count``, which identifies the node, and the second element is the +number of times the loop ran in the profiled run. + .. 
_md_callsite: '``callsite``' Metadata diff --git a/llvm/include/llvm/Analysis/OptimizationRemarkEmitter.h b/llvm/include/llvm/Analysis/OptimizationRemarkEmitter.h index 8aaeaf2991029..e7daf72502877 100644 --- a/llvm/include/llvm/Analysis/OptimizationRemarkEmitter.h +++ b/llvm/include/llvm/Analysis/OptimizationRemarkEmitter.h @@ -118,6 +118,13 @@ class OptimizationRemarkEmitter { /// available. std::optional<uint64_t> computeHotness(const Value *V); + /// Compute profile count from IR value (currently assumed to be a block) if + /// PGO is available. + std::optional<uint64_t> getProfileCount(const Value *V); + + /// Similar but use value from \p OptDiag and update profile count there. + void getProfileCount(DiagnosticInfoIROptimization &OptDiag); + /// Similar but use value from \p OptDiag and update hotness there. void computeHotness(DiagnosticInfoIROptimization &OptDiag); diff --git a/llvm/include/llvm/IR/DiagnosticInfo.h b/llvm/include/llvm/IR/DiagnosticInfo.h index 628445fe9fb2c..125f9302b75ed 100644 --- a/llvm/include/llvm/IR/DiagnosticInfo.h +++ b/llvm/include/llvm/IR/DiagnosticInfo.h @@ -481,6 +481,8 @@ class DiagnosticInfoOptimizationBase : public DiagnosticInfoWithLocationBase { std::string getMsg() const; std::optional<uint64_t> getHotness() const { return Hotness; } void setHotness(std::optional<uint64_t> H) { Hotness = H; } + std::optional<uint64_t> getProfileCount() const { return ProfileCount; } + void setProfileCount(std::optional<uint64_t> Count) { ProfileCount = Count; } bool isVerbose() const { return IsVerbose; } @@ -523,6 +525,10 @@ class DiagnosticInfoOptimizationBase : public DiagnosticInfoWithLocationBase { /// corresponding code was executed in a profile instrumentation run. std::optional<uint64_t> Hotness; + /// If profile information is available, this is the REAL number of times the + /// corresponding code was executed in a profile instrumentation run. 
+ std::optional<uint64_t> ProfileCount; + /// Arguments collected via the streaming interface. SmallVector<Argument, 4> Args; diff --git a/llvm/include/llvm/IR/FixedMetadataKinds.def b/llvm/include/llvm/IR/FixedMetadataKinds.def index b375d0f091206..20abd5f19c8e3 100644 --- a/llvm/include/llvm/IR/FixedMetadataKinds.def +++ b/llvm/include/llvm/IR/FixedMetadataKinds.def @@ -51,3 +51,4 @@ LLVM_FIXED_MD_KIND(MD_kcfi_type, "kcfi_type", 36) LLVM_FIXED_MD_KIND(MD_pcsections, "pcsections", 37) LLVM_FIXED_MD_KIND(MD_DIAssignID, "DIAssignID", 38) LLVM_FIXED_MD_KIND(MD_coro_outside_frame, "coro.outside.frame", 39) +LLVM_FIXED_MD_KIND(MD_prof_count, "prof.count", 40) diff --git a/llvm/include/llvm/IR/MDBuilder.h b/llvm/include/llvm/IR/MDBuilder.h index 39165453de16b..adfc733baa67b 100644 --- a/llvm/include/llvm/IR/MDBuilder.h +++ b/llvm/include/llvm/IR/MDBuilder.h @@ -67,6 +67,9 @@ class MDBuilder { /// Return metadata specifying that a branch or switch is unpredictable. MDNode *createUnpredictable(); + /// Create the `profile_count` metadata for a branch instruction. + MDNode *createProfileCount(uint64_t Count); + /// Return metadata containing the entry \p Count for a function, a boolean /// \Synthetic indicating whether the counts were synthetized, and the /// GUIDs stored in \p Imports that need to be imported for sample PGO, to diff --git a/llvm/include/llvm/Remarks/Remark.h b/llvm/include/llvm/Remarks/Remark.h index de81c5a992805..7bfbcb64b847f 100644 --- a/llvm/include/llvm/Remarks/Remark.h +++ b/llvm/include/llvm/Remarks/Remark.h @@ -116,6 +116,10 @@ struct Remark { /// corresponding code was executed in a profile instrumentation run. std::optional<uint64_t> Hotness; + /// If profile information is available, this is the real number of times the + /// corresponding code was executed in a profile instrumentation run. + std::optional<uint64_t> ProfileCount; + /// Arguments collected via the streaming interface. 
SmallVector<Argument, 5> Args; diff --git a/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp b/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp index 95d55ea44dbfd..73afa43b53b63 100644 --- a/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp +++ b/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp @@ -16,6 +16,8 @@ #include "llvm/Analysis/LazyBlockFrequencyInfo.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/LLVMContext.h" @@ -76,10 +78,30 @@ void OptimizationRemarkEmitter::computeHotness( OptDiag.setHotness(computeHotness(V)); } +void OptimizationRemarkEmitter::getProfileCount( + DiagnosticInfoIROptimization &OptDiag) { + const Value *V = OptDiag.getCodeRegion(); + if (V) + OptDiag.setProfileCount(getProfileCount(V)); +} + +std::optional<uint64_t> +OptimizationRemarkEmitter::getProfileCount(const Value *V) { + auto *Term = cast<BasicBlock>(V)->getTerminator(); + auto *ProfileData = Term->getMetadata(LLVMContext::MD_prof_count); + // TODO: Add check if the metadata match 'profile_count' + if (ProfileData == nullptr) + return std::nullopt; + ValueAsMetadata *VM = + static_cast<llvm::ValueAsMetadata *>(ProfileData->getOperand(1).get()); + return static_cast<llvm::ConstantInt *>(VM->getValue())->getZExtValue(); +} + void OptimizationRemarkEmitter::emit( DiagnosticInfoOptimizationBase &OptDiagBase) { auto &OptDiag = cast<DiagnosticInfoIROptimization>(OptDiagBase); computeHotness(OptDiag); + getProfileCount(OptDiag); // Only emit it if its hotness meets the threshold. 
if (OptDiag.getHotness().value_or(0) < diff --git a/llvm/lib/IR/LLVMRemarkStreamer.cpp b/llvm/lib/IR/LLVMRemarkStreamer.cpp index 71f8d4a4b1c7c..d19e46bb12f86 100644 --- a/llvm/lib/IR/LLVMRemarkStreamer.cpp +++ b/llvm/lib/IR/LLVMRemarkStreamer.cpp @@ -67,6 +67,7 @@ LLVMRemarkStreamer::toRemark(const DiagnosticInfoOptimizationBase &Diag) const { GlobalValue::dropLLVMManglingEscape(Diag.getFunction().getName()); R.Loc = toRemarkLocation(Diag.getLocation()); R.Hotness = Diag.getHotness(); + R.ProfileCount = Diag.getProfileCount(); for (const DiagnosticInfoOptimizationBase::Argument &Arg : Diag.getArgs()) { R.Args.emplace_back(); diff --git a/llvm/lib/IR/MDBuilder.cpp b/llvm/lib/IR/MDBuilder.cpp index 2490b3012bdc2..a31ea8159d44e 100644 --- a/llvm/lib/IR/MDBuilder.cpp +++ b/llvm/lib/IR/MDBuilder.cpp @@ -52,6 +52,16 @@ MDNode *MDBuilder::createBranchWeights(ArrayRef<uint32_t> Weights) { return MDNode::get(Context, Vals); } +MDNode *MDBuilder::createProfileCount(uint64_t Count) { + SmallVector<Metadata *, 2> Vals(2); + Vals[0] = createString("profile_count"); + + Type *Int64Ty = Type::getInt64Ty(Context); + Vals[1] = createConstant(ConstantInt::get(Int64Ty, Count)); + + return MDNode::get(Context, Vals); +} + MDNode *MDBuilder::createUnpredictable() { return MDNode::get(Context, std::nullopt); } diff --git a/llvm/lib/Remarks/Remark.cpp b/llvm/lib/Remarks/Remark.cpp index ef42271a3c8da..7a5e47b4c5a1d 100644 --- a/llvm/lib/Remarks/Remark.cpp +++ b/llvm/lib/Remarks/Remark.cpp @@ -56,6 +56,8 @@ void Remark::print(raw_ostream &OS) const { OS << "Loc: " << Loc.value(); if (Hotness) OS << "Hotness: " << Hotness; + if (ProfileCount) + OS << "ProfileCount: " << ProfileCount; if (!Args.empty()) { OS << "Args:\n"; for (auto Arg : Args) diff --git a/llvm/lib/Remarks/YAMLRemarkSerializer.cpp b/llvm/lib/Remarks/YAMLRemarkSerializer.cpp index 68285c3dde1bf..f5cf73173bf92 100644 --- a/llvm/lib/Remarks/YAMLRemarkSerializer.cpp +++ b/llvm/lib/Remarks/YAMLRemarkSerializer.cpp @@ -25,12 
+25,14 @@ template <typename T> static void mapRemarkHeader(yaml::IO &io, T PassName, T RemarkName, std::optional<RemarkLocation> RL, T FunctionName, std::optional<uint64_t> Hotness, + std::optional<uint64_t> ProfileCount, ArrayRef<Argument> Args) { io.mapRequired("Pass", PassName); io.mapRequired("Name", RemarkName); io.mapOptional("DebugLoc", RL); io.mapRequired("Function", FunctionName); io.mapOptional("Hotness", Hotness); + io.mapOptional("ProfileCount", ProfileCount); io.mapOptional("Args", Args); } @@ -66,10 +68,11 @@ template <> struct MappingTraits<remarks::Remark *> { unsigned NameID = StrTab.add(Remark->RemarkName).first; unsigned FunctionID = StrTab.add(Remark->FunctionName).first; mapRemarkHeader(io, PassID, NameID, Remark->Loc, FunctionID, - Remark->Hotness, Remark->Args); + Remark->Hotness, Remark->ProfileCount, Remark->Args); } else { mapRemarkHeader(io, Remark->PassName, Remark->RemarkName, Remark->Loc, - Remark->FunctionName, Remark->Hotness, Remark->Args); + Remark->FunctionName, Remark->Hotness, + Remark->ProfileCount, Remark->Args); } } }; _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits