https://gcc.gnu.org/g:74e691a8e31a81019e0fda0f919d721d3092ad54
commit r16-1121-g74e691a8e31a81019e0fda0f919d721d3092ad54 Author: Kugan Vivekanandarajah <kvivekana...@nvidia.com> Date: Thu Jun 5 07:15:34 2025 +1000 [AutoFDO] Profile merging for clone test This patch introduces a new testcase to verify the merging of profiles is performed for cloned functions. Since this is invoked very early, before the pass manager, we need to set up the dumping explicitly. This is similar to the handling in finish_optimization_passes. gcc/ChangeLog: * auto-profile.cc (autofdo_source_profile::read): Dump message while merging profile. * pass_manager.h (get_pass_auto_profile): New. gcc/testsuite/ChangeLog: * gcc.dg/tree-prof/clone-merge-1.c: New test. Signed-off-by: Kugan Vivekanandarajah <kvivekana...@nvidia.com> Diff: --- gcc/auto-profile.cc | 20 +++++++++++++++- gcc/pass_manager.h | 1 + gcc/testsuite/gcc.dg/tree-prof/clone-merge-1.c | 32 ++++++++++++++++++++++++++ 3 files changed, 52 insertions(+), 1 deletion(-) diff --git a/gcc/auto-profile.cc b/gcc/auto-profile.cc index 91cc8db2c832..215dadf87c21 100644 --- a/gcc/auto-profile.cc +++ b/gcc/auto-profile.cc @@ -35,6 +35,8 @@ along with GCC; see the file COPYING3. If not see #include "diagnostic-core.h" #include "profile.h" #include "langhooks.h" +#include "context.h" +#include "pass_manager.h" #include "cfgloop.h" #include "tree-cfg.h" #include "tree-cfgcleanup.h" @@ -858,6 +860,9 @@ autofdo_source_profile::read () /* Read in the function/callsite profile, and store it in local data structure. 
*/ unsigned function_num = gcov_read_unsigned (); + int profile_pass_num + = g->get_passes ()->get_pass_auto_profile ()->static_pass_number; + g->get_dumps ()->dump_start (profile_pass_num, NULL); for (unsigned i = 0; i < function_num; i++) { function_instance::function_instance_stack stack; @@ -870,8 +875,21 @@ autofdo_source_profile::read () if (map_.count (fun_id) == 0) map_[fun_id] = s; else - map_[fun_id]->merge (s); + { + /* Since this is invoked very early, before the pass + manager, we need to set up the dumping explicitly. This is + similar to the handling in finish_optimization_passes. */ + if (dump_enabled_p ()) + { + dump_user_location_t loc + = dump_user_location_t::from_location_t (input_location); + dump_printf_loc (MSG_NOTE, loc, "Merging profile for %s\n", + afdo_string_table->get_name (s->name ())); + } + map_[fun_id]->merge (s); + } } + g->get_dumps ()->dump_finish (profile_pass_num); return true; } diff --git a/gcc/pass_manager.h b/gcc/pass_manager.h index d4f89004559b..4de4a482ca1e 100644 --- a/gcc/pass_manager.h +++ b/gcc/pass_manager.h @@ -74,6 +74,7 @@ public: } opt_pass *get_pass_peephole2 () const { return m_pass_peephole2_1; } opt_pass *get_pass_profile () const { return m_pass_profile_1; } + opt_pass *get_pass_auto_profile () const { return m_pass_ipa_auto_profile_1; } void register_pass_name (opt_pass *pass, const char *name); diff --git a/gcc/testsuite/gcc.dg/tree-prof/clone-merge-1.c b/gcc/testsuite/gcc.dg/tree-prof/clone-merge-1.c new file mode 100644 index 000000000000..40aab9fdfca3 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-prof/clone-merge-1.c @@ -0,0 +1,32 @@ +/* { dg-options "-O3 -fno-early-inlining -fdump-ipa-afdo-all" } */ +__attribute__ ((used)) +int a[1000]; + +__attribute__ ((noinline)) +void +test2(int sz) +{ + a[sz]++; + asm volatile (""::"m"(a)); +} + +__attribute__ ((noinline)) +void +test1 (int sz) +{ + for (int i = 0; i < 1000; i++) + if (i % 2) + test2 (sz); + else + test2 (i); + +} +int main() +{ + for (int i = 
0; i < 1000; i++) + test1 (1000); + return 0; +} +/* We will have profiles for test2 and test2.constprop.0 that will have to be + merged. */ +/* { dg-final-use-autofdo { scan-ipa-dump "note: Merging profile for test2" "afdo"} } */