https://github.com/w2yehia updated 
https://github.com/llvm/llvm-project/pull/202487

>From ce4183d294d60bea5184921253ead763ab62b5c8 Mon Sep 17 00:00:00 2001
From: Wael Yehia <[email protected]>
Date: Wed, 3 Jun 2026 16:24:51 -0400
Subject: [PATCH 1/8] [PGO] Implement PGO counter promotion for atomic updates

Currently PGO counter updates are promoted/hoisted out of loops where
possible, in order to reduce memory accesses. The promotion is
implemented via the LoadAndStorePromoter and SSAUpdater classes.
When the updates are relaxed atomic, however, hoisting doesn't happen.

Reading the semantics of relaxed atomics, it should be legal to do
similar promotions, but teaching LoadAndStorePromoter and SSAUpdater
about atomics seems like a lot of work and would touch common code used
by a lot of LLVM optimizations such as SROA.
An easier approach is to perform the promotions on non-atomic updates,
then transform the promoted updates to (relaxed) atomic.
---
 .../Instrumentation/InstrProfiling.cpp        | 76 ++++++++++++++++++-
 .../PGOProfile/atomic_counter_promote.ll      | 61 +++++++++++++++
 2 files changed, 134 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/Transforms/PGOProfile/atomic_counter_promote.ll

diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp 
b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 8e4ba41919768..ed01912ccffbd 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -15,6 +15,7 @@
 #include "llvm/Transforms/Instrumentation/InstrProfiling.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
@@ -38,6 +39,7 @@
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
@@ -117,6 +119,16 @@ cl::opt<bool> AtomicCounterUpdateAll(
     cl::desc("Make all profile counter updates atomic (for testing only)"),
     cl::init(false));
 
+cl::opt<bool> AtomicCounterPromote(
+    "atomic-counter-promote",
+    cl::desc("Atomize profile counter updates and promote where possible"),
+    cl::init(false));
+
+cl::opt<bool> SanityCheck(
+    "atomic-counter-promote-check",
+    cl::desc("Check that all profile counter updates were made atomic"),
+    cl::init(false));
+
 cl::opt<bool> AtomicCounterUpdatePromoted(
     "atomic-counter-update-promoted",
     cl::desc("Do counter update using atomic fetch add "
@@ -225,6 +237,20 @@ static SampledInstrumentationConfig 
getSampledInstrumentationConfig() {
 
 using LoadStorePair = std::pair<Instruction *, Instruction *>;
 
+static void makeAtomic(Instruction *Load, Instruction *Store) {
+  // assert the load and store are accessing the same memory?
+  auto *Addition = dyn_cast<BinaryOperator>(Store->getOperand(0));
+  assert(Addition && Addition->getOpcode() == Instruction::BinaryOps::Add);
+  auto *Addend = Addition->getOperand(1);
+
+  IRBuilder<> Builder(Load);
+  Builder.CreateAtomicRMW(AtomicRMWInst::Add, Store->getOperand(1), Addend,
+                          MaybeAlign(), AtomicOrdering::Monotonic);
+  Store->eraseFromParent();
+  Addition->eraseFromParent();
+  Load->eraseFromParent();
+}
+
 static uint64_t getIntModuleFlagOrZero(const Module &M, StringRef Flag) {
   auto *MD = dyn_cast_or_null<ConstantAsMetadata>(M.getModuleFlag(Flag));
   if (!MD)
@@ -470,6 +496,12 @@ class PGOCounterPromoterHelper : public 
LoadAndStorePromoter {
         Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, LiveInValue,
                                 MaybeAlign(),
                                 AtomicOrdering::SequentiallyConsistent);
+      // Generate the relaxed atomic RMW if we've asked for it and no more
+      // promotion is possible.
+      else if (AtomicCounterPromote &&
+               (!IterativeCounterPromotion || !LI.getLoopFor(ExitBlock)))
+        Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, LiveInValue,
+                                MaybeAlign(), AtomicOrdering::Monotonic);
       else {
         LoadInst *OldVal = Builder.CreateLoad(Ty, Addr, "pgocount.promoted");
         auto *NewVal = Builder.CreateAdd(OldVal, LiveInValue);
@@ -524,6 +556,20 @@ class PGOCounterPromoter {
   }
 
   bool run(int64_t *NumPromoted) {
+    // In this function we examine loop L and other parameters to decide what
+    // candidates are promotable. Once we've promoted what we can, we convert
+    // all remaining candidates to use atomics. This requires that promoted
+    // candidates are set to nullptr in the LoopToCandidates[&L] array.
+    llvm::scope_exit Cleanup([&]() {
+      if (!AtomicCounterPromote)
+        return;
+      for (auto &Cand : LoopToCandidates[&L]) {
+        if (Cand.first != nullptr && Cand.second != nullptr)
+          makeAtomic(Cand.first, Cand.second);
+        Cand = {nullptr, nullptr};
+      }
+    });
+
     // Skip 'infinite' loops:
     if (ExitBlocks.size() == 0)
       return false;
@@ -545,7 +591,6 @@ class PGOCounterPromoter {
 
     unsigned Promoted = 0;
     for (auto &Cand : LoopToCandidates[&L]) {
-
       SmallVector<PHINode *, 4> NewPHIs;
       SSAUpdater SSA(&NewPHIs);
       Value *InitVal = ConstantInt::get(Cand.first->getType(), 0);
@@ -567,6 +612,7 @@ class PGOCounterPromoter {
                                         L.getLoopPreheader(), ExitBlocks,
                                         InsertPts, LoopToCandidates, LI);
       Promoter.run(SmallVector<Instruction *, 2>({Cand.first, Cand.second}));
+      Cand = {nullptr, nullptr};
       Promoted++;
       if (Promoted >= MaxProm)
         break;
@@ -870,10 +916,27 @@ bool InstrLowerer::isSamplingEnabled() const {
 bool InstrLowerer::isCounterPromotionEnabled() const {
   if (DoCounterPromotion.getNumOccurrences() > 0)
     return DoCounterPromotion;
-
+  if (AtomicCounterPromote)
+    return true;
   return Options.DoCounterPromotion;
 }
 
+static void doAtomicPromotionCheck(Function *F) {
+  for (const llvm::Instruction &I : llvm::instructions(F)) {
+    const Value *Addr = nullptr;
+    if (const LoadInst *LI = dyn_cast<LoadInst>(&I))
+      Addr = LI->getOperand(0);
+    else if (const StoreInst *LI = dyn_cast<StoreInst>(&I))
+      Addr = LI->getOperand(1);
+
+    if (Addr && Addr->stripInBoundsOffsets()->getName().starts_with(
+                    getInstrProfCountersVarPrefix())) {
+      LLVM_DEBUG(dbgs() << "Missed candidate: "; I.dump());
+      assert(false && "Candidate load/store not converted to atomic");
+    }
+  }
+}
+
 void InstrLowerer::promoteCounterLoadStores(Function *F) {
   if (!isCounterPromotionEnabled())
     return;
@@ -894,8 +957,11 @@ void InstrLowerer::promoteCounterLoadStores(Function *F) {
     auto *CounterStore = LoadStore.second;
     BasicBlock *BB = CounterLoad->getParent();
     Loop *ParentLoop = LI.getLoopFor(BB);
-    if (!ParentLoop)
+    if (!ParentLoop) {
+      if (AtomicCounterPromote)
+        makeAtomic(CounterLoad, CounterStore);
       continue;
+    }
     LoopPromotionCandidates[ParentLoop].emplace_back(CounterLoad, 
CounterStore);
   }
 
@@ -907,6 +973,9 @@ void InstrLowerer::promoteCounterLoadStores(Function *F) {
     PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI, BFI.get());
     Promoter.run(&TotalCountersPromoted);
   }
+
+  if (AtomicCounterPromote && SanityCheck)
+    doAtomicPromotionCheck(F);
 }
 
 static bool needsRuntimeHookUnconditionally(const Triple &TT) {
@@ -1224,6 +1293,7 @@ void InstrLowerer::lowerIncrement(InstrProfIncrementInst 
*Inc) {
     Value *Load = Builder.CreateLoad(IncStep->getType(), Addr, "pgocount");
     auto *Count = Builder.CreateAdd(Load, Inc->getStep());
     auto *Store = Builder.CreateStore(Count, Addr);
+
     if (isCounterPromotionEnabled())
       PromotionCandidates.emplace_back(cast<Instruction>(Load), Store);
   }
diff --git a/llvm/test/Transforms/PGOProfile/atomic_counter_promote.ll 
b/llvm/test/Transforms/PGOProfile/atomic_counter_promote.ll
new file mode 100644
index 0000000000000..28c83eb1a2aa5
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/atomic_counter_promote.ll
@@ -0,0 +1,61 @@
+; RUN: opt < %s -passes=instrprof -atomic-counter-promote -S | FileCheck %s
+
+; CHECK: define i32 @foo(i32 %n) {
+; CHECK: entry:
+; CHECK:   atomicrmw add {{.*}}ptr @__profc_foo
+;
+; CHECK: for.cond.for.cond.cleanup_crit_edge:
+; CHECK-NOT: br
+; CHECK:      atomicrmw add {{.*}}ptr @__profc_foo
+; CHECK-NEXT: atomicrmw add {{.*}}ptr @__profc_foo
+
+@__profn_foo = private constant [3 x i8] c"foo"
+
+define i32 @foo(i32 %n) {
+entry:
+  call void @llvm.instrprof.increment(ptr @__profn_foo, i64 
1124680652598534200, i32 3, i32 2)
+  %cmp16 = icmp slt i32 0, %n
+  br i1 %cmp16, label %for.cond1.preheader.lr.ph, label %for.cond.cleanup
+
+for.cond1.preheader.lr.ph:
+  br label %for.cond1.preheader
+
+for.cond1.preheader:
+  %i.018 = phi i32 [ 0, %for.cond1.preheader.lr.ph ], [ %inc6, 
%for.cond.cleanup3 ]
+  %x.017 = phi i32 [ 0, %for.cond1.preheader.lr.ph ], [ %x.1.lcssa, 
%for.cond.cleanup3 ]
+  %cmp213 = icmp slt i32 0, %n
+  br i1 %cmp213, label %for.body4.lr.ph, label %for.cond.cleanup3
+
+for.body4.lr.ph:
+  br label %for.body4
+
+for.cond.for.cond.cleanup_crit_edge:
+  %split19 = phi i32 [ %x.1.lcssa, %for.cond.cleanup3 ]
+  br label %for.cond.cleanup
+
+for.cond.cleanup:
+  %x.0.lcssa = phi i32 [ %split19, %for.cond.for.cond.cleanup_crit_edge ], [ 
0, %entry ]
+  ret i32 %x.0.lcssa
+
+for.cond1.for.cond.cleanup3_crit_edge:
+  %split = phi i32 [ %add, %for.body4 ]
+  br label %for.cond.cleanup3
+
+for.cond.cleanup3:
+  %x.1.lcssa = phi i32 [ %split, %for.cond1.for.cond.cleanup3_crit_edge ], [ 
%x.017, %for.cond1.preheader ]
+  call void @llvm.instrprof.increment(ptr @__profn_foo, i64 
1124680652598534200, i32 3, i32 1)
+  %inc6 = add nuw nsw i32 %i.018, 1
+  %cmp = icmp slt i32 %inc6, %n
+  br i1 %cmp, label %for.cond1.preheader, label 
%for.cond.for.cond.cleanup_crit_edge
+
+for.body4:
+  %j.015 = phi i32 [ 0, %for.body4.lr.ph ], [ %inc, %for.body4 ]
+  %x.114 = phi i32 [ %x.017, %for.body4.lr.ph ], [ %add, %for.body4 ]
+  call void @llvm.instrprof.increment(ptr @__profn_foo, i64 
1124680652598534200, i32 3, i32 0)
+  %add = add nsw i32 %x.114, %j.015
+  %inc = add nuw nsw i32 %j.015, 1
+  %cmp2 = icmp slt i32 %inc, %n
+  br i1 %cmp2, label %for.body4, label %for.cond1.for.cond.cleanup3_crit_edge
+}
+
+declare void @llvm.instrprof.increment(ptr, i64, i32, i32)

>From a6cf23432401ca3a323545c2cd67b33cee8c5fa6 Mon Sep 17 00:00:00 2001
From: Wael Yehia <[email protected]>
Date: Mon, 15 Jun 2026 04:48:21 +0000
Subject: [PATCH 2/8] code review

---
 .../Instrumentation/InstrProfiling.cpp        | 80 +++++++++----------
 .../PGOProfile/atomic_counter_promote.ll      |  2 +-
 2 files changed, 41 insertions(+), 41 deletions(-)

diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp 
b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index ed01912ccffbd..c31a96dbefbb2 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -15,7 +15,6 @@
 #include "llvm/Transforms/Instrumentation/InstrProfiling.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
@@ -119,13 +118,8 @@ cl::opt<bool> AtomicCounterUpdateAll(
     cl::desc("Make all profile counter updates atomic (for testing only)"),
     cl::init(false));
 
-cl::opt<bool> AtomicCounterPromote(
-    "atomic-counter-promote",
-    cl::desc("Atomize profile counter updates and promote where possible"),
-    cl::init(false));
-
-cl::opt<bool> SanityCheck(
-    "atomic-counter-promote-check",
+cl::opt<bool> VerifyAtomicPromotion(
+    "verify-atomic-counter-promoted",
     cl::desc("Check that all profile counter updates were made atomic"),
     cl::init(false));
 
@@ -238,7 +232,6 @@ static SampledInstrumentationConfig 
getSampledInstrumentationConfig() {
 using LoadStorePair = std::pair<Instruction *, Instruction *>;
 
 static void makeAtomic(Instruction *Load, Instruction *Store) {
-  // assert the load and store are accessing the same memory?
   auto *Addition = dyn_cast<BinaryOperator>(Store->getOperand(0));
   assert(Addition && Addition->getOpcode() == Instruction::BinaryOps::Add);
   auto *Addend = Addition->getOperand(1);
@@ -337,6 +330,9 @@ class InstrLowerer final {
   /// Returns true if profile counter update register promotion is enabled.
   bool isCounterPromotionEnabled() const;
 
+  /// Returns true if profile counter updates should be atomic.
+  bool isAtomic() const;
+
   /// Return true if profile sampling is enabled.
   bool isSamplingEnabled() const;
 
@@ -458,9 +454,10 @@ class PGOCounterPromoterHelper : public 
LoadAndStorePromoter {
       BasicBlock *PH, ArrayRef<BasicBlock *> ExitBlocks,
       ArrayRef<Instruction *> InsertPts,
       DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
-      LoopInfo &LI)
+      LoopInfo &LI, bool IsAtomic)
       : LoadAndStorePromoter({L, S}, SSA), Store(S), ExitBlocks(ExitBlocks),
-        InsertPts(InsertPts), LoopToCandidates(LoopToCands), LI(LI) {
+        InsertPts(InsertPts), LoopToCandidates(LoopToCands), LI(LI),
+        IsAtomic(IsAtomic) {
     assert(isa<LoadInst>(L));
     assert(isa<StoreInst>(S));
     SSA.AddAvailableValue(PH, Init);
@@ -490,16 +487,11 @@ class PGOCounterPromoterHelper : public 
LoadAndStorePromoter {
         Addr = Builder.CreateIntToPtr(BiasInst,
                                       
PointerType::getUnqual(Ty->getContext()));
       }
-      if (AtomicCounterUpdatePromoted)
-        // automic update currently can only be promoted across the current
-        // loop, not the whole loop nest.
-        Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, LiveInValue,
-                                MaybeAlign(),
-                                AtomicOrdering::SequentiallyConsistent);
       // Generate the relaxed atomic RMW if we've asked for it and no more
       // promotion is possible.
-      else if (AtomicCounterPromote &&
-               (!IterativeCounterPromotion || !LI.getLoopFor(ExitBlock)))
+      if (AtomicCounterUpdatePromoted ||
+               (IsAtomic &&
+                (!IterativeCounterPromotion || !LI.getLoopFor(ExitBlock))))
         Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, LiveInValue,
                                 MaybeAlign(), AtomicOrdering::Monotonic);
       else {
@@ -523,6 +515,7 @@ class PGOCounterPromoterHelper : public 
LoadAndStorePromoter {
   ArrayRef<Instruction *> InsertPts;
   DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
   LoopInfo &LI;
+  const bool IsAtomic;
 };
 
 /// A helper class to do register promotion for all profile counter
@@ -532,8 +525,9 @@ class PGOCounterPromoter {
 public:
   PGOCounterPromoter(
       DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
-      Loop &CurLoop, LoopInfo &LI, BlockFrequencyInfo *BFI)
-      : LoopToCandidates(LoopToCands), L(CurLoop), LI(LI), BFI(BFI) {
+      Loop &CurLoop, LoopInfo &LI, BlockFrequencyInfo *BFI, bool IsAtomic)
+      : LoopToCandidates(LoopToCands), L(CurLoop), LI(LI), BFI(BFI),
+        IsAtomic(IsAtomic) {
 
     // Skip collection of ExitBlocks and InsertPts for loops that will not be
     // able to have counters promoted.
@@ -556,20 +550,17 @@ class PGOCounterPromoter {
   }
 
   bool run(int64_t *NumPromoted) {
-    // In this function we examine loop L and other parameters to decide what
-    // candidates are promotable. Once we've promoted what we can, we convert
-    // all remaining candidates to use atomics. This requires that promoted
-    // candidates are set to nullptr in the LoopToCandidates[&L] array.
-    llvm::scope_exit Cleanup([&]() {
-      if (!AtomicCounterPromote)
-        return;
+    bool RC = PromoteCandidates(NumPromoted);
+    if (IsAtomic)
       for (auto &Cand : LoopToCandidates[&L]) {
         if (Cand.first != nullptr && Cand.second != nullptr)
           makeAtomic(Cand.first, Cand.second);
-        Cand = {nullptr, nullptr};
       }
-    });
+    return RC;
+  }
 
+private:
+  bool PromoteCandidates(int64_t *NumPromoted) {
     // Skip 'infinite' loops:
     if (ExitBlocks.size() == 0)
       return false;
@@ -589,6 +580,7 @@ class PGOCounterPromoter {
     if (MaxProm == 0)
       return false;
 
+    const void *Ptr = LoopToCandidates.getPointerIntoBucketsArray();
     unsigned Promoted = 0;
     for (auto &Cand : LoopToCandidates[&L]) {
       SmallVector<PHINode *, 4> NewPHIs;
@@ -608,11 +600,15 @@ class PGOCounterPromoter {
           continue;
       }
 
-      PGOCounterPromoterHelper Promoter(Cand.first, Cand.second, SSA, InitVal,
-                                        L.getLoopPreheader(), ExitBlocks,
-                                        InsertPts, LoopToCandidates, LI);
+      PGOCounterPromoterHelper Promoter(
+          Cand.first, Cand.second, SSA, InitVal, L.getLoopPreheader(),
+          ExitBlocks, InsertPts, LoopToCandidates, LI, IsAtomic);
       Promoter.run(SmallVector<Instruction *, 2>({Cand.first, Cand.second}));
+
+      assert(LoopToCandidates.isPointerIntoBucketsArray(Ptr) &&
+             "References into LoopToCandidates might be invalid");
       Cand = {nullptr, nullptr};
+
       Promoted++;
       if (Promoted >= MaxProm)
         break;
@@ -706,6 +702,7 @@ class PGOCounterPromoter {
   Loop &L;
   LoopInfo &LI;
   BlockFrequencyInfo *BFI;
+  const bool IsAtomic;
 };
 
 enum class ValueProfilingCallType {
@@ -916,11 +913,13 @@ bool InstrLowerer::isSamplingEnabled() const {
 bool InstrLowerer::isCounterPromotionEnabled() const {
   if (DoCounterPromotion.getNumOccurrences() > 0)
     return DoCounterPromotion;
-  if (AtomicCounterPromote)
-    return true;
   return Options.DoCounterPromotion;
 }
 
+bool InstrLowerer::isAtomic() const {
+  return Options.Atomic || AtomicCounterUpdateAll;
+}
+
 static void doAtomicPromotionCheck(Function *F) {
   for (const llvm::Instruction &I : llvm::instructions(F)) {
     const Value *Addr = nullptr;
@@ -958,7 +957,7 @@ void InstrLowerer::promoteCounterLoadStores(Function *F) {
     BasicBlock *BB = CounterLoad->getParent();
     Loop *ParentLoop = LI.getLoopFor(BB);
     if (!ParentLoop) {
-      if (AtomicCounterPromote)
+      if (isAtomic())
         makeAtomic(CounterLoad, CounterStore);
       continue;
     }
@@ -970,11 +969,12 @@ void InstrLowerer::promoteCounterLoadStores(Function *F) {
   // Do a post-order traversal of the loops so that counter updates can be
   // iteratively hoisted outside the loop nest.
   for (auto *Loop : llvm::reverse(Loops)) {
-    PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI, BFI.get());
+    PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI, BFI.get(),
+                                isAtomic());
     Promoter.run(&TotalCountersPromoted);
   }
 
-  if (AtomicCounterPromote && SanityCheck)
+  if (isAtomic() && VerifyAtomicPromotion)
     doAtomicPromotionCheck(F);
 }
 
@@ -1284,7 +1284,7 @@ void InstrLowerer::lowerIncrement(InstrProfIncrementInst 
*Inc) {
     Value *StepI64 =
         Builder.CreateZExtOrTrunc(Inc->getStep(), Int64Ty, "step.i64");
     Builder.CreateCall(Callee, {CastAddr, Uniform, StepI64});
-  } else if (Options.Atomic || AtomicCounterUpdateAll ||
+  } else if ((!isCounterPromotionEnabled() && isAtomic()) ||
              (Inc->getIndex()->isNullValue() && AtomicFirstCounter)) {
     Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, Inc->getStep(),
                             MaybeAlign(), AtomicOrdering::Monotonic);
@@ -1354,7 +1354,7 @@ void InstrLowerer::lowerMCDCTestVectorBitmapUpdate(
   //  %mcdc.bits = load i8, ptr %4, align 1
   auto *Bitmap = Builder.CreateLoad(Int8Ty, BitmapByteAddr, "mcdc.bits");
 
-  if (Options.Atomic || AtomicCounterUpdateAll) {
+  if (isAtomic()) {
     // If ((Bitmap & Val) != Val), then execute atomic (Bitmap |= Val).
     // Note, just-loaded Bitmap might not be up-to-date. Use it just for
     // early testing.
diff --git a/llvm/test/Transforms/PGOProfile/atomic_counter_promote.ll 
b/llvm/test/Transforms/PGOProfile/atomic_counter_promote.ll
index 28c83eb1a2aa5..4dd3f3a249cf2 100644
--- a/llvm/test/Transforms/PGOProfile/atomic_counter_promote.ll
+++ b/llvm/test/Transforms/PGOProfile/atomic_counter_promote.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -passes=instrprof -atomic-counter-promote -S | FileCheck %s
+; RUN: opt < %s -passes=instrprof -instrprof-atomic-counter-update-all 
-do-counter-promotion -S | FileCheck %s
 
 ; CHECK: define i32 @foo(i32 %n) {
 ; CHECK: entry:

>From bf7967a96570b3fc3b999bf2ea20b6fab8f53247 Mon Sep 17 00:00:00 2001
From: Wael Yehia <[email protected]>
Date: Mon, 15 Jun 2026 14:41:58 +0000
Subject: [PATCH 3/8] update tests

---
 llvm/test/Transforms/PGOProfile/counter_promo.ll | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/test/Transforms/PGOProfile/counter_promo.ll 
b/llvm/test/Transforms/PGOProfile/counter_promo.ll
index f4c4d2a8123a3..16ff170c107c6 100644
--- a/llvm/test/Transforms/PGOProfile/counter_promo.ll
+++ b/llvm/test/Transforms/PGOProfile/counter_promo.ll
@@ -55,9 +55,9 @@ bb12:                                             ; preds = 
%bb9
 ; NONATOMIC_PROMO-NEXT: %[[PROMO3:[a-z0-9.]+]] = load {{.*}} 
@__profc_foo{{.*}} 2)
 ; NONATOMIC_PROMO-NEXT: add {{.*}} %[[PROMO3]], %[[LIVEOUT3]]
 ; NONATOMIC_PROMO-NEXT: store {{.*}}@__profc_foo{{.*}}2)
-; ATOMIC_PROMO: atomicrmw add {{.*}} @__profc_foo{{.*}}, i64 %[[LIVEOUT1]] 
seq_cst
-; ATOMIC_PROMO-NEXT: atomicrmw add {{.*}} @__profc_foo{{.*}}1), i64 
%[[LIVEOUT2]] seq_cst
-; ATOMIC_PROMO-NEXT: atomicrmw add {{.*}} @__profc_foo{{.*}}2), i64 
%[[LIVEOUT3]] seq_cst
+; ATOMIC_PROMO: atomicrmw add {{.*}} @__profc_foo{{.*}}, i64 %[[LIVEOUT1]] 
monotonic
+; ATOMIC_PROMO-NEXT: atomicrmw add {{.*}} @__profc_foo{{.*}}1), i64 
%[[LIVEOUT2]] monotonic
+; ATOMIC_PROMO-NEXT: atomicrmw add {{.*}} @__profc_foo{{.*}}2), i64 
%[[LIVEOUT3]] monotonic
 ; PROMO-NOT: @__profc_foo{{.*}})
 
 

>From 104f46b75c6c881c20bfc53b201d20d007697680 Mon Sep 17 00:00:00 2001
From: Wael Yehia <[email protected]>
Date: Mon, 15 Jun 2026 15:24:15 +0000
Subject: [PATCH 4/8] clang-format

---
 llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp 
b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index c31a96dbefbb2..6a54bbe993e2a 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -490,8 +490,8 @@ class PGOCounterPromoterHelper : public 
LoadAndStorePromoter {
       // Generate the relaxed atomic RMW if we've asked for it and no more
       // promotion is possible.
       if (AtomicCounterUpdatePromoted ||
-               (IsAtomic &&
-                (!IterativeCounterPromotion || !LI.getLoopFor(ExitBlock))))
+          (IsAtomic &&
+           (!IterativeCounterPromotion || !LI.getLoopFor(ExitBlock))))
         Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, LiveInValue,
                                 MaybeAlign(), AtomicOrdering::Monotonic);
       else {

>From 1c9b8b813c2fbbc0f64e1a5afa32fa3bd4ae5078 Mon Sep 17 00:00:00 2001
From: Wael Yehia <[email protected]>
Date: Mon, 15 Jun 2026 15:34:17 +0000
Subject: [PATCH 5/8] formatting

---
 llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp 
b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 6a54bbe993e2a..08031c4e94c36 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -550,17 +550,16 @@ class PGOCounterPromoter {
   }
 
   bool run(int64_t *NumPromoted) {
-    bool RC = PromoteCandidates(NumPromoted);
+    bool RC = promoteCandidates(NumPromoted);
     if (IsAtomic)
-      for (auto &Cand : LoopToCandidates[&L]) {
+      for (auto &Cand : LoopToCandidates[&L])
         if (Cand.first != nullptr && Cand.second != nullptr)
           makeAtomic(Cand.first, Cand.second);
-      }
     return RC;
   }
 
 private:
-  bool PromoteCandidates(int64_t *NumPromoted) {
+  bool promoteCandidates(int64_t *NumPromoted) {
     // Skip 'infinite' loops:
     if (ExitBlocks.size() == 0)
       return false;
@@ -1293,7 +1292,6 @@ void InstrLowerer::lowerIncrement(InstrProfIncrementInst 
*Inc) {
     Value *Load = Builder.CreateLoad(IncStep->getType(), Addr, "pgocount");
     auto *Count = Builder.CreateAdd(Load, Inc->getStep());
     auto *Store = Builder.CreateStore(Count, Addr);
-
     if (isCounterPromotionEnabled())
       PromotionCandidates.emplace_back(cast<Instruction>(Load), Store);
   }

>From 86efdb7904853c8b72b0979d863d9f10dac530f1 Mon Sep 17 00:00:00 2001
From: Wael Yehia <[email protected]>
Date: Tue, 16 Jun 2026 16:27:21 +0000
Subject: [PATCH 6/8] common out the TargetLoop calculation

---
 .../Transforms/Instrumentation/InstrProfiling.cpp   | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp 
b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 08031c4e94c36..035c41d092d7b 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -487,11 +487,11 @@ class PGOCounterPromoterHelper : public 
LoadAndStorePromoter {
         Addr = Builder.CreateIntToPtr(BiasInst,
                                       
PointerType::getUnqual(Ty->getContext()));
       }
+      auto *TargetLoop =
+          IterativeCounterPromotion ? LI.getLoopFor(ExitBlock) : nullptr;
       // Generate the relaxed atomic RMW if we've asked for it and no more
       // promotion is possible.
-      if (AtomicCounterUpdatePromoted ||
-          (IsAtomic &&
-           (!IterativeCounterPromotion || !LI.getLoopFor(ExitBlock))))
+      if ((IsAtomic && !TargetLoop) || AtomicCounterUpdatePromoted)
         Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, LiveInValue,
                                 MaybeAlign(), AtomicOrdering::Monotonic);
       else {
@@ -500,11 +500,8 @@ class PGOCounterPromoterHelper : public 
LoadAndStorePromoter {
         auto *NewStore = Builder.CreateStore(NewVal, Addr);
 
         // Now update the parent loop's candidate list:
-        if (IterativeCounterPromotion) {
-          auto *TargetLoop = LI.getLoopFor(ExitBlock);
-          if (TargetLoop)
-            LoopToCandidates[TargetLoop].emplace_back(OldVal, NewStore);
-        }
+        if (TargetLoop)
+          LoopToCandidates[TargetLoop].emplace_back(OldVal, NewStore);
       }
     }
   }

>From 84ece6f6a9b5c7fc903905cfa5fbed64634b2450 Mon Sep 17 00:00:00 2001
From: Wael Yehia <[email protected]>
Date: Tue, 16 Jun 2026 18:46:35 +0000
Subject: [PATCH 7/8] use report_fatal_error instead of assert(0)

---
 llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp 
b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 035c41d092d7b..f481ab23531bd 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -120,7 +120,8 @@ cl::opt<bool> AtomicCounterUpdateAll(
 
 cl::opt<bool> VerifyAtomicPromotion(
     "verify-atomic-counter-promoted",
-    cl::desc("Check that all profile counter updates were made atomic"),
+    cl::desc("Check that all profile counter updates were made atomic; no-op "
+             "if atomic updates are not requested (-fprofile-update=atomic)"),
     cl::init(false));
 
 cl::opt<bool> AtomicCounterUpdatePromoted(
@@ -927,7 +928,7 @@ static void doAtomicPromotionCheck(Function *F) {
     if (Addr && Addr->stripInBoundsOffsets()->getName().starts_with(
                     getInstrProfCountersVarPrefix())) {
       LLVM_DEBUG(dbgs() << "Missed candidate: "; I.dump());
-      assert(false && "Candidate load/store not converted to atomic");
+      report_fatal_error("Candidate load/store not converted to atomic");
     }
   }
 }

>From 1cb1268465e9582696547e989a18603f01216311 Mon Sep 17 00:00:00 2001
From: Wael Yehia <[email protected]>
Date: Wed, 17 Jun 2026 16:00:46 +0000
Subject: [PATCH 8/8] update release notes

---
 clang/docs/ReleaseNotes.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 5e7a0c76d4594..7e17bc3150629 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -406,6 +406,8 @@ Modified Compiler Flags
   by ``-unique-internal-linkage-names`` option. Now it uses a path that
   normalized in favor of the target system (same as the preprocessor does
   for the file macros) and allows the reproducable IDs on any build system.
+- ``-fprofile-update=atomic`` will now promote counter updates out of loops,
+  similar to the non-atomic case (`#202487 <https://github.com/llvm/llvm-project/pull/202487>`_).
 
 Removed Compiler Flags
 ----------------------

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to