[llvm-branch-commits] [flang] [mlir] [mlir][omp] Improve canonloop/iv naming (PR #159773)

2025-09-30 Thread Michael Kruse via llvm-branch-commits

https://github.com/Meinersbur updated 
https://github.com/llvm/llvm-project/pull/159773

>From b3919715ebe223b39dd789dcd471a864666d7008 Mon Sep 17 00:00:00 2001
From: Michael Kruse 
Date: Fri, 19 Sep 2025 14:43:48 +0200
Subject: [PATCH 1/8] Improve canonloop/iv naming

---
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp  | 237 +-
 .../Dialect/OpenMP/cli-canonical_loop.mlir| 127 --
 .../Dialect/OpenMP/cli-unroll-heuristic.mlir  |  28 +--
 3 files changed, 292 insertions(+), 100 deletions(-)

diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp 
b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 3d70e28ed23ab..cf549a6bb50b4 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -77,6 +77,178 @@ struct LLVMPointerPointerLikeModel
 };
 } // namespace
 
+/// Generate a name of a canonical loop nest of the format
+/// `(_s<num>_r<num>)*` that describes its nesting inside parent
+/// operations (`_r<num>`) and that operation's region (`_s<num>`). The region
+/// number is omitted if the parent operation has just one region. If a loop
+/// nest just consists of canonical loops nested inside each other, also uses
+/// `_d<num>` where <num> is the nesting depth of the loop.
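+/// For example, a (hypothetical) name like `canonloop_s0_d2` would describe
+/// a perfect nest of two canonical loops whose outermost loop is the first
+/// region-holding operation of its parent region.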
+static std::string generateLoopNestingName(StringRef prefix,
+   CanonicalLoopOp op) {
+  struct Component {
+// A region argument of an operation
+Operation *parentOp;
+size_t regionInOpIdx;
+bool isOnlyRegionInOp;
+bool skipRegion;
+
+// An operation somewhere in a parent region
+Operation *thisOp;
+Region *parentRegion;
+size_t opInRegionIdx;
+bool isOnlyOpInRegion;
+bool skipOp;
+int depth = -1;
+  };
+  SmallVector<Component> components;
+
+  // Gather a list of parent regions and operations, and the position within
+  // their parent
+  Operation *o = op.getOperation();
+  while (o) {
+if (o->hasTrait<OpTrait::IsIsolatedFromAbove>())
+  break;
+
+// Operation within a region
+Region *r = o->getParentRegion();
+if (!r)
+  break;
+
+llvm::ReversePostOrderTraversal<Block *> traversal(&r->getBlocks().front());
+size_t idx = 0;
+bool found = false;
+size_t sequentialIdx = -1;
+bool isOnlyLoop = true;
+for (Block *b : traversal) {
+  for (Operation &op : *b) {
+if (&op == o && !found) {
+  sequentialIdx = idx;
+  found = true;
+}
+if (op.getNumRegions()) {
+  idx += 1;
+  if (idx > 1)
+isOnlyLoop = false;
+}
+if (found && !isOnlyLoop)
+  break;
+  }
+}
+
+Component &comp = components.emplace_back();
+comp.thisOp = o;
+comp.parentRegion = r;
+comp.opInRegionIdx = sequentialIdx;
+comp.isOnlyOpInRegion = isOnlyLoop;
+
+// Region argument of an operation
+Operation *parent = r->getParentOp();
+
+comp.parentOp = parent;
+comp.regionInOpIdx = 0;
+comp.isOnlyRegionInOp = true;
+if (parent && parent->getRegions().size() > 1) {
+  auto getRegionIndex = [](Operation *o, Region *r) {
+for (auto [idx, region] : llvm::enumerate(o->getRegions())) {
+  if (&region == r)
+return idx;
+}
+llvm_unreachable("Region not child of its parent operation");
+  };
+  comp.regionInOpIdx = getRegionIndex(parent, r);
+  comp.isOnlyRegionInOp = false;
+}
+
+if (!parent)
+  break;
+
+// next parent
+o = parent;
+  }
+
+  // Reorder components from outermost to innermost
+  std::reverse(components.begin(), components.end());
+
+  // Determine whether a component is not needed
+  for (auto &c : components) {
+c.skipRegion = c.isOnlyRegionInOp;
+c.skipOp = c.isOnlyOpInRegion && !isa<CanonicalLoopOp>(c.thisOp);
+  }
+
+  // Find runs of perfect nests and merge them into a single component
+  int curNestRoot = 0;
+  int curNestDepth = 1;
+  auto mergeLoopNest = [&](int innermost) {
+auto outermost = curNestRoot;
+
+// Don't do anything if it does not consist of at least 2 loops
+if (outermost < innermost) {
+  for (auto i : llvm::seq(outermost + 1, innermost)) {
+components[i].skipOp = true;
+  }
+  components[innermost].depth = curNestDepth;
+}
+
+// Start new root
+curNestRoot = innermost + 1;
+curNestDepth = 1;
+  };
+  for (auto &&[i, c] : llvm::enumerate(components)) {
+if (i <= curNestRoot)
+  continue;
+
+// Check whether this region can be included
+if (!c.skipRegion) {
+  mergeLoopNest(i);
+  continue;
+}
+
+if (c.skipOp)
+  continue;
+
+if (!c.isOnlyOpInRegion) {
+  mergeLoopNest(i);
+  continue;
+}
+
+curNestDepth += 1;
+  }
+
+  // Finalize innermost loop nest
+  mergeLoopNest(components.size() - 1);
+
+  // Outermost loop does not need a suffix if it has no sibling
+  for (auto &c : components) {
+if (c.skipOp)
+  continue;
+if (c.isOnlyOpInRegion)
+  c.skipOp = true;
+break;
+  }
+
+  // 

[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: optionally assume auth traps on failure (PR #139778)

2025-09-30 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/139778

>From 1c2efdf074ba37cffe170a4a33b54c1000f169b9 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Tue, 13 May 2025 19:50:41 +0300
Subject: [PATCH 1/2] [BOLT] Gadget scanner: optionally assume auth traps on
 failure

On AArch64 it is possible for an auth instruction to either return an
invalid address value on failure (without FEAT_FPAC) or generate an
error (with FEAT_FPAC). It thus may be possible to never emit explicit
pointer checks, if the target CPU is known to support FEAT_FPAC.

This commit implements an --auth-traps-on-failure command line option,
which essentially makes "safe-to-dereference" and "trusted" register
properties identical and disables scanning for authentication oracles
completely.
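
A minimal sketch of the two behaviors being modeled (editorial illustration,
not part of the patch; registers chosen arbitrarily):

```
autia x0, x1      ; authenticate x0 with modifier x1
                  ; without FEAT_FPAC: on failure x0 becomes an invalid
                  ; address, so a fault occurs only on a later use:
ldr   x2, [x0]    ; explicit checks are needed to catch failures earlier
                  ; with FEAT_FPAC: autia itself traps on failure, so a
                  ; failed authentication can never be observed silently
```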
---
 bolt/lib/Passes/PAuthGadgetScanner.cpp| 112 +++
 .../binary-analysis/AArch64/cmdline-args.test |   1 +
 .../AArch64/gs-pauth-authentication-oracles.s |   6 +-
 .../binary-analysis/AArch64/gs-pauth-calls.s  |   5 +-
 .../AArch64/gs-pauth-debug-output.s   | 177 ++---
 .../AArch64/gs-pauth-jump-table.s |   6 +-
 .../AArch64/gs-pauth-signing-oracles.s|  54 ++---
 .../AArch64/gs-pauth-tail-calls.s | 184 +-
 8 files changed, 318 insertions(+), 227 deletions(-)

diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp 
b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index af453a5aa6871..a19d59a3c92da 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -14,6 +14,7 @@
 #include "bolt/Passes/PAuthGadgetScanner.h"
 #include "bolt/Core/ParallelUtilities.h"
 #include "bolt/Passes/DataflowAnalysis.h"
+#include "bolt/Utils/CommandLineOpts.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/MC/MCInst.h"
@@ -26,6 +27,11 @@ namespace llvm {
 namespace bolt {
 namespace PAuthGadgetScanner {
 
+static cl::opt<bool> AuthTrapsOnFailure(
+"auth-traps-on-failure",
+cl::desc("Assume authentication instructions always trap on failure"),
+cl::cat(opts::BinaryAnalysisCategory));
+
 [[maybe_unused]] static void traceInst(const BinaryContext &BC, StringRef 
Label,
const MCInst &MI) {
   dbgs() << "  " << Label << ": ";
@@ -364,6 +370,34 @@ class SrcSafetyAnalysis {
 return Clobbered;
   }
 
+  std::optional<MCPhysReg> getRegMadeTrustedByChecking(const MCInst &Inst,
+   SrcState Cur) const {
+// This function cannot return multiple registers. This is never the case
+// on AArch64.
+std::optional<MCPhysReg> RegCheckedByInst =
+BC.MIB->getAuthCheckedReg(Inst, /*MayOverwrite=*/false);
+if (RegCheckedByInst && Cur.SafeToDerefRegs[*RegCheckedByInst])
+  return *RegCheckedByInst;
+
+auto It = CheckerSequenceInfo.find(&Inst);
+if (It == CheckerSequenceInfo.end())
+  return std::nullopt;
+
+MCPhysReg RegCheckedBySequence = It->second.first;
+const MCInst *FirstCheckerInst = It->second.second;
+
+// FirstCheckerInst should belong to the same basic block (see the
+// assertion in DataflowSrcSafetyAnalysis::run()), meaning it was
+// deterministically processed a few steps before this instruction.
+const SrcState &StateBeforeChecker = getStateBefore(*FirstCheckerInst);
+
+// The sequence checks the register, but it should be authenticated before.
+if (!StateBeforeChecker.SafeToDerefRegs[RegCheckedBySequence])
+  return std::nullopt;
+
+return RegCheckedBySequence;
+  }
+
   // Returns all registers that can be treated as if they are written by an
   // authentication instruction.
  SmallVector<MCPhysReg> getRegsMadeSafeToDeref(const MCInst &Point,
@@ -386,18 +420,38 @@ class SrcSafetyAnalysis {
 Regs.push_back(DstAndSrc->first);
 }
 
+// Make sure explicit checker sequence keeps register safe-to-dereference
+// when the register would be clobbered according to the regular rules:
+//
+//; LR is safe to dereference here
+//mov   x16, x30  ; start of the sequence, LR is s-t-d right before
+//xpaclri ; clobbers LR, LR is not safe anymore
+//cmp   x30, x16
+//b.eq  1f; end of the sequence: LR is marked as trusted
+//brk   0x1234
+//  1:
+//; at this point LR would be marked as trusted,
+//; but not safe-to-dereference
+//
+// or even just
+//
+//; X1 is safe to dereference here
+//ldr x0, [x1, #8]!
+//; X1 is trusted here, but it was clobbered due to address write-back
+if (auto CheckedReg = getRegMadeTrustedByChecking(Point, Cur))
+  Regs.push_back(*CheckedReg);
+
 return Regs;
   }
 
   // Returns all registers made trusted by this instruction.
  SmallVector<MCPhysReg> getRegsMadeTrusted(const MCInst &Point,
 const SrcState &Cur) const {
+assert(!AuthTrapsOnFailur

[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: make use of C++17 features and LLVM helpers (PR #141665)

2025-09-30 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/141665

>From 8d63466a0300e5749006619c638b1bb65072cbf7 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Tue, 27 May 2025 21:06:03 +0300
Subject: [PATCH] [BOLT] Gadget scanner: make use of C++17 features and LLVM
 helpers

Perform trivial syntactical cleanups:
* make use of structured binding declarations
* use LLVM utility functions when appropriate
* omit braces around single expression inside single-line LLVM_DEBUG()

This patch is NFC aside from minor debug output changes.
---
 bolt/lib/Passes/PAuthGadgetScanner.cpp| 64 +--
 .../AArch64/gs-pauth-debug-output.s   | 14 ++--
 2 files changed, 37 insertions(+), 41 deletions(-)

diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp 
b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index 3acc6654115f4..b032561a43274 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -88,8 +88,8 @@ class TrackedRegisters {
   TrackedRegisters(ArrayRef<MCPhysReg> RegsToTrack)
   : Registers(RegsToTrack),
 RegToIndexMapping(getMappingSize(RegsToTrack), NoIndex) {
-for (unsigned I = 0; I < RegsToTrack.size(); ++I)
-  RegToIndexMapping[RegsToTrack[I]] = I;
+for (auto [MappedIndex, Reg] : llvm::enumerate(RegsToTrack))
+  RegToIndexMapping[Reg] = MappedIndex;
   }
 
   ArrayRef<MCPhysReg> getRegisters() const { return Registers; }
@@ -203,9 +203,9 @@ struct SrcState {
 
 SafeToDerefRegs &= StateIn.SafeToDerefRegs;
 TrustedRegs &= StateIn.TrustedRegs;
-for (unsigned I = 0; I < LastInstWritingReg.size(); ++I)
-  for (const MCInst *J : StateIn.LastInstWritingReg[I])
-LastInstWritingReg[I].insert(J);
+for (auto [ThisSet, OtherSet] :
+ llvm::zip_equal(LastInstWritingReg, StateIn.LastInstWritingReg))
+  ThisSet.insert_range(OtherSet);
 return *this;
   }
 
@@ -224,11 +224,9 @@ struct SrcState {
 static void printInstsShort(raw_ostream &OS,
ArrayRef<SmallPtrSet<const MCInst *, 4>> Insts) {
   OS << "Insts: ";
-  for (unsigned I = 0; I < Insts.size(); ++I) {
-auto &Set = Insts[I];
+  for (auto [I, PtrSet] : llvm::enumerate(Insts)) {
 OS << "[" << I << "](";
-for (const MCInst *MCInstP : Set)
-  OS << MCInstP << " ";
+interleave(PtrSet, OS, " ");
 OS << ")";
   }
 }
@@ -416,8 +414,9 @@ class SrcSafetyAnalysis {
 // ... an address can be updated in a safe manner, producing the result
 // which is as trusted as the input address.
 if (auto DstAndSrc = BC.MIB->analyzeAddressArithmeticsForPtrAuth(Point)) {
-  if (Cur.SafeToDerefRegs[DstAndSrc->second])
-Regs.push_back(DstAndSrc->first);
+  auto [DstReg, SrcReg] = *DstAndSrc;
+  if (Cur.SafeToDerefRegs[SrcReg])
+Regs.push_back(DstReg);
 }
 
 // Make sure explicit checker sequence keeps register safe-to-dereference
@@ -469,8 +468,9 @@ class SrcSafetyAnalysis {
 // ... an address can be updated in a safe manner, producing the result
 // which is as trusted as the input address.
 if (auto DstAndSrc = BC.MIB->analyzeAddressArithmeticsForPtrAuth(Point)) {
-  if (Cur.TrustedRegs[DstAndSrc->second])
-Regs.push_back(DstAndSrc->first);
+  auto [DstReg, SrcReg] = *DstAndSrc;
+  if (Cur.TrustedRegs[SrcReg])
+Regs.push_back(DstReg);
 }
 
 return Regs;
@@ -865,9 +865,9 @@ struct DstState {
   return (*this = StateIn);
 
 CannotEscapeUnchecked &= StateIn.CannotEscapeUnchecked;
-for (unsigned I = 0; I < FirstInstLeakingReg.size(); ++I)
-  for (const MCInst *J : StateIn.FirstInstLeakingReg[I])
-FirstInstLeakingReg[I].insert(J);
+for (auto [ThisSet, OtherSet] :
+ llvm::zip_equal(FirstInstLeakingReg, StateIn.FirstInstLeakingReg))
+  ThisSet.insert_range(OtherSet);
 return *this;
   }
 
@@ -1033,8 +1033,7 @@ class DstSafetyAnalysis {
 
 // ... an address can be updated in a safe manner, or
 if (auto DstAndSrc = BC.MIB->analyzeAddressArithmeticsForPtrAuth(Inst)) {
-  MCPhysReg DstReg, SrcReg;
-  std::tie(DstReg, SrcReg) = *DstAndSrc;
+  auto [DstReg, SrcReg] = *DstAndSrc;
   // Note that *all* registers containing the derived values must be safe,
   // both source and destination ones. No temporaries are supported at now.
   if (Cur.CannotEscapeUnchecked[SrcReg] &&
@@ -1074,7 +1073,7 @@ class DstSafetyAnalysis {
 // If this instruction terminates the program immediately, no
 // authentication oracles are possible past this point.
 if (BC.MIB->isTrap(Point)) {
-  LLVM_DEBUG({ traceInst(BC, "Trap instruction found", Point); });
+  LLVM_DEBUG(traceInst(BC, "Trap instruction found", Point));
   DstState Next(NumRegs, RegsToTrackInstsFor.getNumTrackedRegisters());
   Next.CannotEscapeUnchecked.set();
   return Next;
@@ -1249,7 +1248,7 @@ class CFGUnawareDstSafetyAnalysis : public 
DstSafetyAnalysis,
   // starting to analyze Inst.
   

[llvm-branch-commits] [llvm] [AArch64] Split large loop dependence masks (PR #153187)

2025-09-30 Thread Benjamin Maxwell via llvm-branch-commits

MacDue wrote:

Hey, I was going over this patch again and tried to simplify the flow of 
`LowerLOOP_DEPENDENCE_MASK` so that each step does only one thing (widen, 
convert to scalable, or split). I have something working here, 
https://github.com/MacDue/llvm-project/commits/alias-intrinsic-backend-splitting/
 (split across two patches), that I think is a little simpler. It also seems to 
have improved the codegen for some fixed-width cases (but please double-check 
they're okay).

Feel free to take or modify the commits :slightly_smiling_face: 

https://github.com/llvm/llvm-project/pull/153187
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [CIR] Make all opt tests verify roundtrip (PR #161439)

2025-09-30 Thread Henrich Lauko via llvm-branch-commits

https://github.com/xlauko ready_for_review 
https://github.com/llvm/llvm-project/pull/161439
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [CIR] Make all opt tests verify roundtrip (PR #161439)

2025-09-30 Thread Amr Hesham via llvm-branch-commits

https://github.com/AmrDeveloper approved this pull request.


https://github.com/llvm/llvm-project/pull/161439
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [CIR] Upstream `AddressSpace` conversions support (PR #161212)

2025-09-30 Thread David Rivera via llvm-branch-commits


@@ -2283,7 +2295,10 @@ Address CIRGenFunction::createTempAlloca(mlir::Type ty, 
CharUnits align,
   // be different from the type defined by the language. For example,
   // in C++ the auto variables are in the default address space. Therefore
   // cast alloca to the default address space when necessary.
-  assert(!cir::MissingFeatures::addressSpace());
+  if (auto astAS = cir::toCIRAddressSpace(cgm.getLangTempAllocaAddressSpace());
+  getCIRAllocaAddressSpace() != astAS) {

RiverDave wrote:

I left a comment in place. As you mentioned, I'll tackle this soon in a 
different PR.

https://github.com/llvm/llvm-project/pull/161212
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [LoopUnroll] Fix block frequencies for epilogue (PR #159163)

2025-09-30 Thread Joel E. Denny via llvm-branch-commits

https://github.com/jdenny-ornl updated 
https://github.com/llvm/llvm-project/pull/159163

>From 5a9959313c0aebc1c707d19e30055cb925be7760 Mon Sep 17 00:00:00 2001
From: "Joel E. Denny" 
Date: Tue, 16 Sep 2025 16:03:11 -0400
Subject: [PATCH 1/5] [LoopUnroll] Fix block frequencies for epilogue

As another step in issue #135812, this patch fixes block frequencies
for partial loop unrolling with an epilogue remainder loop.  It does
not fully handle the case when the epilogue loop itself is unrolled.
That will be handled in the next patch.

For the guard and latch of each of the unrolled loop and epilogue
loop, this patch sets branch weights derived directly from the
original loop latch branch weights.  The total frequency of the
original loop body, summed across all its occurrences in the unrolled
loop and epilogue loop, is the same as in the original loop.  This
patch also sets `llvm.loop.estimated_trip_count` for the epilogue loop
instead of relying on the epilogue's latch branch weights to imply it.

This patch removes the XFAIL directives that PR #157754 added to the
test suite.
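
(Editorial worked example of the invariant: if the original loop body has an
estimated trip count of 10, unrolling by 4 with an epilogue should leave the
body executing 10 times per loop entry in total, e.g. roughly 2 trips through
the 4x-unrolled loop (8 body executions) plus 2 epilogue iterations; the new
branch weights are chosen so the block frequencies sum to exactly that.)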
---
 .../include/llvm/Transforms/Utils/LoopUtils.h |  32 
 .../llvm/Transforms/Utils/UnrollLoop.h|   4 +-
 llvm/lib/Transforms/Utils/LoopUnroll.cpp  |  31 ++--
 .../Transforms/Utils/LoopUnrollRuntime.cpp|  94 --
 llvm/lib/Transforms/Utils/LoopUtils.cpp   |  48 ++
 .../branch-weights-freq/unroll-epilog.ll  | 160 ++
 .../runtime-exit-phi-scev-invalidation.ll |   4 +-
 .../LoopUnroll/runtime-loop-branchweight.ll   |  56 +-
 .../Transforms/LoopUnroll/runtime-loop.ll |   9 +-
 .../LoopUnroll/unroll-heuristics-pgo.ll   |  64 +--
 10 files changed, 448 insertions(+), 54 deletions(-)
 create mode 100644 
llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-epilog.ll

diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h 
b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index c5dbb2bdd1dd8..71754b8f62a16 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -365,6 +365,38 @@ LLVM_ABI bool setLoopEstimatedTripCount(
 Loop *L, unsigned EstimatedTripCount,
 std::optional<unsigned> EstimatedLoopInvocationWeight = std::nullopt);
 
+/// Based on branch weight metadata, return either:
+/// - \c std::nullopt if the implementation is unable to handle the loop form
+///   of \p L (e.g., \p L must have a latch block that controls the loop exit).
+/// - Else, the estimated probability that, at the end of any iteration, the
+///   latch of \p L will start another iteration.  The result \c P is such that
+///   `0 <= P <= 1`, and `1 - P` is the probability of exiting the loop.
+std::optional<double> getLoopProbability(Loop *L);
+
+/// Set branch weight metadata for the latch of \p L to indicate that, at the
+/// end of any iteration, its estimated probability of starting another
+/// iteration is \p P.  Return false if the implementation is unable to handle
+/// the loop form of \p L (e.g., \p L must have a latch block that controls the
+/// loop exit).  Otherwise, return true.
+bool setLoopProbability(Loop *L, double P);
+
+/// Based on branch weight metadata, return either:
+/// - \c std::nullopt if the implementation cannot extract the probability
+///   (e.g., \p B must have exactly two target labels, so it must be a
+///   conditional branch).
+/// - The probability \c P that control flows from \p B to its first target
+///   label such that `1 - P` is the probability of control flowing to its
+///   second target label, or vice-versa if \p ForFirstTarget is false.
+std::optional<double> getBranchProbability(BranchInst *B, bool ForFirstTarget);
+
+/// Set branch weight metadata for \p B to indicate that \p P and `1 - P` are
+/// the probabilities of control flowing to its first and second target labels,
+/// respectively, or vice-versa if \p ForFirstTarget is false.  Return false if
+/// the implementation cannot set the probability (e.g., \p B must have exactly
+/// two target labels, so it must be a conditional branch).  Otherwise, return
+/// true.
+bool setBranchProbability(BranchInst *B, double P, bool ForFirstTarget);
+
 /// Check inner loop (L) backedge count is known to be invariant on all
 /// iterations of its outer loop. If the loop has no parent, this is trivially
 /// true.
diff --git a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h 
b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
index 871c13d972470..571a0af6fd0db 100644
--- a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
+++ b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
@@ -97,7 +97,9 @@ LLVM_ABI bool UnrollRuntimeLoopRemainder(
 LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
 const TargetTransformInfo *TTI, bool PreserveLCSSA,
 unsigned SCEVExpansionBudget, bool RuntimeUnrollMultiExit,
-Loop **ResultLoop = nullptr);
+Loop **ResultLoop = nullptr,
+std::optional OriginalTripCount = std::nullopt,

[llvm-branch-commits] [AllocToken, Clang] Infer type hints from sizeof expressions and casts (PR #156841)

2025-09-30 Thread Marco Elver via llvm-branch-commits


@@ -6672,16 +6762,24 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType,
   RValue Call = EmitCall(FnInfo, Callee, ReturnValue, Args, &LocalCallOrInvoke,
  E == MustTailCall, E->getExprLoc());
 
-  // Generate function declaration DISuprogram in order to be used
-  // in debug info about call sites.
-  if (CGDebugInfo *DI = getDebugInfo()) {
-if (auto *CalleeDecl = dyn_cast_or_null<FunctionDecl>(TargetDecl)) {
+  if (auto *CalleeDecl = dyn_cast_or_null<FunctionDecl>(TargetDecl)) {
+// Generate function declaration DISuprogram in order to be used
+// in debug info about call sites.
+if (CGDebugInfo *DI = getDebugInfo()) {
   FunctionArgList Args;
   QualType ResTy = BuildFunctionArgList(CalleeDecl, Args);
   DI->EmitFuncDeclForCallSite(LocalCallOrInvoke,
   DI->getFunctionType(CalleeDecl, ResTy, Args),
   CalleeDecl);
 }
+if (CalleeDecl->hasAttr<RestrictAttr>() ||

melver wrote:

It's the old name for `__attribute__((malloc))`. 

```
def Restrict : InheritableAttr {
  let Spellings = [Declspec<"restrict">, GCC<"malloc">];
  let Args = [ExprArgument<"Deallocator", /*opt=*/ 1>,
  ParamIdxArgument<"DeallocatorPtrArgIndex", /*opt=*/ 1>];
  let Subjects = SubjectList<[Function]>;
  let Documentation = [RestrictDocs];
}
```
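
For reference, a tiny usage sketch of the two spellings mapped to this
attribute (editorial example; the function names are invented):

```c++
// GCC spelling:
void *my_alloc(unsigned long n) __attribute__((malloc));
// Declspec spelling:
__declspec(restrict) void *my_alloc2(unsigned long n);
```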

https://github.com/llvm/llvm-project/pull/156841
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [CIR] Make all opt tests verify roundtrip (PR #161439)

2025-09-30 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-clang

Author: Henrich Lauko (xlauko)


Changes

This mirrors incubator changes from https://github.com/llvm/clangir/pull/1923

---
Full diff: https://github.com/llvm/llvm-project/pull/161439.diff


29 Files Affected:

- (modified) clang/test/CIR/IR/alloca.cir (+1-1) 
- (modified) clang/test/CIR/IR/array-ctor.cir (+1-1) 
- (modified) clang/test/CIR/IR/array-dtor.cir (+1-1) 
- (modified) clang/test/CIR/IR/array.cir (+1-1) 
- (modified) clang/test/CIR/IR/atomic.cir (+1-1) 
- (modified) clang/test/CIR/IR/binassign.cir (+1-1) 
- (modified) clang/test/CIR/IR/bitfield_info.cir (+1-1) 
- (modified) clang/test/CIR/IR/call.cir (+1-1) 
- (modified) clang/test/CIR/IR/cast.cir (+1-1) 
- (modified) clang/test/CIR/IR/cmp.cir (+1-1) 
- (modified) clang/test/CIR/IR/complex.cir (+1-1) 
- (modified) clang/test/CIR/IR/copy.cir (+1-1) 
- (modified) clang/test/CIR/IR/func.cir (+1-1) 
- (modified) clang/test/CIR/IR/global-init.cir (+1-1) 
- (modified) clang/test/CIR/IR/global-var-linkage.cir (+1-2) 
- (modified) clang/test/CIR/IR/global.cir (+1-1) 
- (modified) clang/test/CIR/IR/label.cir (+1-1) 
- (modified) clang/test/CIR/IR/module.cir (+1-2) 
- (modified) clang/test/CIR/IR/stack-save-restore.cir (+1-1) 
- (modified) clang/test/CIR/IR/struct.cir (+1-1) 
- (modified) clang/test/CIR/IR/switch-flat.cir (+1-1) 
- (modified) clang/test/CIR/IR/switch.cir (+1-1) 
- (modified) clang/test/CIR/IR/ternary.cir (+1-1) 
- (modified) clang/test/CIR/IR/throw.cir (+1-1) 
- (modified) clang/test/CIR/IR/unary.cir (+1-1) 
- (modified) clang/test/CIR/IR/vector.cir (+1-1) 
- (modified) clang/test/CIR/IR/vtable-addrpt.cir (+1-1) 
- (modified) clang/test/CIR/IR/vtable-attr.cir (+1-1) 
- (modified) clang/test/CIR/IR/vtt-addrpoint.cir (+1-1) 


```diff
diff --git a/clang/test/CIR/IR/alloca.cir b/clang/test/CIR/IR/alloca.cir
index 4a13c44292b35..d94da815f37a7 100644
--- a/clang/test/CIR/IR/alloca.cir
+++ b/clang/test/CIR/IR/alloca.cir
@@ -1,5 +1,5 @@
 
-// RUN: cir-opt %s | FileCheck %s
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
 
 !u64i = !cir.int<u, 64>
 !u8i = !cir.int<u, 8>
diff --git a/clang/test/CIR/IR/array-ctor.cir b/clang/test/CIR/IR/array-ctor.cir
index 2378992bbd9fc..fd2ec7eb93c23 100644
--- a/clang/test/CIR/IR/array-ctor.cir
+++ b/clang/test/CIR/IR/array-ctor.cir
@@ -1,5 +1,5 @@
 
-// RUN: cir-opt %s | FileCheck %s
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
 
 !u8i = !cir.int<u, 8>
 !rec_S = !cir.record
diff --git a/clang/test/CIR/IR/array-dtor.cir b/clang/test/CIR/IR/array-dtor.cir
index 6d08d1639f0db..1bb9ff9169a9d 100644
--- a/clang/test/CIR/IR/array-dtor.cir
+++ b/clang/test/CIR/IR/array-dtor.cir
@@ -1,4 +1,4 @@
-// RUN: cir-opt %s | FileCheck %s
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
 
 !u8i = !cir.int<u, 8>
 !rec_S = !cir.record
diff --git a/clang/test/CIR/IR/array.cir b/clang/test/CIR/IR/array.cir
index bba536062d740..ddc6b92b11ee9 100644
--- a/clang/test/CIR/IR/array.cir
+++ b/clang/test/CIR/IR/array.cir
@@ -1,4 +1,4 @@
-// RUN: cir-opt %s | FileCheck %s
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
 
 !s32i = !cir.int<s, 32>
 
diff --git a/clang/test/CIR/IR/atomic.cir b/clang/test/CIR/IR/atomic.cir
index 6ca5af2aac175..85207633a5294 100644
--- a/clang/test/CIR/IR/atomic.cir
+++ b/clang/test/CIR/IR/atomic.cir
@@ -1,4 +1,4 @@
-// RUN: cir-opt %s | FileCheck %s
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
 
 !s32i = !cir.int<s, 32>
 !u32i = !cir.int<u, 32>
diff --git a/clang/test/CIR/IR/binassign.cir b/clang/test/CIR/IR/binassign.cir
index 6d2c5c8ab6962..02471264d779e 100644
--- a/clang/test/CIR/IR/binassign.cir
+++ b/clang/test/CIR/IR/binassign.cir
@@ -1,4 +1,4 @@
-// RUN: cir-opt %s | cir-opt | FileCheck %s
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
 
 !s32i = !cir.int<s, 32>
 !s8i = !cir.int<s, 8>
diff --git a/clang/test/CIR/IR/bitfield_info.cir 
b/clang/test/CIR/IR/bitfield_info.cir
index 682e0903fd552..2d743fbfbf595 100644
--- a/clang/test/CIR/IR/bitfield_info.cir
+++ b/clang/test/CIR/IR/bitfield_info.cir
@@ -1,4 +1,4 @@
-// RUN: cir-opt %s | FileCheck %s
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
 
 !s32i = !cir.int<s, 32>
 !u32i = !cir.int<u, 32>
diff --git a/clang/test/CIR/IR/call.cir b/clang/test/CIR/IR/call.cir
index 9607df7202e0f..59f28be36846f 100644
--- a/clang/test/CIR/IR/call.cir
+++ b/clang/test/CIR/IR/call.cir
@@ -1,4 +1,4 @@
-// RUN: cir-opt %s | FileCheck %s
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
 
 !s32i = !cir.int<s, 32>
 
diff --git a/clang/test/CIR/IR/cast.cir b/clang/test/CIR/IR/cast.cir
index 11b1664871ef7..3f2fca9fc307b 100644
--- a/clang/test/CIR/IR/cast.cir
+++ b/clang/test/CIR/IR/cast.cir
@@ -1,4 +1,4 @@
-// RUN: cir-opt %s | cir-opt | FileCheck %s
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
 !s32i = !cir.int<s, 32>
 
 module  {
diff --git a/clang/test/CIR/IR/cmp.cir b/clang/test/CIR/IR/cmp.cir
index fdf538d7eef92..0d473986df1c2 100644
--- a/clang/test/CIR/IR/cmp.cir
+++ b/clang/test/CIR/IR/cmp.cir
@@ -1,4 +1,4 @@
-// RUN: cir-opt %s | cir-opt |

[llvm-branch-commits] [clang] [AllocToken, Clang] Implement TypeHashPointerSplit mode (PR #156840)

2025-09-30 Thread Marco Elver via llvm-branch-commits


@@ -205,6 +231,26 @@ class TypeHashMode : public ModeBase {
   }
 };
 
+/// Implementation for TokenMode::TypeHashPointerSplit.
+class TypeHashPointerSplitMode : public TypeHashMode {
+public:
+  using TypeHashMode::TypeHashMode;
+
+  uint64_t operator()(const CallBase &CB, OptimizationRemarkEmitter &ORE) {
+if (MaxTokens == 1)
+  return 0;
+const uint64_t HalfTokens =
+(MaxTokens ? MaxTokens : std::numeric_limits<uint64_t>::max()) / 2;
+const auto [N, H] = getHash(CB, ORE);
+if (!N)
+  return H; // fallback token

melver wrote:

It goes into the pointer-less bucket by default:
```
--- a/llvm/lib/Transforms/Instrumentation/AllocToken.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AllocToken.cpp
@@ -242,8 +242,12 @@ public:
 const uint64_t HalfTokens =
 (MaxTokens ? MaxTokens : std::numeric_limits<uint64_t>::max()) / 2;
 const auto [N, H] = getHash(CB, ORE);
-if (!N)
-  return H; // fallback token
+if (!N) {
+  // Pick the fallback token (ClFallbackToken), which by default is 0,
+  // meaning it'll fall into the pointer-less bucket. Override by setting
+  // -alloc-token-fallback if that is the wrong choice.
+  return H;
+}
```

Advanced users could e.g. set -alloc-token-fallback to some bucket outside the 
range of normal buckets, but I have no intuition if that's a good or bad choice 
if this is used for heap hardening strategies. So I wouldn't want to expose 
this as a standard "frontend option" either.

E.g. we're having discussions about whether we should pick the pointer-containing 
bucket as the default for fallbacks. The intuition behind that is to "protect 
pointer-containing allocations" from (more likely) buggy plain data/buffer 
allocations, but at the same time we could stick a plain buffer allocation 
(where inference failed) into the fallback, which makes the whole point moot. 
Ideally we end up with few/no fallback cases.
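
Roughly, the split can be pictured like this (editorial sketch; the helper
name and layout are invented for illustration, not the actual implementation):

```c++
// Token ids below HalfTokens serve one class of types, ids from
// HalfTokens upward serve the other (pointer-containing) class.
uint64_t tokenFor(uint64_t TypeHash, bool ContainsPointer,
                  uint64_t HalfTokens) {
  return (TypeHash % HalfTokens) + (ContainsPointer ? HalfTokens : 0);
}
```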

https://github.com/llvm/llvm-project/pull/156840
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] release/21.x: [libc++] Implement comparison operators for `tuple` added in C++23 (#148799) (PR #151808)

2025-09-30 Thread Hristo Hristov via llvm-branch-commits

H-G-Hristov wrote:

> @frederick-vs-ja If this isn't landing in LLVM 21, can you update the release 
> notes, etc.?

@frederick-vs-ja ping 

https://github.com/llvm/llvm-project/pull/151808
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [LoopUnroll] Fix block frequencies when no runtime (PR #157754)

2025-09-30 Thread Joel E. Denny via llvm-branch-commits

https://github.com/jdenny-ornl updated 
https://github.com/llvm/llvm-project/pull/157754

>From 75a8df62df2ef7e8c02d7a76120e57e2dd1a1539 Mon Sep 17 00:00:00 2001
From: "Joel E. Denny" 
Date: Tue, 9 Sep 2025 17:33:38 -0400
Subject: [PATCH 1/3] [LoopUnroll] Fix block frequencies when no runtime

This patch implements the LoopUnroll changes discussed in [[RFC] Fix
Loop Transformations to Preserve Block
Frequencies](https://discourse.llvm.org/t/rfc-fix-loop-transformations-to-preserve-block-frequencies/85785)
and is thus another step in addressing issue #135812.

In summary, for the case of partial loop unrolling without a runtime,
this patch changes LoopUnroll to:

- Maintain branch weights consistently with the original loop for the
  sake of preserving the total frequency of the original loop body.
- Store the new estimated trip count in the
  `llvm.loop.estimated_trip_count` metadata, introduced by PR #148758.
- Correct the new estimated trip count (e.g., 3 instead of 2) when the
  original estimated trip count (e.g., 10) divided by the unroll count
  (e.g., 4) leaves a remainder (e.g., 2).

There are loop unrolling cases this patch does not fully fix, such as
partial unrolling with a runtime and complete unrolling, and there are
two associated tests this patch marks as XFAIL.  They will be
addressed in future patches that should land with this patch.
---
 llvm/lib/Transforms/Utils/LoopUnroll.cpp  | 36 --
 .../peel.ll}  |  0
 .../branch-weights-freq/unroll-partial.ll | 68 +++
 .../LoopUnroll/runtime-loop-branchweight.ll   |  1 +
 .../LoopUnroll/unroll-heuristics-pgo.ll   |  1 +
 5 files changed, 100 insertions(+), 6 deletions(-)
 rename llvm/test/Transforms/LoopUnroll/{peel-branch-weights-freq.ll => 
branch-weights-freq/peel.ll} (100%)
 create mode 100644 
llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-partial.ll

diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp 
b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index 8a6c7789d1372..93c43396c54b6 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -499,9 +499,8 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo 
*LI,
 
   const unsigned MaxTripCount = SE->getSmallConstantMaxTripCount(L);
   const bool MaxOrZero = SE->isBackedgeTakenCountMaxOrZero(L);
-  unsigned EstimatedLoopInvocationWeight = 0;
   std::optional OriginalTripCount =
-  llvm::getLoopEstimatedTripCount(L, &EstimatedLoopInvocationWeight);
+  llvm::getLoopEstimatedTripCount(L);
 
   // Effectively "DCE" unrolled iterations that are beyond the max tripcount
   // and will never be executed.
@@ -1130,10 +1129,35 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, 
LoopInfo *LI,
 // We shouldn't try to use `L` anymore.
 L = nullptr;
   } else if (OriginalTripCount) {
-// Update the trip count. Note that the remainder has already logic
-// computing it in `UnrollRuntimeLoopRemainder`.
-setLoopEstimatedTripCount(L, *OriginalTripCount / ULO.Count,
-  EstimatedLoopInvocationWeight);
+// Update metadata for the estimated trip count.
+//
+// If ULO.Runtime, UnrollRuntimeLoopRemainder handles branch weights for 
the
+// remainder loop it creates, and the unrolled loop's branch weights are
+// adjusted below.  Otherwise, if unrolled loop iterations' latches become
+// unconditional, branch weights are adjusted above.  Otherwise, the
+// original loop's branch weights are correct for the unrolled loop, so do
+// not adjust them.
+// FIXME: Actually handle such unconditional latches and ULO.Runtime.
+//
+// For example, consider what happens if the unroll count is 4 for a loop
+// with an estimated trip count of 10 when we do not create a remainder 
loop
+// and all iterations' latches remain conditional.  Each unrolled
+// iteration's latch still has the same probability of exiting the loop as
+// it did when in the original loop, and thus it should still have the same
+// branch weights.  Each unrolled iteration's non-zero probability of
+// exiting already appropriately reduces the probability of reaching the
+// remaining iterations just as it did in the original loop.  Trying to 
also
+// adjust the branch weights of the final unrolled iteration's latch (i.e.,
+// the backedge for the unrolled loop as a whole) to reflect its new trip
+// count of 3 will erroneously further reduce its block frequencies.
+// However, in case an analysis later needs to estimate the trip count of
+// the unrolled loop as a whole without considering the branch weights for
+// each unrolled iteration's latch within it, we store the new trip count 
as
+// separate metadata.
+unsigned NewTripCount = *OriginalTripCount / ULO.Count;
+if (!ULO.Runtime && *OriginalTripCount % ULO.Count)
+  NewTripCount += 1;
+setLoopEstima

[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: prevent false positives due to jump tables (PR #138884)

2025-09-30 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/138884

>From f568ed034b0d9d91654f842653cd7260fe6d773d Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Tue, 6 May 2025 11:31:03 +0300
Subject: [PATCH] [BOLT] Gadget scanner: prevent false positives due to jump
 tables

As part of PAuth hardening, AArch64 LLVM backend can use a special
BR_JumpTable pseudo (enabled by -faarch64-jump-table-hardening
Clang option) which is expanded in the AsmPrinter into a contiguous
sequence without unsafe instructions in the middle.

This commit adds another target-specific callback to MCPlusBuilder
to make it possible to inhibit false positives for known-safe jump
table dispatch sequences. Without special handling, the branch
instruction is likely to be reported as a non-protected call (as its
destination is not produced by an auth instruction, PC-relative address
materialization, etc.) and possibly as a tail call being performed with
unsafe link register (as the detection of whether the branch instruction
is a tail call is a heuristic).

For now, only the specific instruction sequence used by the AArch64
LLVM backend is matched.
---
 bolt/include/bolt/Core/MCInstUtils.h  |   9 +
 bolt/include/bolt/Core/MCPlusBuilder.h|  14 +
 bolt/lib/Core/MCInstUtils.cpp |  20 +
 bolt/lib/Passes/PAuthGadgetScanner.cpp|  10 +
 .../Target/AArch64/AArch64MCPlusBuilder.cpp   |  73 ++
 .../AArch64/gs-pauth-jump-table.s | 703 ++
 6 files changed, 829 insertions(+)
 create mode 100644 bolt/test/binary-analysis/AArch64/gs-pauth-jump-table.s

diff --git a/bolt/include/bolt/Core/MCInstUtils.h 
b/bolt/include/bolt/Core/MCInstUtils.h
index 291e31e0e0fdf..a240ca07bd02c 100644
--- a/bolt/include/bolt/Core/MCInstUtils.h
+++ b/bolt/include/bolt/Core/MCInstUtils.h
@@ -101,6 +101,15 @@ class MCInstReference {
   /// this function may be called from multithreaded code.
   uint64_t computeAddress(const MCCodeEmitter *Emitter = nullptr) const;
 
+  /// Returns the only preceding instruction, or std::nullopt if multiple or no
+  /// predecessors are possible.
+  ///
+  /// If CFG information is available, basic block boundary can be crossed,
+  /// provided there is exactly one predecessor. If CFG is not available, the
+  /// preceding instruction in the offset order is returned, unless this is the
+  /// first instruction of the function.
+  std::optional<MCInstReference> getSinglePredecessor();
+
   raw_ostream &print(raw_ostream &OS) const;
 
 private:
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h 
b/bolt/include/bolt/Core/MCPlusBuilder.h
index 5b711b0e27bab..8c191b113afbc 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -15,6 +15,7 @@
 #define BOLT_CORE_MCPLUSBUILDER_H
 
 #include "bolt/Core/BinaryBasicBlock.h"
+#include "bolt/Core/MCInstUtils.h"
 #include "bolt/Core/MCPlus.h"
 #include "bolt/Core/Relocation.h"
 #include "llvm/ADT/ArrayRef.h"
@@ -718,6 +719,19 @@ class MCPlusBuilder {
 return std::nullopt;
   }
 
+  /// Tests if BranchInst corresponds to an instruction sequence which is known
+  /// to be a safe dispatch via jump table.
+  ///
+  /// The target can decide which instruction sequences to consider "safe" from
+  /// the Pointer Authentication point of view, such as any jump table dispatch
+  /// sequence without function calls inside, any sequence which is contiguous,
+  /// or only some specific well-known sequences.
+  virtual bool
+  isSafeJumpTableBranchForPtrAuth(MCInstReference BranchInst) const {
+llvm_unreachable("not implemented");
+return false;
+  }
+
   virtual bool isTerminator(const MCInst &Inst) const;
 
   virtual bool isNoop(const MCInst &Inst) const {
diff --git a/bolt/lib/Core/MCInstUtils.cpp b/bolt/lib/Core/MCInstUtils.cpp
index f505bf73c64eb..f07616cdb86b9 100644
--- a/bolt/lib/Core/MCInstUtils.cpp
+++ b/bolt/lib/Core/MCInstUtils.cpp
@@ -84,3 +84,23 @@ raw_ostream &MCInstReference::print(raw_ostream &OS) const {
   OS << ">";
   return OS;
 }
+
+std::optional<MCInstReference> MCInstReference::getSinglePredecessor() {
+  if (const RefInBB *Ref = tryGetRefInBB()) {
+if (Ref->Index != 0)
+  return MCInstReference(*Ref->BB, Ref->Index - 1);
+
+if (Ref->BB->pred_size() != 1)
+  return std::nullopt;
+
+BinaryBasicBlock &PredBB = **Ref->BB->pred_begin();
+assert(!PredBB.empty() && "Empty basic blocks are not supported yet");
+return MCInstReference(PredBB, *PredBB.rbegin());
+  }
+
+  const RefInBF &Ref = getRefInBF();
+  if (Ref.It == Ref.BF->instrs().begin())
+return std::nullopt;
+
+  return MCInstReference(*Ref.BF, std::prev(Ref.It));
+}
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp 
b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index cfe4b6ba785e4..af453a5aa6871 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -1364,6 +1364,11 @@ shouldReportUnsafeTailCall(const BinaryContext &BC, 
const BinaryFunction

[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: make use of C++17 features and LLVM helpers (PR #141665)

2025-09-30 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/141665

>From 848258caa8baa403782f72e29cb9fce12305d309 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Tue, 27 May 2025 21:06:03 +0300
Subject: [PATCH] [BOLT] Gadget scanner: make use of C++17 features and LLVM
 helpers

Perform trivial syntactical cleanups:
* make use of structured binding declarations
* use LLVM utility functions when appropriate
* omit braces around single expression inside single-line LLVM_DEBUG()

This patch is NFC aside from minor debug output changes.
---
 bolt/lib/Passes/PAuthGadgetScanner.cpp| 64 +--
 .../AArch64/gs-pauth-debug-output.s   | 14 ++--
 2 files changed, 37 insertions(+), 41 deletions(-)

diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp 
b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index 3acc6654115f4..b032561a43274 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -88,8 +88,8 @@ class TrackedRegisters {
   TrackedRegisters(ArrayRef<MCPhysReg> RegsToTrack)
   : Registers(RegsToTrack),
 RegToIndexMapping(getMappingSize(RegsToTrack), NoIndex) {
-for (unsigned I = 0; I < RegsToTrack.size(); ++I)
-  RegToIndexMapping[RegsToTrack[I]] = I;
+for (auto [MappedIndex, Reg] : llvm::enumerate(RegsToTrack))
+  RegToIndexMapping[Reg] = MappedIndex;
   }
 
   ArrayRef<MCPhysReg> getRegisters() const { return Registers; }
@@ -203,9 +203,9 @@ struct SrcState {
 
 SafeToDerefRegs &= StateIn.SafeToDerefRegs;
 TrustedRegs &= StateIn.TrustedRegs;
-for (unsigned I = 0; I < LastInstWritingReg.size(); ++I)
-  for (const MCInst *J : StateIn.LastInstWritingReg[I])
-LastInstWritingReg[I].insert(J);
+for (auto [ThisSet, OtherSet] :
+ llvm::zip_equal(LastInstWritingReg, StateIn.LastInstWritingReg))
+  ThisSet.insert_range(OtherSet);
 return *this;
   }
 
@@ -224,11 +224,9 @@ struct SrcState {
 static void printInstsShort(raw_ostream &OS,
ArrayRef<SmallPtrSet<const MCInst *, 4>> Insts) {
   OS << "Insts: ";
-  for (unsigned I = 0; I < Insts.size(); ++I) {
-auto &Set = Insts[I];
+  for (auto [I, PtrSet] : llvm::enumerate(Insts)) {
 OS << "[" << I << "](";
-for (const MCInst *MCInstP : Set)
-  OS << MCInstP << " ";
+interleave(PtrSet, OS, " ");
 OS << ")";
   }
 }
@@ -416,8 +414,9 @@ class SrcSafetyAnalysis {
 // ... an address can be updated in a safe manner, producing the result
 // which is as trusted as the input address.
 if (auto DstAndSrc = BC.MIB->analyzeAddressArithmeticsForPtrAuth(Point)) {
-  if (Cur.SafeToDerefRegs[DstAndSrc->second])
-Regs.push_back(DstAndSrc->first);
+  auto [DstReg, SrcReg] = *DstAndSrc;
+  if (Cur.SafeToDerefRegs[SrcReg])
+Regs.push_back(DstReg);
 }
 
 // Make sure explicit checker sequence keeps register safe-to-dereference
@@ -469,8 +468,9 @@ class SrcSafetyAnalysis {
 // ... an address can be updated in a safe manner, producing the result
 // which is as trusted as the input address.
 if (auto DstAndSrc = BC.MIB->analyzeAddressArithmeticsForPtrAuth(Point)) {
-  if (Cur.TrustedRegs[DstAndSrc->second])
-Regs.push_back(DstAndSrc->first);
+  auto [DstReg, SrcReg] = *DstAndSrc;
+  if (Cur.TrustedRegs[SrcReg])
+Regs.push_back(DstReg);
 }
 
 return Regs;
@@ -865,9 +865,9 @@ struct DstState {
   return (*this = StateIn);
 
 CannotEscapeUnchecked &= StateIn.CannotEscapeUnchecked;
-for (unsigned I = 0; I < FirstInstLeakingReg.size(); ++I)
-  for (const MCInst *J : StateIn.FirstInstLeakingReg[I])
-FirstInstLeakingReg[I].insert(J);
+for (auto [ThisSet, OtherSet] :
+ llvm::zip_equal(FirstInstLeakingReg, StateIn.FirstInstLeakingReg))
+  ThisSet.insert_range(OtherSet);
 return *this;
   }
 
@@ -1033,8 +1033,7 @@ class DstSafetyAnalysis {
 
 // ... an address can be updated in a safe manner, or
 if (auto DstAndSrc = BC.MIB->analyzeAddressArithmeticsForPtrAuth(Inst)) {
-  MCPhysReg DstReg, SrcReg;
-  std::tie(DstReg, SrcReg) = *DstAndSrc;
+  auto [DstReg, SrcReg] = *DstAndSrc;
   // Note that *all* registers containing the derived values must be safe,
   // both source and destination ones. No temporaries are supported at now.
   if (Cur.CannotEscapeUnchecked[SrcReg] &&
@@ -1074,7 +1073,7 @@ class DstSafetyAnalysis {
 // If this instruction terminates the program immediately, no
 // authentication oracles are possible past this point.
 if (BC.MIB->isTrap(Point)) {
-  LLVM_DEBUG({ traceInst(BC, "Trap instruction found", Point); });
+  LLVM_DEBUG(traceInst(BC, "Trap instruction found", Point));
   DstState Next(NumRegs, RegsToTrackInstsFor.getNumTrackedRegisters());
   Next.CannotEscapeUnchecked.set();
   return Next;
@@ -1249,7 +1248,7 @@ class CFGUnawareDstSafetyAnalysis : public 
DstSafetyAnalysis,
   // starting to analyze Inst.
   

[llvm-branch-commits] [llvm] [llvm][mustache] Avoid redundant saves in accessor splitting (PR #159197)

2025-09-30 Thread Paul Kirth via llvm-branch-commits

https://github.com/ilovepi updated 
https://github.com/llvm/llvm-project/pull/159197

>From 8401695d2adc3c509a2ed9eb0b4e8cd31a2ce3a5 Mon Sep 17 00:00:00 2001
From: Paul Kirth 
Date: Tue, 16 Sep 2025 00:11:47 -0700
Subject: [PATCH] [llvm][mustache] Avoid redundant saves in accessor splitting

The splitMustacheString function was saving StringRefs that
were already backed by an arena-allocated string. This was
unnecessary work. This change removes the redundant
Ctx.Saver.save() call.

This optimization provides a small but measurable performance
improvement on top of the single-pass tokenizer, most notably
reducing branch misses.

  Metric         | Baseline | Optimized | Change
  -------------- | -------- | --------- | -------
  Time (ms)      | 35.77    | 35.57     | -0.56%
  Cycles         | 35.16M   | 34.91M    | -0.71%
  Instructions   | 85.77M   | 85.54M    | -0.27%
  Branch Misses  | 113.9K   | 111.9K    | -1.76%
  Cache Misses   | 237.7K   | 242.1K    | +1.85%
---
 llvm/lib/Support/Mustache.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Support/Mustache.cpp b/llvm/lib/Support/Mustache.cpp
index 0053a425b758d..4786242cdfba9 100644
--- a/llvm/lib/Support/Mustache.cpp
+++ b/llvm/lib/Support/Mustache.cpp
@@ -51,7 +51,7 @@ static Accessor splitMustacheString(StringRef Str, 
MustacheContext &Ctx) {
   std::tie(Part, Str) = Str.split('.');
   // Each part of the accessor needs to be saved to the arena
   // to ensure it has a stable address.
-  Tokens.push_back(Ctx.Saver.save(Part.trim()));
+  Tokens.push_back(Part.trim());
 }
   }
   // Now, allocate memory for the array of StringRefs in the arena.

___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [LoongArch][DAGCombiner] Combine vand (vnot ..) to vandn (PR #161037)

2025-09-30 Thread Zhaoxin Yang via llvm-branch-commits

https://github.com/ylzsx updated 
https://github.com/llvm/llvm-project/pull/161037

>From 040e64772f8b48024f3390e5e402190c501d9302 Mon Sep 17 00:00:00 2001
From: yangzhaoxin 
Date: Thu, 25 Sep 2025 16:42:24 +0800
Subject: [PATCH 1/5] [LoongArch][DAGCombiner] Combine xor (and ..) to vandn

After this commit, DAGCombiner will have more opportunities to perform
vector folding. This patch includes several foldings, as follows:
- VANDN(x, NOT(y)) -> AND(NOT(x), NOT(y)) -> NOT(OR(x, y))
- VANDN(x, SplatVector(Imm)) -> AND(NOT(x), NOT(SplatVector(~Imm)))
---
 .../LoongArch/LoongArchISelLowering.cpp   | 155 ++
 .../Target/LoongArch/LoongArchISelLowering.h  |   3 +
 .../LoongArch/LoongArchLASXInstrInfo.td   |  26 +--
 .../Target/LoongArch/LoongArchLSXInstrInfo.td |  27 +--
 4 files changed, 185 insertions(+), 26 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp 
b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 94f53d5b85f10..30d4bac25da78 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -4939,6 +4939,96 @@ void LoongArchTargetLowering::ReplaceNodeResults(
   }
 }
 
+// Check if all elements in build_vector are the same or undef, and if so,
+// return true and set the splat element in SplatValue.
+static bool isSplatOrUndef(SDNode *N, SDValue &SplatValue) {
+  if (N->getOpcode() != ISD::BUILD_VECTOR)
+return false;
+  for (SDValue Op : N->ops()) {
+if (!Op.isUndef() && SplatValue && Op != SplatValue)
+  return false;
+if (!Op.isUndef())
+  SplatValue = Op;
+  }
+  return true;
+}
+
+// Helper to attempt to return a cheaper, bit-inverted version of \p V.
+static SDValue isNOT(SDValue V, SelectionDAG &DAG) {
+  // TODO: don't always ignore oneuse constraints.
+  V = peekThroughBitcasts(V);
+  EVT VT = V.getValueType();
+
+  // Match not(xor X, -1) -> X.
+  if (V.getOpcode() == ISD::XOR &&
+  (ISD::isBuildVectorAllOnes(V.getOperand(1).getNode()) ||
+   isAllOnesConstant(V.getOperand(1))))
+return V.getOperand(0);
+
+  // Match not(extract_subvector(not(X))) -> extract_subvector(X).
+  if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+  (isNullConstant(V.getOperand(1)) || V.getOperand(0).hasOneUse())) {
+if (SDValue Not = isNOT(V.getOperand(0), DAG)) {
+  Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
+  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Not), VT, Not,
+ V.getOperand(1));
+}
+  }
+
+  // Match not(SplatVector(not(X))) -> SplatVector(X).
+  SDValue SplatValue;
+  if (isSplatOrUndef(V.getNode(), SplatValue) &&
+  V->isOnlyUserOf(SplatValue.getNode())) {
+if (SDValue Not = isNOT(SplatValue, DAG)) {
+  Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
+  return DAG.getSplat(VT, SDLoc(Not), Not);
+}
+  }
+
+  // Match not(or(not(X),not(Y))) -> and(X, Y).
+  if (V.getOpcode() == ISD::OR && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
+  V.getOperand(0).hasOneUse() && V.getOperand(1).hasOneUse()) {
+// TODO: Handle cases with single NOT operand -> VANDN
+if (SDValue Op1 = isNOT(V.getOperand(1), DAG))
+  if (SDValue Op0 = isNOT(V.getOperand(0), DAG))
+return DAG.getNode(ISD::AND, SDLoc(V), VT, DAG.getBitcast(VT, Op0),
+   DAG.getBitcast(VT, Op1));
+  }
+
+  // TODO: Add more matching patterns. Such as,
+  // not(concat_vectors(not(X), not(Y))) -> concat_vectors(X, Y).
+  // not(slt(C, X)) -> slt(X - 1, C)
+
+  return SDValue();
+}
+
+/// Try to fold: (and (xor X, -1), Y) -> (vandn X, Y).
+static SDValue combineAndNotIntoVANDN(SDNode *N, const SDLoc &DL,
+  SelectionDAG &DAG) {
+  assert(N->getOpcode() == ISD::AND && "Unexpected opcode combine into ANDN");
+
+  MVT VT = N->getSimpleValueType(0);
+  if (!VT.is128BitVector() && !VT.is256BitVector())
+return SDValue();
+
+  SDValue X, Y;
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+
+  if (SDValue Not = isNOT(N0, DAG)) {
+X = Not;
+Y = N1;
+  } else if (SDValue Not = isNOT(N1, DAG)) {
+X = Not;
+Y = N0;
+  } else
+return SDValue();
+
+  X = DAG.getBitcast(VT, X);
+  Y = DAG.getBitcast(VT, Y);
+  return DAG.getNode(LoongArchISD::VANDN, DL, VT, X, Y);
+}
+
 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
  TargetLowering::DAGCombinerInfo &DCI,
  const LoongArchSubtarget &Subtarget) {
@@ -4960,6 +5050,9 @@ static SDValue performANDCombine(SDNode *N, SelectionDAG 
&DAG,
   if (!Subtarget.has32S())
 return SDValue();
 
+  if (SDValue R = combineAndNotIntoVANDN(N, DL, DAG))
+return R;
+
   // Op's second operand must be a shifted mask.
   if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
   !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
@@ -6628,6 +6721,65 @@ performEXTRACT_VECTOR_ELTCombine(SDNode *N, Select

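The not(or(not(X), not(Y))) -> and(X, Y) case above is De Morgan's law at
the DAG level, and the fold comment (and (xor X, -1), Y) -> (vandn X, Y)
shows that VANDN consumes one operand inverted. A scalar sanity check of
the identity, as a standalone illustration rather than part of the patch:

```
#include <cassert>
#include <cstdint>

int main() {
  // ~(~x | ~y) == x & y: the scalar form of the
  // not(or(not(X), not(Y))) -> and(X, Y) fold.
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned y = 0; y < 256; ++y)
      assert((uint8_t)~((uint8_t)~x | (uint8_t)~y) == (uint8_t)(x & y));
  return 0;
}
```
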
[llvm-branch-commits] [llvm] [AllocToken, Clang] Implement __builtin_infer_alloc_token() and llvm.alloc.token.id (PR #156842)

2025-09-30 Thread Vitaly Buka via llvm-branch-commits


@@ -334,21 +348,30 @@ bool AllocToken::instrumentFunction(Function &F) {
   // Do not apply any instrumentation for naked functions.
   if (F.hasFnAttribute(Attribute::Naked))
 return false;
-  if (F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation))
-return false;
   // Don't touch available_externally functions, their actual body is elsewhere.
   if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)
 return false;
-  // Only instrument functions that have the sanitize_alloc_token attribute.
-  if (!F.hasFnAttribute(Attribute::SanitizeAllocToken))
-return false;
 
   auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
   auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
   SmallVector, 4> AllocCalls;
+  SmallVector IntrinsicInsts;
 
   // Collect all allocation calls to avoid iterator invalidation.
   for (Instruction &I : instructions(F)) {
+// Collect all alloc_token_* intrinsics.
+if (auto *II = dyn_cast<IntrinsicInst>(&I);
+II && II->getIntrinsicID() == Intrinsic::alloc_token_id) {
+  IntrinsicInsts.emplace_back(II);
+  continue;
+}
+
+// Only instrument functions that have the sanitize_alloc_token attribute.

vitalybuka wrote:

Instead of a continue on each iteration of the loop, it would be cleaner to
hoist the check out of the loop and store it in a bool:

```
bool InstrumentFunction = F.hasFnAttribute(Attribute::SanitizeAllocToken) &&
                          !F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation);
```

https://github.com/llvm/llvm-project/pull/156842
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [llvm][mustache] Avoid extra allocations in parseSection (PR #159199)

2025-09-30 Thread Paul Kirth via llvm-branch-commits

https://github.com/ilovepi updated 
https://github.com/llvm/llvm-project/pull/159199

>From f048f53b5c4b4fa7d7fa67d880d06e52cdfb9524 Mon Sep 17 00:00:00 2001
From: Paul Kirth 
Date: Tue, 16 Sep 2025 09:40:04 -0700
Subject: [PATCH] [llvm][mustache] Avoid extra allocations in parseSection

We don't need extra allocations when concatenating raw bodies; computing
the total size first lets us reserve the buffer once.
---
 llvm/lib/Support/Mustache.cpp | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Support/Mustache.cpp b/llvm/lib/Support/Mustache.cpp
index 8eebeaec11925..5bd3c8d7d0d6b 100644
--- a/llvm/lib/Support/Mustache.cpp
+++ b/llvm/lib/Support/Mustache.cpp
@@ -602,9 +602,16 @@ void Parser::parseSection(ASTNode *Parent, ASTNode::Type Ty,
   size_t Start = CurrentPtr;
   parseMustache(CurrentNode);
   const size_t End = CurrentPtr - 1;
+
+  size_t RawBodySize = 0;
+  for (size_t I = Start; I < End; ++I)
+RawBodySize += Tokens[I].RawBody.size();
+
   SmallString<128> RawBody;
-  for (std::size_t I = Start; I < End; I++)
+  RawBody.reserve(RawBodySize);
+  for (std::size_t I = Start; I < End; ++I)
 RawBody += Tokens[I].RawBody;
+
   CurrentNode->setRawBody(Ctx.Saver.save(StringRef(RawBody)));
   Parent->addChild(CurrentNode);
 }

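The same two-pass pattern in isolation: sum the sizes first, then grow the
buffer once before appending. Names below are illustrative, not the patch's
API; a minimal sketch assuming LLVM's ADT headers:

```
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"

// Concatenate fragments with a single buffer growth instead of
// letting operator+= regrow the storage repeatedly.
static llvm::SmallString<128> concatAll(llvm::ArrayRef<llvm::StringRef> Parts) {
  size_t Total = 0;
  for (llvm::StringRef P : Parts)
    Total += P.size(); // first pass: sizes only
  llvm::SmallString<128> Out;
  Out.reserve(Total); // one allocation up front
  for (llvm::StringRef P : Parts)
    Out += P; // second pass: append
  return Out;
}
```
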
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang-tools-extra] [llvm] [llvm][mustache] Use BumpPtrAllocator to save ASTNodes (PR #159194)

2025-09-30 Thread Paul Kirth via llvm-branch-commits

https://github.com/ilovepi updated 
https://github.com/llvm/llvm-project/pull/159194

>From f9e86b823bb66126bd6ebe4f72c8ee68462be745 Mon Sep 17 00:00:00 2001
From: Paul Kirth 
Date: Mon, 15 Sep 2025 16:26:11 -0700
Subject: [PATCH] [llvm][mustache] Use BumpPtrAllocator to save ASTNodes

We make the Mustache ASTNodes usable in the arena by first removing all
of the memory-owning data structures, like std::vector, std::unique_ptr,
and SmallVector. We use standard LLVM list types to hold this data
instead, and make use of a UniqueStringSaver to hold the various
template strings.

Additionally, update clang-doc APIs to use the new interfaces.

Future work can make better use of Twine interfaces to help avoid any
intermediate copies or allocations.
---
 .../clang-doc/HTMLMustacheGenerator.cpp   |  22 +-
 llvm/include/llvm/Support/Mustache.h  |  15 +-
 llvm/lib/Support/Mustache.cpp | 177 +++--
 llvm/unittests/Support/MustacheTest.cpp   | 725 ++
 .../llvm-test-mustache-spec.cpp   |   5 +-
 5 files changed, 697 insertions(+), 247 deletions(-)

diff --git a/clang-tools-extra/clang-doc/HTMLMustacheGenerator.cpp 
b/clang-tools-extra/clang-doc/HTMLMustacheGenerator.cpp
index b37dc272ea156..b4b9322b0500a 100644
--- a/clang-tools-extra/clang-doc/HTMLMustacheGenerator.cpp
+++ b/clang-tools-extra/clang-doc/HTMLMustacheGenerator.cpp
@@ -46,7 +46,13 @@ class MustacheHTMLGenerator : public Generator {
const ClangDocContext &CDCtx) override;
 };
 
-class MustacheTemplateFile : public Template {
+class MustacheTemplateFile {
+  BumpPtrAllocator Allocator;
+  StringSaver Saver;
+  MustacheContext Ctx;
+  Template T;
+  std::unique_ptr<MemoryBuffer> Buffer;
+
 public:
   static Expected<std::unique_ptr<MustacheTemplateFile>>
   createMustacheFile(StringRef FileName) {
@@ -54,10 +60,8 @@ class MustacheTemplateFile : public Template {
 MemoryBuffer::getFile(FileName);
 if (auto EC = BufferOrError.getError())
   return createFileOpenError(FileName, EC);
-
-std::unique_ptr<MemoryBuffer> Buffer = std::move(BufferOrError.get());
-StringRef FileContent = Buffer->getBuffer();
-return std::make_unique<MustacheTemplateFile>(FileContent);
+return std::make_unique<MustacheTemplateFile>(
+std::move(BufferOrError.get()));
   }
 
   Error registerPartialFile(StringRef Name, StringRef FileName) {
@@ -68,11 +72,15 @@ class MustacheTemplateFile : public Template {
 
 std::unique_ptr<MemoryBuffer> Buffer = std::move(BufferOrError.get());
 StringRef FileContent = Buffer->getBuffer();
-registerPartial(Name.str(), FileContent.str());
+T.registerPartial(Name.str(), FileContent.str());
 return Error::success();
   }
 
-  MustacheTemplateFile(StringRef TemplateStr) : Template(TemplateStr) {}
+  void render(json::Value &V, raw_ostream &OS) { T.render(V, OS); }
+
+  MustacheTemplateFile(std::unique_ptr<MemoryBuffer> &&B)
+  : Saver(Allocator), Ctx(Allocator, Saver), T(B->getBuffer(), Ctx),
+Buffer(std::move(B)) {}
 };
 
 static std::unique_ptr<MustacheTemplateFile> NamespaceTemplate = nullptr;
diff --git a/llvm/include/llvm/Support/Mustache.h 
b/llvm/include/llvm/Support/Mustache.h
index ee9f40638fd12..83047f2aafff6 100644
--- a/llvm/include/llvm/Support/Mustache.h
+++ b/llvm/include/llvm/Support/Mustache.h
@@ -71,6 +71,8 @@
 
 #include "Error.h"
 #include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/ilist.h"
+#include "llvm/ADT/ilist_node.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/JSON.h"
@@ -84,10 +86,15 @@ using Lambda = std::function;
 using SectionLambda = std::function;
 
 class ASTNode;
-using AstPtr = std::unique_ptr<ASTNode>;
+using AstPtr = ASTNode *;
 using EscapeMap = DenseMap;
+using ASTNodeList = iplist<ASTNode>;
 
 struct MustacheContext {
+  MustacheContext(BumpPtrAllocator &Allocator, StringSaver &Saver)
+  : Allocator(Allocator), Saver(Saver) {}
+  BumpPtrAllocator &Allocator;
+  StringSaver &Saver;
   StringMap Partials;
   StringMap Lambdas;
   StringMap SectionLambdas;
@@ -98,7 +105,7 @@ struct MustacheContext {
 // and Lambdas that are registered with it.
 class Template {
 public:
-  LLVM_ABI Template(StringRef TemplateStr);
+  LLVM_ABI Template(StringRef TemplateStr, MustacheContext &Ctx);
 
   Template(const Template &) = delete;
 
@@ -110,7 +117,7 @@ class Template {
   // type.
   LLVM_ABI ~Template();
 
-  LLVM_ABI Template &operator=(Template &&Other) noexcept;
+  Template &operator=(Template &&) = delete;
 
   LLVM_ABI void render(const llvm::json::Value &Data, llvm::raw_ostream &OS);
 
@@ -126,7 +133,7 @@ class Template {
   LLVM_ABI void overrideEscapeCharacters(DenseMap Escapes);
 
 private:
-  MustacheContext Ctx;
+  MustacheContext &Ctx;
   AstPtr Tree;
 };
 } // namespace llvm::mustache
diff --git a/llvm/lib/Support/Mustache.cpp b/llvm/lib/Support/Mustache.cpp
index ba22cb75cbf5d..eadf4c1f7cda9 100644
--- a/llvm/lib/Support/Mustache.cpp
+++ b/llvm/lib/Support/Mustache.cpp
@@ -20,7 +20,7 @@ using namespace llvm::mustache;
 
 namespace {
 
-using Accessor = SmallVector;
+using Acc
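
The allocation scheme the commit message describes, reduced to a minimal
sketch: nodes are placement-new'd into a BumpPtrAllocator and strings live
in saver-owned memory. The Node type here is invented for illustration; the
real ASTNode carries much more state:

```
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/StringSaver.h"

// A node with no owning members: links are raw pointers into the
// arena and the body points into saver-owned memory.
struct Node {
  llvm::StringRef Body;
  Node *FirstChild = nullptr;
  Node *NextSibling = nullptr;
};

int main() {
  llvm::BumpPtrAllocator Alloc;
  llvm::StringSaver Saver(Alloc);
  // Everything is released in one shot when Alloc is destroyed,
  // so no unique_ptr bookkeeping is needed per node.
  auto *Root = new (Alloc.Allocate<Node>()) Node();
  Root->Body = Saver.save("{{#section}}body{{/section}}");
  return Root->Body.empty() ? 1 : 0;
}
```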

[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: make use of C++17 features and LLVM helpers (PR #141665)

2025-09-30 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/141665

>From 12320f045000bae8bedc4783b4e82429c57d0998 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Tue, 27 May 2025 21:06:03 +0300
Subject: [PATCH] [BOLT] Gadget scanner: make use of C++17 features and LLVM
 helpers

Perform trivial syntactical cleanups:
* make use of structured binding declarations
* use LLVM utility functions when appropriate
* omit braces around single expression inside single-line LLVM_DEBUG()

This patch is NFC aside from minor debug output changes.
---
 bolt/lib/Passes/PAuthGadgetScanner.cpp| 64 +--
 .../AArch64/gs-pauth-debug-output.s   | 14 ++--
 2 files changed, 37 insertions(+), 41 deletions(-)

diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp 
b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index 3acc6654115f4..b032561a43274 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -88,8 +88,8 @@ class TrackedRegisters {
   TrackedRegisters(ArrayRef RegsToTrack)
   : Registers(RegsToTrack),
 RegToIndexMapping(getMappingSize(RegsToTrack), NoIndex) {
-for (unsigned I = 0; I < RegsToTrack.size(); ++I)
-  RegToIndexMapping[RegsToTrack[I]] = I;
+for (auto [MappedIndex, Reg] : llvm::enumerate(RegsToTrack))
+  RegToIndexMapping[Reg] = MappedIndex;
   }
 
   ArrayRef getRegisters() const { return Registers; }
@@ -203,9 +203,9 @@ struct SrcState {
 
 SafeToDerefRegs &= StateIn.SafeToDerefRegs;
 TrustedRegs &= StateIn.TrustedRegs;
-for (unsigned I = 0; I < LastInstWritingReg.size(); ++I)
-  for (const MCInst *J : StateIn.LastInstWritingReg[I])
-LastInstWritingReg[I].insert(J);
+for (auto [ThisSet, OtherSet] :
+ llvm::zip_equal(LastInstWritingReg, StateIn.LastInstWritingReg))
+  ThisSet.insert_range(OtherSet);
 return *this;
   }
 
@@ -224,11 +224,9 @@ struct SrcState {
 static void printInstsShort(raw_ostream &OS,
 ArrayRef Insts) {
   OS << "Insts: ";
-  for (unsigned I = 0; I < Insts.size(); ++I) {
-auto &Set = Insts[I];
+  for (auto [I, PtrSet] : llvm::enumerate(Insts)) {
 OS << "[" << I << "](";
-for (const MCInst *MCInstP : Set)
-  OS << MCInstP << " ";
+interleave(PtrSet, OS, " ");
 OS << ")";
   }
 }
@@ -416,8 +414,9 @@ class SrcSafetyAnalysis {
 // ... an address can be updated in a safe manner, producing the result
 // which is as trusted as the input address.
 if (auto DstAndSrc = BC.MIB->analyzeAddressArithmeticsForPtrAuth(Point)) {
-  if (Cur.SafeToDerefRegs[DstAndSrc->second])
-Regs.push_back(DstAndSrc->first);
+  auto [DstReg, SrcReg] = *DstAndSrc;
+  if (Cur.SafeToDerefRegs[SrcReg])
+Regs.push_back(DstReg);
 }
 
 // Make sure explicit checker sequence keeps register safe-to-dereference
@@ -469,8 +468,9 @@ class SrcSafetyAnalysis {
 // ... an address can be updated in a safe manner, producing the result
 // which is as trusted as the input address.
 if (auto DstAndSrc = BC.MIB->analyzeAddressArithmeticsForPtrAuth(Point)) {
-  if (Cur.TrustedRegs[DstAndSrc->second])
-Regs.push_back(DstAndSrc->first);
+  auto [DstReg, SrcReg] = *DstAndSrc;
+  if (Cur.TrustedRegs[SrcReg])
+Regs.push_back(DstReg);
 }
 
 return Regs;
@@ -865,9 +865,9 @@ struct DstState {
   return (*this = StateIn);
 
 CannotEscapeUnchecked &= StateIn.CannotEscapeUnchecked;
-for (unsigned I = 0; I < FirstInstLeakingReg.size(); ++I)
-  for (const MCInst *J : StateIn.FirstInstLeakingReg[I])
-FirstInstLeakingReg[I].insert(J);
+for (auto [ThisSet, OtherSet] :
+ llvm::zip_equal(FirstInstLeakingReg, StateIn.FirstInstLeakingReg))
+  ThisSet.insert_range(OtherSet);
 return *this;
   }
 
@@ -1033,8 +1033,7 @@ class DstSafetyAnalysis {
 
 // ... an address can be updated in a safe manner, or
 if (auto DstAndSrc = BC.MIB->analyzeAddressArithmeticsForPtrAuth(Inst)) {
-  MCPhysReg DstReg, SrcReg;
-  std::tie(DstReg, SrcReg) = *DstAndSrc;
+  auto [DstReg, SrcReg] = *DstAndSrc;
   // Note that *all* registers containing the derived values must be safe,
   // both source and destination ones. No temporaries are supported at now.
   if (Cur.CannotEscapeUnchecked[SrcReg] &&
@@ -1074,7 +1073,7 @@ class DstSafetyAnalysis {
 // If this instruction terminates the program immediately, no
 // authentication oracles are possible past this point.
 if (BC.MIB->isTrap(Point)) {
-  LLVM_DEBUG({ traceInst(BC, "Trap instruction found", Point); });
+  LLVM_DEBUG(traceInst(BC, "Trap instruction found", Point));
   DstState Next(NumRegs, RegsToTrackInstsFor.getNumTrackedRegisters());
   Next.CannotEscapeUnchecked.set();
   return Next;
@@ -1249,7 +1248,7 @@ class CFGUnawareDstSafetyAnalysis : public 
DstSafetyAnalysis,
   // starting to analyze Inst.
   

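For reference, the two LLVM helpers doing most of the work in this cleanup,
shown on toy data as a standalone illustration:

```
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <vector>

int main() {
  std::vector<int> A = {1, 2, 3}, B = {4, 5, 6};
  // llvm::enumerate pairs each element with its index, replacing
  // manual 'for (unsigned I = 0; I < N; ++I)' counters.
  for (auto [Idx, Val] : llvm::enumerate(A))
    llvm::outs() << "[" << Idx << "]=" << Val << " ";
  llvm::outs() << "\n";
  // llvm::zip_equal iterates both ranges in lockstep and asserts
  // that their lengths match.
  for (auto [X, Y] : llvm::zip_equal(A, B))
    llvm::outs() << X + Y << " ";
  llvm::outs() << "\n";
  return 0;
}
```
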
[llvm-branch-commits] [llvm] f29f111 - [JITLink][MachO] Use Triple::isArm64e consistently.

2025-09-30 Thread Lang Hames via llvm-branch-commits

Author: Lang Hames
Date: 2025-10-01T11:07:37+10:00
New Revision: f29f1112f5cc467c0cdac05532770cdd15382c23

URL: 
https://github.com/llvm/llvm-project/commit/f29f1112f5cc467c0cdac05532770cdd15382c23
DIFF: 
https://github.com/llvm/llvm-project/commit/f29f1112f5cc467c0cdac05532770cdd15382c23.diff

LOG: [JITLink][MachO] Use Triple::isArm64e consistently.

Added: 


Modified: 
llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp

Removed: 




diff --git a/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp 
b/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp
index 09ac0f19a7b07..f79478038c5cb 100644
--- a/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp
@@ -599,8 +599,7 @@ Expected<std::unique_ptr<LinkGraph>> createLinkGraphFromMachOObject_arm64(
 }
 
 static Error applyPACSigningToModInitPointers(LinkGraph &G) {
-  assert(G.getTargetTriple().getSubArch() == Triple::AArch64SubArch_arm64e &&
- "PAC signing only valid for arm64e");
+  assert(G.getTargetTriple().isArm64e() && "PAC signing only valid for arm64e");
 
   if (auto *ModInitSec = G.findSectionByName("__DATA,__mod_init_func")) {
 for (auto *B : ModInitSec->blocks()) {



___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][omp] Add omp.tile operation (PR #160292)

2025-09-30 Thread Michael Kruse via llvm-branch-commits

https://github.com/Meinersbur edited 
https://github.com/llvm/llvm-project/pull/160292
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][SIInsertWaitCnts] Remove redundant TII/TRI/MRI arguments (NFC) (PR #161357)

2025-09-30 Thread Pierre van Houtryve via llvm-branch-commits

https://github.com/Pierre-vh updated 
https://github.com/llvm/llvm-project/pull/161357

>From 73c43575873aa2bc3dfc051a49bb05fc4fc99ca9 Mon Sep 17 00:00:00 2001
From: pvanhout 
Date: Mon, 29 Sep 2025 12:24:57 +0200
Subject: [PATCH] [AMDGPU][SIInsertWaitCnts] Remove redundant TII/TRI/MRI
 arguments (NFC)

WaitcntBrackets already has a pointer to its SIInsertWaitcnts instance.
With a small change, it can directly access TII/TRI/MRI that way.
This simplifies a lot of call sites, which makes the code easier to follow.
---
 llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 121 +---
 1 file changed, 54 insertions(+), 67 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp 
b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 3f9a1f492ace5..76bfce8c0f6f9 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -418,15 +418,14 @@ class WaitcntGeneratorGFX12Plus : public WaitcntGenerator {
 class SIInsertWaitcnts {
 public:
   const GCNSubtarget *ST;
+  const SIInstrInfo *TII = nullptr;
+  const SIRegisterInfo *TRI = nullptr;
+  const MachineRegisterInfo *MRI = nullptr;
   InstCounterType SmemAccessCounter;
   InstCounterType MaxCounter;
   const unsigned *WaitEventMaskForInst;
 
 private:
-  const SIInstrInfo *TII = nullptr;
-  const SIRegisterInfo *TRI = nullptr;
-  const MachineRegisterInfo *MRI = nullptr;
-
   DenseMap SLoadAddresses;
   DenseMap PreheadersToFlush;
   MachineLoopInfo *MLI;
@@ -631,8 +630,6 @@ class WaitcntBrackets {
   bool merge(const WaitcntBrackets &Other);
 
   RegInterval getRegInterval(const MachineInstr *MI,
- const MachineRegisterInfo *MRI,
- const SIRegisterInfo *TRI,
  const MachineOperand &Op) const;
 
   bool counterOutOfOrder(InstCounterType T) const;
@@ -650,9 +647,7 @@ class WaitcntBrackets {
   void applyWaitcnt(const AMDGPU::Waitcnt &Wait);
   void applyWaitcnt(InstCounterType T, unsigned Count);
   void applyXcnt(const AMDGPU::Waitcnt &Wait);
-  void updateByEvent(const SIInstrInfo *TII, const SIRegisterInfo *TRI,
- const MachineRegisterInfo *MRI, WaitEventType E,
- MachineInstr &MI);
+  void updateByEvent(WaitEventType E, MachineInstr &MI);
 
   unsigned hasPendingEvent() const { return PendingEvents; }
   unsigned hasPendingEvent(WaitEventType E) const {
@@ -761,10 +756,8 @@ class WaitcntBrackets {
   void setScoreByInterval(RegInterval Interval, InstCounterType CntTy,
   unsigned Score);
 
-  void setScoreByOperand(const MachineInstr *MI, const SIRegisterInfo *TRI,
- const MachineRegisterInfo *MRI,
- const MachineOperand &Op, InstCounterType CntTy,
- unsigned Val);
+  void setScoreByOperand(const MachineInstr *MI, const MachineOperand &Op,
+ InstCounterType CntTy, unsigned Val);
 
   const SIInsertWaitcnts *Context;
 
@@ -821,12 +814,13 @@ class SIInsertWaitcntsLegacy : public MachineFunctionPass {
 } // end anonymous namespace
 
 RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI,
-const MachineRegisterInfo *MRI,
-const SIRegisterInfo *TRI,
 const MachineOperand &Op) const {
   if (Op.getReg() == AMDGPU::SCC)
 return {SCC, SCC + 1};
 
+  const SIRegisterInfo *TRI = Context->TRI;
+  const MachineRegisterInfo *MRI = Context->MRI;
+
   if (!TRI->isInAllocatableClass(Op.getReg()))
 return {-1, -1};
 
@@ -891,11 +885,9 @@ void WaitcntBrackets::setScoreByInterval(RegInterval Interval,
 }
 
 void WaitcntBrackets::setScoreByOperand(const MachineInstr *MI,
-const SIRegisterInfo *TRI,
-const MachineRegisterInfo *MRI,
 const MachineOperand &Op,
 InstCounterType CntTy, unsigned Score) {
-  RegInterval Interval = getRegInterval(MI, MRI, TRI, Op);
+  RegInterval Interval = getRegInterval(MI, Op);
   setScoreByInterval(Interval, CntTy, Score);
 }
 
@@ -927,10 +919,7 @@ bool WaitcntBrackets::hasPointSamplePendingVmemTypes(
   return hasOtherPendingVmemTypes(Interval, VMEM_NOSAMPLER);
 }
 
-void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
-const SIRegisterInfo *TRI,
-const MachineRegisterInfo *MRI,
-WaitEventType E, MachineInstr &Inst) {
+void WaitcntBrackets::updateByEvent(WaitEventType E, MachineInstr &Inst) {
   InstCounterType T = eventCounter(Context->WaitEventMaskForInst, E);
 
   unsigned UB = getScoreUB(T);
@@ -943,6 +932,10 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
   PendingEvents |= 1 << E;
   setScoreUB(T, Cur
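
The shape of the refactor, as a hedged sketch with invented names rather
than the pass's real types: the helper keeps a back-pointer to its owning
pass and reaches shared state through it, so call sites stop threading
TII/TRI/MRI around.

```
struct Owner; // owns the shared tables

struct Helper {
  const Owner *Context; // back-pointer set at construction
  explicit Helper(const Owner *Ctx) : Context(Ctx) {}
  int score(int Reg) const; // no TII/TRI/MRI parameters needed
};

struct Owner {
  int Table = 42; // stands in for TII/TRI/MRI
  Helper H{this};
};

int Helper::score(int Reg) const { return Context->Table + Reg; }

int main() { Owner O; return O.H.score(0) == 42 ? 0 : 1; }
```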

[llvm-branch-commits] [llvm] [AMDGPU][SIInsertWaitCnts] De-duplicate code (NFC) (PR #161161)

2025-09-30 Thread Pierre van Houtryve via llvm-branch-commits

https://github.com/Pierre-vh updated 
https://github.com/llvm/llvm-project/pull/161161

>From ed3d7f64f0e8366f9d97ac396710a29c80629229 Mon Sep 17 00:00:00 2001
From: pvanhout 
Date: Mon, 29 Sep 2025 09:56:19 +0200
Subject: [PATCH] [AMDGPU][SIInsertWaitCnts] De-duplicate code (NFC)

Trying to do things that would be picked up in code review if this pass were
re-submitted today. Changes are aimed to be straightforward and non-opinionated.
---
 llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 64 ++---
 1 file changed, 29 insertions(+), 35 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp 
b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 91136fd85c545..3f9a1f492ace5 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1853,26 +1853,24 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
   assert(!MI.isMetaInstruction());
 
   AMDGPU::Waitcnt Wait;
+  const unsigned Opc = MI.getOpcode();
 
   // FIXME: This should have already been handled by the memory legalizer.
   // Removing this currently doesn't affect any lit tests, but we need to
   // verify that nothing was relying on this. The number of buffer invalidates
   // being handled here should not be expanded.
-  if (MI.getOpcode() == AMDGPU::BUFFER_WBINVL1 ||
-  MI.getOpcode() == AMDGPU::BUFFER_WBINVL1_SC ||
-  MI.getOpcode() == AMDGPU::BUFFER_WBINVL1_VOL ||
-  MI.getOpcode() == AMDGPU::BUFFER_GL0_INV ||
-  MI.getOpcode() == AMDGPU::BUFFER_GL1_INV) {
+  if (Opc == AMDGPU::BUFFER_WBINVL1 || Opc == AMDGPU::BUFFER_WBINVL1_SC ||
+  Opc == AMDGPU::BUFFER_WBINVL1_VOL || Opc == AMDGPU::BUFFER_GL0_INV ||
+  Opc == AMDGPU::BUFFER_GL1_INV) {
 Wait.LoadCnt = 0;
   }
 
   // All waits must be resolved at call return.
   // NOTE: this could be improved with knowledge of all call sites or
   //   with knowledge of the called routines.
-  if (MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
-  MI.getOpcode() == AMDGPU::SI_RETURN ||
-  MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN ||
-  MI.getOpcode() == AMDGPU::S_SETPC_B64_return ||
+  if (Opc == AMDGPU::SI_RETURN_TO_EPILOG || Opc == AMDGPU::SI_RETURN ||
+  Opc == AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN ||
+  Opc == AMDGPU::S_SETPC_B64_return ||
   (MI.isReturn() && MI.isCall() && !callWaitsOnFunctionEntry(MI))) {
 Wait = Wait.combined(WCG->getAllZeroWaitcnt(/*IncludeVSCnt=*/false));
   }
@@ -1884,8 +1882,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
   // send a message to explicitly release all VGPRs before the stores have
   // completed, but it is only safe to do this if there are no outstanding
   // scratch stores.
-  else if (MI.getOpcode() == AMDGPU::S_ENDPGM ||
-   MI.getOpcode() == AMDGPU::S_ENDPGM_SAVED) {
+  else if (Opc == AMDGPU::S_ENDPGM || Opc == AMDGPU::S_ENDPGM_SAVED) {
 if (!WCG->isOptNone() &&
 (MI.getMF()->getInfo<SIMachineFunctionInfo>()->isDynamicVGPREnabled() ||
  (ST->getGeneration() >= AMDGPUSubtarget::GFX11 &&
@@ -1894,8 +1891,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
   ReleaseVGPRInsts.insert(&MI);
   }
   // Resolve vm waits before gs-done.
-  else if ((MI.getOpcode() == AMDGPU::S_SENDMSG ||
-MI.getOpcode() == AMDGPU::S_SENDMSGHALT) &&
+  else if ((Opc == AMDGPU::S_SENDMSG || Opc == AMDGPU::S_SENDMSGHALT) &&
ST->hasLegacyGeometry() &&
((MI.getOperand(0).getImm() & AMDGPU::SendMsg::ID_MASK_PreGFX11_) ==
 AMDGPU::SendMsg::ID_GS_DONE_PreGFX11)) {
@@ -1920,7 +1916,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
 
 // Wait for any pending GDS instruction to complete before any
 // "Always GDS" instruction.
-if (TII->isAlwaysGDS(MI.getOpcode()) && ScoreBrackets.hasPendingGDS())
+if (TII->isAlwaysGDS(Opc) && ScoreBrackets.hasPendingGDS())
   addWait(Wait, DS_CNT, ScoreBrackets.getPendingGDSWait());
 
 if (MI.isCall() && callWaitsOnFunctionEntry(MI)) {
@@ -1946,7 +1942,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
   Wait);
 }
   }
-} else if (MI.getOpcode() == AMDGPU::S_BARRIER_WAIT) {
+} else if (Opc == AMDGPU::S_BARRIER_WAIT) {
   ScoreBrackets.tryClearSCCWriteEvent(&MI);
 } else {
   // FIXME: Should not be relying on memoperands.
@@ -2061,8 +2057,8 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
   //
  // In all other cases, ensure safety by ensuring that there are no outstanding
   // memory operations.
-  if (MI.getOpcode() == AMDGPU::S_BARRIER &&
-  !ST->hasAutoWaitcntBeforeBarrier() && !ST->supportsBackOffBarrier()) {
+  if (Opc == AMDGPU::S_BARRIER && !ST->hasAutoWaitcntBeforeBarrier() &&
+  !ST->supportsBackOffBarrier()) {
 Wait = Wait.combined(WCG->getAllZeroWaitcnt(/*IncludeVSCnt=*/true));
   }
 
@@ -2146,19 +2142,19 @@ bool SIIns


[llvm-branch-commits] [flang] [mlir] [mlir][omp] Improve canonloop/iv naming (PR #159773)

2025-09-30 Thread Michael Kruse via llvm-branch-commits


@@ -0,0 +1,28 @@
+// RUN: fir-opt %s   | FileCheck %s --enable-var-scope

Meinersbur wrote:

Updated using scf.if

https://github.com/llvm/llvm-project/pull/159773
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits