[llvm-branch-commits] [llvm] [LV] Reduce register usage for scaled reductions (PR #133090)

2025-03-26 Thread Sam Tebbs via llvm-branch-commits

https://github.com/SamTebbs33 created 
https://github.com/llvm/llvm-project/pull/133090

This PR accounts for scaled reductions in `calculateRegisterUsage` to reflect 
the fact that the number of lanes in their output is smaller than the VF.

>From 6193c2c846710472c7e604ef33a15cda18771328 Mon Sep 17 00:00:00 2001
From: Samuel Tebbs 
Date: Wed, 26 Mar 2025 14:01:59 +
Subject: [PATCH] [LV] Reduce register usage for scaled reductions

---
 .../Transforms/Vectorize/LoopVectorize.cpp|  24 +-
 .../Transforms/Vectorize/VPRecipeBuilder.h|   3 +-
 llvm/lib/Transforms/Vectorize/VPlan.h |  14 +-
 .../partial-reduce-dot-product-neon.ll|  60 ++-
 .../AArch64/partial-reduce-dot-product.ll | 414 ++
 5 files changed, 495 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp 
b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index c9f314c0ba481..da701ef9ff1a2 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5026,10 +5026,23 @@ calculateRegisterUsage(VPlan &Plan, 
ArrayRef<ElementCount> VFs,
 // even in the scalar case.
 RegUsage[ClassID] += 1;
   } else {
+// The output from scaled phis and scaled reductions actually have
+// fewer lanes than the VF.
+auto VF = VFs[J];
+if (auto *ReductionR = dyn_cast<VPReductionPHIRecipe>(R))
+  VF = VF.divideCoefficientBy(ReductionR->getVFScaleFactor());
+else if (auto *PartialReductionR =
+ dyn_cast<VPPartialReductionRecipe>(R))
+  VF = VF.divideCoefficientBy(PartialReductionR->getScaleFactor());
+if (VF != VFs[J])
+  LLVM_DEBUG(dbgs() << "LV(REG): Scaled down VF from " << VFs[J]
+<< " to " << VF << " for ";
+ R->dump(););
+
 for (VPValue *DefV : R->definedValues()) {
   Type *ScalarTy = TypeInfo.inferScalarType(DefV);
   unsigned ClassID = TTI.getRegisterClassForType(true, ScalarTy);
-  RegUsage[ClassID] += GetRegUsage(ScalarTy, VFs[J]);
+  RegUsage[ClassID] += GetRegUsage(ScalarTy, VF);
 }
   }
 }
@@ -8963,8 +8976,8 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(
   if (isa<LoadInst>(Instr) || isa<StoreInst>(Instr))
 return tryToWidenMemory(Instr, Operands, Range);
 
-  if (getScalingForReduction(Instr))
-return tryToCreatePartialReduction(Instr, Operands);
+  if (auto ScaleFactor = getScalingForReduction(Instr))
+return tryToCreatePartialReduction(Instr, Operands, ScaleFactor.value());
 
   if (!shouldWiden(Instr, Range))
 return nullptr;
@@ -8988,7 +9001,8 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(
 
 VPRecipeBase *
 VPRecipeBuilder::tryToCreatePartialReduction(Instruction *Reduction,
- ArrayRef<VPValue *> Operands) {
+ ArrayRef<VPValue *> Operands,
+ unsigned ScaleFactor) {
   assert(Operands.size() == 2 &&
  "Unexpected number of operands for partial reduction");
 
@@ -9021,7 +9035,7 @@ VPRecipeBuilder::tryToCreatePartialReduction(Instruction 
*Reduction,
 BinOp = Builder.createSelect(Mask, BinOp, Zero, Reduction->getDebugLoc());
   }
   return new VPPartialReductionRecipe(ReductionOpcode, BinOp, Accumulator,
-  Reduction);
+  ScaleFactor, Reduction);
 }
 
 void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h 
b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index 334cfbad8bd7c..fd0064a34c4c9 100644
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -178,7 +178,8 @@ class VPRecipeBuilder {
   /// Create and return a partial reduction recipe for a reduction instruction
   /// along with binary operation and reduction phi operands.
   VPRecipeBase *tryToCreatePartialReduction(Instruction *Reduction,
-ArrayRef<VPValue *> Operands);
+ArrayRef<VPValue *> Operands,
+unsigned ScaleFactor);
 
   /// Set the recipe created for given ingredient.
   void setRecipe(Instruction *I, VPRecipeBase *R) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h 
b/llvm/lib/Transforms/Vectorize/VPlan.h
index 80b3d2a760293..d84efb1bd6850 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2001,6 +2001,8 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
   /// Generate the phi/select nodes.
   void execute(VPTransformState &State) override;
 
+  unsigned getVFScaleFactor() const { return VFScaleFactor; }
+
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   /// Print the recipe.
   void print(ra

[llvm-branch-commits] [llvm] [LV] Reduce register usage for scaled reductions (PR #133090)

2025-03-26 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-vectorizers

Author: Sam Tebbs (SamTebbs33)


Changes

This PR accounts for scaled reductions in `calculateRegisterUsage` to reflect 
the fact that the number of lanes in their output is smaller than the VF.

---

Patch is 56.56 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/133090.diff


5 Files Affected:

- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+19-5) 
- (modified) llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h (+2-1) 
- (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+10-4) 
- (modified) 
llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll 
(+50-10) 
- (modified) 
llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll (+414) 


``diff
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp 
b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index c9f314c0ba481..da701ef9ff1a2 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5026,10 +5026,23 @@ calculateRegisterUsage(VPlan &Plan, 
ArrayRef VFs,
 // even in the scalar case.
 RegUsage[ClassID] += 1;
   } else {
+// The output from scaled phis and scaled reductions actually have
+// fewer lanes than the VF.
+auto VF = VFs[J];
+if (auto *ReductionR = dyn_cast(R))
+  VF = VF.divideCoefficientBy(ReductionR->getVFScaleFactor());
+else if (auto *PartialReductionR =
+ dyn_cast(R))
+  VF = VF.divideCoefficientBy(PartialReductionR->getScaleFactor());
+if (VF != VFs[J])
+  LLVM_DEBUG(dbgs() << "LV(REG): Scaled down VF from " << VFs[J]
+<< " to " << VF << " for ";
+ R->dump(););
+
 for (VPValue *DefV : R->definedValues()) {
   Type *ScalarTy = TypeInfo.inferScalarType(DefV);
   unsigned ClassID = TTI.getRegisterClassForType(true, ScalarTy);
-  RegUsage[ClassID] += GetRegUsage(ScalarTy, VFs[J]);
+  RegUsage[ClassID] += GetRegUsage(ScalarTy, VF);
 }
   }
 }
@@ -8963,8 +8976,8 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(
   if (isa(Instr) || isa(Instr))
 return tryToWidenMemory(Instr, Operands, Range);
 
-  if (getScalingForReduction(Instr))
-return tryToCreatePartialReduction(Instr, Operands);
+  if (auto ScaleFactor = getScalingForReduction(Instr))
+return tryToCreatePartialReduction(Instr, Operands, ScaleFactor.value());
 
   if (!shouldWiden(Instr, Range))
 return nullptr;
@@ -8988,7 +9001,8 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(
 
 VPRecipeBase *
 VPRecipeBuilder::tryToCreatePartialReduction(Instruction *Reduction,
- ArrayRef Operands) {
+ ArrayRef Operands,
+ unsigned ScaleFactor) {
   assert(Operands.size() == 2 &&
  "Unexpected number of operands for partial reduction");
 
@@ -9021,7 +9035,7 @@ VPRecipeBuilder::tryToCreatePartialReduction(Instruction 
*Reduction,
 BinOp = Builder.createSelect(Mask, BinOp, Zero, Reduction->getDebugLoc());
   }
   return new VPPartialReductionRecipe(ReductionOpcode, BinOp, Accumulator,
-  Reduction);
+  ScaleFactor, Reduction);
 }
 
 void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h 
b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index 334cfbad8bd7c..fd0064a34c4c9 100644
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -178,7 +178,8 @@ class VPRecipeBuilder {
   /// Create and return a partial reduction recipe for a reduction instruction
   /// along with binary operation and reduction phi operands.
   VPRecipeBase *tryToCreatePartialReduction(Instruction *Reduction,
-ArrayRef Operands);
+ArrayRef Operands,
+unsigned ScaleFactor);
 
   /// Set the recipe created for given ingredient.
   void setRecipe(Instruction *I, VPRecipeBase *R) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h 
b/llvm/lib/Transforms/Vectorize/VPlan.h
index 80b3d2a760293..d84efb1bd6850 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2001,6 +2001,8 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
   /// Generate the phi/select nodes.
   void execute(VPTransformState &State) override;
 
+  unsigned getVFScaleFactor() const { return VFScaleFactor; }
+
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   /// Print the recipe.
   

[llvm-branch-commits] [llvm] [LV] Reduce register usage for scaled reductions (PR #133090)

2025-03-26 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-transforms

Author: Sam Tebbs (SamTebbs33)


Changes

This PR accounts for scaled reductions in `calculateRegisterUsage` to reflect 
the fact that the number of lanes in their output is smaller than the VF.

---

Patch is 56.56 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/133090.diff


5 Files Affected:

- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+19-5) 
- (modified) llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h (+2-1) 
- (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+10-4) 
- (modified) 
llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll 
(+50-10) 
- (modified) 
llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll (+414) 


``diff
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp 
b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index c9f314c0ba481..da701ef9ff1a2 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5026,10 +5026,23 @@ calculateRegisterUsage(VPlan &Plan, 
ArrayRef VFs,
 // even in the scalar case.
 RegUsage[ClassID] += 1;
   } else {
+// The output from scaled phis and scaled reductions actually have
+// fewer lanes than the VF.
+auto VF = VFs[J];
+if (auto *ReductionR = dyn_cast(R))
+  VF = VF.divideCoefficientBy(ReductionR->getVFScaleFactor());
+else if (auto *PartialReductionR =
+ dyn_cast(R))
+  VF = VF.divideCoefficientBy(PartialReductionR->getScaleFactor());
+if (VF != VFs[J])
+  LLVM_DEBUG(dbgs() << "LV(REG): Scaled down VF from " << VFs[J]
+<< " to " << VF << " for ";
+ R->dump(););
+
 for (VPValue *DefV : R->definedValues()) {
   Type *ScalarTy = TypeInfo.inferScalarType(DefV);
   unsigned ClassID = TTI.getRegisterClassForType(true, ScalarTy);
-  RegUsage[ClassID] += GetRegUsage(ScalarTy, VFs[J]);
+  RegUsage[ClassID] += GetRegUsage(ScalarTy, VF);
 }
   }
 }
@@ -8963,8 +8976,8 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(
   if (isa(Instr) || isa(Instr))
 return tryToWidenMemory(Instr, Operands, Range);
 
-  if (getScalingForReduction(Instr))
-return tryToCreatePartialReduction(Instr, Operands);
+  if (auto ScaleFactor = getScalingForReduction(Instr))
+return tryToCreatePartialReduction(Instr, Operands, ScaleFactor.value());
 
   if (!shouldWiden(Instr, Range))
 return nullptr;
@@ -8988,7 +9001,8 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(
 
 VPRecipeBase *
 VPRecipeBuilder::tryToCreatePartialReduction(Instruction *Reduction,
- ArrayRef Operands) {
+ ArrayRef Operands,
+ unsigned ScaleFactor) {
   assert(Operands.size() == 2 &&
  "Unexpected number of operands for partial reduction");
 
@@ -9021,7 +9035,7 @@ VPRecipeBuilder::tryToCreatePartialReduction(Instruction 
*Reduction,
 BinOp = Builder.createSelect(Mask, BinOp, Zero, Reduction->getDebugLoc());
   }
   return new VPPartialReductionRecipe(ReductionOpcode, BinOp, Accumulator,
-  Reduction);
+  ScaleFactor, Reduction);
 }
 
 void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h 
b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index 334cfbad8bd7c..fd0064a34c4c9 100644
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -178,7 +178,8 @@ class VPRecipeBuilder {
   /// Create and return a partial reduction recipe for a reduction instruction
   /// along with binary operation and reduction phi operands.
   VPRecipeBase *tryToCreatePartialReduction(Instruction *Reduction,
-ArrayRef Operands);
+ArrayRef Operands,
+unsigned ScaleFactor);
 
   /// Set the recipe created for given ingredient.
   void setRecipe(Instruction *I, VPRecipeBase *R) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h 
b/llvm/lib/Transforms/Vectorize/VPlan.h
index 80b3d2a760293..d84efb1bd6850 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2001,6 +2001,8 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
   /// Generate the phi/select nodes.
   void execute(VPTransformState &State) override;
 
+  unsigned getVFScaleFactor() const { return VFScaleFactor; }
+
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   /// Print the recipe.

[llvm-branch-commits] [llvm] [LV] Reduce register usage for scaled reductions (PR #133090)

2025-03-26 Thread Nicholas Guy via llvm-branch-commits


@@ -5026,10 +5026,23 @@ calculateRegisterUsage(VPlan &Plan, 
ArrayRef VFs,
 // even in the scalar case.
 RegUsage[ClassID] += 1;
   } else {
+// The output from scaled phis and scaled reductions actually have
+// fewer lanes than the VF.
+auto VF = VFs[J];
+if (auto *ReductionR = dyn_cast(R))
+  VF = VF.divideCoefficientBy(ReductionR->getVFScaleFactor());
+else if (auto *PartialReductionR =
+ dyn_cast(R))
+  VF = VF.divideCoefficientBy(PartialReductionR->getScaleFactor());
+if (VF != VFs[J])

NickGuy-Arm wrote:

Nit: if the condition is only used for debug output, can it be moved inside 
the LLVM_DEBUG?

https://github.com/llvm/llvm-project/pull/133090
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] [libc++] Remove the need for _LIBCPP_TEMPLATE_VIS (PR #133010)

2025-03-26 Thread Louis Dionne via llvm-branch-commits

https://github.com/ldionne requested changes to this pull request.

I am fine with the spirit of this patch. I think it needs a release note though 
since we're making library-wide changes. We don't think those changes are going 
to affect anyone, but I think it's a nice courtesy to give our users and it 
doesn't cost us anything to call it out.

Also, I'd like this patch to be split into two. First, the changes that 
introduce `NAMESPACE_VIS` and that make `TEMPLATE_VIS` a no-op. Then, one or 
two weeks after landing the first patch, we can have a second patch that 
removes `_LIBCPP_TEMPLATE_VIS`, which is now a no-op. I think this will 
simplify things in case we need to revert the patch for an unforeseen reason, 
and it'll allow people who do wonky stuff (e.g. override `_LIBCPP_TEMPLATE_VIS` 
for their own purposes) to apply a temporary workaround while they figure out 
how not to do something unsupported. Again, that's low cost to us and it'll 
make things much easier for everyone in case things happen to go wrong, 
unexpectedly.

https://github.com/llvm/llvm-project/pull/133010
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] [libc++] Remove the need for _LIBCPP_TEMPLATE_VIS (PR #133010)

2025-03-26 Thread Louis Dionne via llvm-branch-commits

https://github.com/ldionne edited 
https://github.com/llvm/llvm-project/pull/133010
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] [libc++] Remove the need for _LIBCPP_TEMPLATE_VIS (PR #133010)

2025-03-26 Thread Louis Dionne via llvm-branch-commits


@@ -64,25 +64,13 @@ Visibility Macros
   ABI, we should create a new _LIBCPP_HIDE_FROM_ABI_AFTER_XXX macro, and we can

ldionne wrote:

Not attached to this line: you should update the `Overview` at the beginning of 
this file to reflect the latest state of things.

https://github.com/llvm/llvm-project/pull/133010
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [LV] Reduce register usage for scaled reductions (PR #133090)

2025-03-26 Thread Nicholas Guy via llvm-branch-commits

https://github.com/NickGuy-Arm commented:

Looks generally good to me so far, with a few nitpicks.

https://github.com/llvm/llvm-project/pull/133090
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] [libc++] Remove the need for _LIBCPP_TEMPLATE_VIS (PR #133010)

2025-03-26 Thread Louis Dionne via llvm-branch-commits




ldionne wrote:

The commit message should make it clear that this is a no-op on Clang.

https://github.com/llvm/llvm-project/pull/133010
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: Detect address materialization and arithmetics (PR #132540)

2025-03-26 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/132540

>From 53f6310e26cb02a18d99a9350ff8162ea0ed22b6 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Thu, 20 Mar 2025 20:15:07 +0300
Subject: [PATCH] [BOLT] Gadget scanner: Detect address materialization and
 arithmetics

In addition to authenticated pointers, consider the contents of a
register safe if it was
* written by PC-relative address computation
* updated by an arithmetic instruction whose input address is safe
---
 bolt/include/bolt/Core/MCPlusBuilder.h|  16 ++
 bolt/lib/Passes/PAuthGadgetScanner.cpp|  92 +--
 .../Target/AArch64/AArch64MCPlusBuilder.cpp   |  30 +++
 .../AArch64/gs-pacret-autiasp.s   |  15 --
 .../gs-pauth-address-materialization.s| 228 ++
 .../binary-analysis/AArch64/lit.local.cfg |   3 +-
 6 files changed, 345 insertions(+), 39 deletions(-)
 create mode 100644 
bolt/test/binary-analysis/AArch64/gs-pauth-address-materialization.s

diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h 
b/bolt/include/bolt/Core/MCPlusBuilder.h
index 8b6dc14121480..e94f82d00349a 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -587,6 +587,22 @@ class MCPlusBuilder {
 return getNoRegister();
   }
 
+  virtual MCPhysReg getSafelyMaterializedAddressReg(const MCInst &Inst) const {
+llvm_unreachable("not implemented");
+return getNoRegister();
+  }
+
+  /// Analyzes if this instruction can safely perform address arithmetics.
+  ///
+  /// If the first element of the returned pair is no-register, this 
instruction
+  /// is considered unknown. Otherwise, (output, input) pair is returned,
+  /// so that output is as trusted as input is.
+  virtual std::pair<MCPhysReg, MCPhysReg>
+  analyzeSafeAddressArithmetics(const MCInst &Inst) const {
+llvm_unreachable("not implemented");
+return std::make_pair(getNoRegister(), getNoRegister());
+  }
+
   virtual bool isTerminator(const MCInst &Inst) const;
 
   virtual bool isNoop(const MCInst &Inst) const {
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp 
b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index a3b320c545734..16da08551a34d 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -335,6 +335,50 @@ class PacRetAnalysis
 });
   }
 
+  BitVector getClobberedRegs(const MCInst &Point) const {
+BitVector Clobbered(NumRegs, false);
+// Assume a call can clobber all registers, including callee-saved
+// registers. There's a good chance that callee-saved registers will be
+// saved on the stack at some point during execution of the callee.
+// Therefore they should also be considered as potentially modified by an
+// attacker/written to.
+// Also, not all functions may respect the AAPCS ABI rules about
+// caller/callee-saved registers.
+if (BC.MIB->isCall(Point))
+  Clobbered.set();
+else
+  BC.MIB->getClobberedRegs(Point, Clobbered);
+return Clobbered;
+  }
+
+  // Returns all registers that can be treated as if they are written by an
+  // authentication instruction.
+  SmallVector<MCPhysReg> getAuthenticatedRegs(const MCInst &Point,
+  const State &Cur) const {
+SmallVector<MCPhysReg> Regs;
+const MCPhysReg NoReg = BC.MIB->getNoRegister();
+
+// A signed pointer can be authenticated, or
+ErrorOr<MCPhysReg> AutReg = BC.MIB->getAuthenticatedReg(Point);
+if (AutReg && *AutReg != NoReg)
+  Regs.push_back(*AutReg);
+
+// ... a safe address can be materialized, or
+MCPhysReg NewAddrReg = BC.MIB->getSafelyMaterializedAddressReg(Point);
+if (NewAddrReg != NoReg)
+  Regs.push_back(NewAddrReg);
+
+// ... an address can be updated in a safe manner, producing the result
+// which is as trusted as the input address.
+MCPhysReg ArithResult, ArithSrc;
+std::tie(ArithResult, ArithSrc) =
+BC.MIB->analyzeSafeAddressArithmetics(Point);
+if (ArithResult != NoReg && Cur.SafeToDerefRegs[ArithSrc])
+  Regs.push_back(ArithResult);
+
+return Regs;
+  }
+
   State computeNext(const MCInst &Point, const State &Cur) {
 PacStatePrinter P(BC);
 LLVM_DEBUG({
@@ -355,19 +399,20 @@ class PacRetAnalysis
   return State();
 }
 
+// First, compute various properties of the instruction, taking the state
+// before its execution into account, if necessary.
+
+BitVector Clobbered = getClobberedRegs(Point);
+// Compute the set of registers that can be considered as written by
+// an authentication instruction. This includes operations that are
+// *strictly better* than authentication, such as materializing a
+// PC-relative constant.
+SmallVector<MCPhysReg> AuthenticatedOrBetter =
+getAuthenticatedRegs(Point, Cur);
+
+// Then, compute the state after this instruction is executed.
 State Next = Cur;
-BitVector Clobbered(NumRegs, false);
-// Assume a call can clo

[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: Detect address materialization and arithmetics (PR #132540)

2025-03-26 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/132540

>From 53f6310e26cb02a18d99a9350ff8162ea0ed22b6 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Thu, 20 Mar 2025 20:15:07 +0300
Subject: [PATCH] [BOLT] Gadget scanner: Detect address materialization and
 arithmetics

In addition to authenticated pointers, consider the contents of a
register safe if it was
* written by PC-relative address computation
* updated by an arithmetic instruction whose input address is safe
---
 bolt/include/bolt/Core/MCPlusBuilder.h|  16 ++
 bolt/lib/Passes/PAuthGadgetScanner.cpp|  92 +--
 .../Target/AArch64/AArch64MCPlusBuilder.cpp   |  30 +++
 .../AArch64/gs-pacret-autiasp.s   |  15 --
 .../gs-pauth-address-materialization.s| 228 ++
 .../binary-analysis/AArch64/lit.local.cfg |   3 +-
 6 files changed, 345 insertions(+), 39 deletions(-)
 create mode 100644 
bolt/test/binary-analysis/AArch64/gs-pauth-address-materialization.s

diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h 
b/bolt/include/bolt/Core/MCPlusBuilder.h
index 8b6dc14121480..e94f82d00349a 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -587,6 +587,22 @@ class MCPlusBuilder {
 return getNoRegister();
   }
 
+  virtual MCPhysReg getSafelyMaterializedAddressReg(const MCInst &Inst) const {
+llvm_unreachable("not implemented");
+return getNoRegister();
+  }
+
+  /// Analyzes if this instruction can safely perform address arithmetics.
+  ///
+  /// If the first element of the returned pair is no-register, this 
instruction
+  /// is considered unknown. Otherwise, (output, input) pair is returned,
+  /// so that output is as trusted as input is.
+  virtual std::pair
+  analyzeSafeAddressArithmetics(const MCInst &Inst) const {
+llvm_unreachable("not implemented");
+return std::make_pair(getNoRegister(), getNoRegister());
+  }
+
   virtual bool isTerminator(const MCInst &Inst) const;
 
   virtual bool isNoop(const MCInst &Inst) const {
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp 
b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index a3b320c545734..16da08551a34d 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -335,6 +335,50 @@ class PacRetAnalysis
 });
   }
 
+  BitVector getClobberedRegs(const MCInst &Point) const {
+BitVector Clobbered(NumRegs, false);
+// Assume a call can clobber all registers, including callee-saved
+// registers. There's a good chance that callee-saved registers will be
+// saved on the stack at some point during execution of the callee.
+// Therefore they should also be considered as potentially modified by an
+// attacker/written to.
+// Also, not all functions may respect the AAPCS ABI rules about
+// caller/callee-saved registers.
+if (BC.MIB->isCall(Point))
+  Clobbered.set();
+else
+  BC.MIB->getClobberedRegs(Point, Clobbered);
+return Clobbered;
+  }
+
+  // Returns all registers that can be treated as if they are written by an
+  // authentication instruction.
+  SmallVector getAuthenticatedRegs(const MCInst &Point,
+  const State &Cur) const {
+SmallVector Regs;
+const MCPhysReg NoReg = BC.MIB->getNoRegister();
+
+// A signed pointer can be authenticated, or
+ErrorOr AutReg = BC.MIB->getAuthenticatedReg(Point);
+if (AutReg && *AutReg != NoReg)
+  Regs.push_back(*AutReg);
+
+// ... a safe address can be materialized, or
+MCPhysReg NewAddrReg = BC.MIB->getSafelyMaterializedAddressReg(Point);
+if (NewAddrReg != NoReg)
+  Regs.push_back(NewAddrReg);
+
+// ... an address can be updated in a safe manner, producing the result
+// which is as trusted as the input address.
+MCPhysReg ArithResult, ArithSrc;
+std::tie(ArithResult, ArithSrc) =
+BC.MIB->analyzeSafeAddressArithmetics(Point);
+if (ArithResult != NoReg && Cur.SafeToDerefRegs[ArithSrc])
+  Regs.push_back(ArithResult);
+
+return Regs;
+  }
+
   State computeNext(const MCInst &Point, const State &Cur) {
 PacStatePrinter P(BC);
 LLVM_DEBUG({
@@ -355,19 +399,20 @@ class PacRetAnalysis
   return State();
 }
 
+// First, compute various properties of the instruction, taking the state
+// before its execution into account, if necessary.
+
+BitVector Clobbered = getClobberedRegs(Point);
+// Compute the set of registers that can be considered as written by
+// an authentication instruction. This includes operations that are
+// *strictly better* than authentication, such as materializing a
+// PC-relative constant.
+SmallVector AuthenticatedOrBetter =
+getAuthenticatedRegs(Point, Cur);
+
+// Then, compute the state after this instruction is executed.
 State Next = Cur;
-BitVector Clobbered(NumRegs, false);
-// Assume a call can clo

[llvm-branch-commits] [llvm] [LV] Reduce register usage for scaled reductions (PR #133090)

2025-03-26 Thread Nicholas Guy via llvm-branch-commits




NickGuy-Arm wrote:

Could you pre-commit this test, so we can see how the output changes before and 
after the changes in LoopVectorize.cpp

https://github.com/llvm/llvm-project/pull/133090
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [LV] Reduce register usage for scaled reductions (PR #133090)

2025-03-26 Thread Nicholas Guy via llvm-branch-commits

https://github.com/NickGuy-Arm edited 
https://github.com/llvm/llvm-project/pull/133090
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: Detect address materialization and arithmetics (PR #132540)

2025-03-26 Thread Anatoly Trosinenko via llvm-branch-commits


@@ -0,0 +1,228 @@
+// RUN: %clang %cflags -march=armv8.3-a %s -o %t.exe
+// RUN: llvm-bolt-binary-analysis --scanners=pauth %t.exe 2>&1 | FileCheck %s
+
+// Test various patterns that should or should not be considered safe
+// materialization of PC-relative addresses.
+//
+// Note that while "instructions that write to the affected registers"
+// section of the report is still technically correct, it does not necessarily
+// mentions the instructions that are used incorrectly.
+//
+// FIXME: Switch to PAC* instructions instead of indirect tail call for testing
+//if a register is considered safe when detection of signing oracles is
+//implemented, as it is more traditional usage of PC-relative 
constants.
+//Moreover, using PAC instructions would improve test robustness, as
+//handling of *calls* can be influenced by what BOLT classifies as a
+//tail call, for example.
+
+.text
+

atrosinenko wrote:

Added `good_negative_offset` test case, thanks!

https://github.com/llvm/llvm-project/pull/132540
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/20.x: [TailDuplicator] Determine if computed gotos using `blockaddress` (#132536) (PR #133082)

2025-03-26 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/133082
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/20.x: [TailDuplicator] Determine if computed gotos using `blockaddress` (#132536) (PR #133082)

2025-03-26 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/133082

Backport 66f158d91803875de63d8f2a437ce8ecb22c4141

Requested by: @dianqk

>From 8241e69958028a64b9246986ee97b2d73ac48df4 Mon Sep 17 00:00:00 2001
From: dianqk 
Date: Wed, 26 Mar 2025 21:27:43 +0800
Subject: [PATCH] [TailDuplicator] Determine if computed gotos using
 `blockaddress` (#132536)

Using `blockaddress` should be more reliable than determining if an
operand comes from a jump table index.

Alternative: Add the `MachineInstr::MIFlag::ComputedGoto` flag when
lowering `indirectbr`. But I don't think this approach is suitable to
backport.

(cherry picked from commit 66f158d91803875de63d8f2a437ce8ecb22c4141)
---
 llvm/include/llvm/CodeGen/MachineBasicBlock.h |   9 +
 llvm/include/llvm/CodeGen/MachineInstr.h  |  16 +-
 llvm/lib/CodeGen/TailDuplicator.cpp   |   2 +-
 .../CodeGen/X86/tail-dup-computed-goto.mir| 265 +-
 4 files changed, 203 insertions(+), 89 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h 
b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 0b803a9724742..11efb2f656a7a 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -311,6 +311,15 @@ class MachineBasicBlock
   const MachineFunction *getParent() const { return xParent; }
   MachineFunction *getParent() { return xParent; }
 
+  /// Returns true if the original IR terminator is an `indirectbr`. This
+  /// typically corresponds to a `goto` in C, rather than jump tables.
+  bool terminatorIsComputedGoto() const {
+return back().isIndirectBranch() &&
+   llvm::all_of(successors(), [](const MachineBasicBlock *Succ) {
+ return Succ->isIRBlockAddressTaken();
+   });
+  }
+
   using instr_iterator = Instructions::iterator;
   using const_instr_iterator = Instructions::const_iterator;
   using reverse_instr_iterator = Instructions::reverse_iterator;
diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h 
b/llvm/include/llvm/CodeGen/MachineInstr.h
index b26cabe801ee8..997d6a5554e06 100644
--- a/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -994,17 +994,8 @@ class MachineInstr
 
   /// Return true if this is an indirect branch, such as a
   /// branch through a register.
-  bool isIndirectBranch(QueryType Type = AnyInBundle,
-bool IncludeJumpTable = true) const {
-return hasProperty(MCID::IndirectBranch, Type) &&
-   (IncludeJumpTable || !llvm::any_of(operands(), [](const auto &Op) {
-  return Op.isJTI();
-}));
-  }
-
-  bool isComputedGoto(QueryType Type = AnyInBundle) const {
-// Jump tables are not considered computed gotos.
-return isIndirectBranch(Type, /*IncludeJumpTable=*/false);
+  bool isIndirectBranch(QueryType Type = AnyInBundle) const {
+return hasProperty(MCID::IndirectBranch, Type);
   }
 
   /// Return true if this is a branch which may fall
@@ -2088,6 +2079,9 @@ class MachineInstr
 MCSymbol *PreInstrSymbol, MCSymbol *PostInstrSymbol,
 MDNode *HeapAllocMarker, MDNode *PCSections,
 uint32_t CFIType, MDNode *MMRAs);
+
+  /// Returns true if all successors are IRBlockAddressTaken.
+  bool jumpToIRBlockAddressTaken() const;
 };
 
 /// Special DenseMapInfo traits to compare MachineInstr* by *value* of the
diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp 
b/llvm/lib/CodeGen/TailDuplicator.cpp
index 21f75458c90f3..b0de3c322ddd0 100644
--- a/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -604,7 +604,7 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
   bool HasComputedGoto = false;
   if (!TailBB.empty()) {
 HasIndirectbr = TailBB.back().isIndirectBranch();
-HasComputedGoto = TailBB.back().isComputedGoto();
+HasComputedGoto = TailBB.terminatorIsComputedGoto();
   }
 
   if (HasIndirectbr && PreRegAlloc)
diff --git a/llvm/test/CodeGen/X86/tail-dup-computed-goto.mir 
b/llvm/test/CodeGen/X86/tail-dup-computed-goto.mir
index a472dc67d8d51..17de405928d37 100644
--- a/llvm/test/CodeGen/X86/tail-dup-computed-goto.mir
+++ b/llvm/test/CodeGen/X86/tail-dup-computed-goto.mir
@@ -2,15 +2,27 @@
 # RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=early-tailduplication 
-tail-dup-pred-size=1 -tail-dup-succ-size=1 %s -o - | FileCheck %s
# Check that only the computed goto is not restricted by tail-dup-pred-size 
and tail-dup-succ-size.
 --- |
+  @computed_goto.dispatch = constant [5 x ptr] [ptr null, ptr 
blockaddress(@computed_goto, %bb1), ptr blockaddress(@computed_goto, %bb2), ptr 
blockaddress(@computed_goto, %bb3), ptr blockaddress(@computed_goto, %bb4)]
   declare i64 @f0()
   declare i64 @f1()
   declare i64 @f2()
   declare i64 @f3()
   declare i64 @f4()
   declare i64 @f5()
-  @computed_goto.dispatch = external global [5 x ptr]
-  define void @computed_goto() { ret void }
+  defi

[llvm-branch-commits] [llvm] release/20.x: [TailDuplicator] Determine if computed gotos using `blockaddress` (#132536) (PR #133082)

2025-03-26 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-x86

Author: None (llvmbot)


Changes

Backport 66f158d91803875de63d8f2a437ce8ecb22c4141

Requested by: @dianqk

---

Patch is 29.24 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/133082.diff


4 Files Affected:

- (modified) llvm/include/llvm/CodeGen/MachineBasicBlock.h (+9) 
- (modified) llvm/include/llvm/CodeGen/MachineInstr.h (+5-11) 
- (modified) llvm/lib/CodeGen/TailDuplicator.cpp (+1-1) 
- (modified) llvm/test/CodeGen/X86/tail-dup-computed-goto.mir (+188-77) 


``diff
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h 
b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 0b803a9724742..11efb2f656a7a 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -311,6 +311,15 @@ class MachineBasicBlock
   const MachineFunction *getParent() const { return xParent; }
   MachineFunction *getParent() { return xParent; }
 
+  /// Returns true if the original IR terminator is an `indirectbr`. This
+  /// typically corresponds to a `goto` in C, rather than jump tables.
+  bool terminatorIsComputedGoto() const {
+return back().isIndirectBranch() &&
+   llvm::all_of(successors(), [](const MachineBasicBlock *Succ) {
+ return Succ->isIRBlockAddressTaken();
+   });
+  }
+
   using instr_iterator = Instructions::iterator;
   using const_instr_iterator = Instructions::const_iterator;
   using reverse_instr_iterator = Instructions::reverse_iterator;
diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h 
b/llvm/include/llvm/CodeGen/MachineInstr.h
index b26cabe801ee8..997d6a5554e06 100644
--- a/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -994,17 +994,8 @@ class MachineInstr
 
   /// Return true if this is an indirect branch, such as a
   /// branch through a register.
-  bool isIndirectBranch(QueryType Type = AnyInBundle,
-bool IncludeJumpTable = true) const {
-return hasProperty(MCID::IndirectBranch, Type) &&
-   (IncludeJumpTable || !llvm::any_of(operands(), [](const auto &Op) {
-  return Op.isJTI();
-}));
-  }
-
-  bool isComputedGoto(QueryType Type = AnyInBundle) const {
-// Jump tables are not considered computed gotos.
-return isIndirectBranch(Type, /*IncludeJumpTable=*/false);
+  bool isIndirectBranch(QueryType Type = AnyInBundle) const {
+return hasProperty(MCID::IndirectBranch, Type);
   }
 
   /// Return true if this is a branch which may fall
@@ -2088,6 +2079,9 @@ class MachineInstr
 MCSymbol *PreInstrSymbol, MCSymbol *PostInstrSymbol,
 MDNode *HeapAllocMarker, MDNode *PCSections,
 uint32_t CFIType, MDNode *MMRAs);
+
+  /// Returns true if all successors are IRBlockAddressTaken.
+  bool jumpToIRBlockAddressTaken() const;
 };
 
 /// Special DenseMapInfo traits to compare MachineInstr* by *value* of the
diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp 
b/llvm/lib/CodeGen/TailDuplicator.cpp
index 21f75458c90f3..b0de3c322ddd0 100644
--- a/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -604,7 +604,7 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
   bool HasComputedGoto = false;
   if (!TailBB.empty()) {
 HasIndirectbr = TailBB.back().isIndirectBranch();
-HasComputedGoto = TailBB.back().isComputedGoto();
+HasComputedGoto = TailBB.terminatorIsComputedGoto();
   }
 
   if (HasIndirectbr && PreRegAlloc)
diff --git a/llvm/test/CodeGen/X86/tail-dup-computed-goto.mir 
b/llvm/test/CodeGen/X86/tail-dup-computed-goto.mir
index a472dc67d8d51..17de405928d37 100644
--- a/llvm/test/CodeGen/X86/tail-dup-computed-goto.mir
+++ b/llvm/test/CodeGen/X86/tail-dup-computed-goto.mir
@@ -2,15 +2,27 @@
 # RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=early-tailduplication 
-tail-dup-pred-size=1 -tail-dup-succ-size=1 %s -o - | FileCheck %s
# Check that only the computed goto is not restricted by tail-dup-pred-size 
and tail-dup-succ-size.
 --- |
+  @computed_goto.dispatch = constant [5 x ptr] [ptr null, ptr 
blockaddress(@computed_goto, %bb1), ptr blockaddress(@computed_goto, %bb2), ptr 
blockaddress(@computed_goto, %bb3), ptr blockaddress(@computed_goto, %bb4)]
   declare i64 @f0()
   declare i64 @f1()
   declare i64 @f2()
   declare i64 @f3()
   declare i64 @f4()
   declare i64 @f5()
-  @computed_goto.dispatch = external global [5 x ptr]
-  define void @computed_goto() { ret void }
+  define void @computed_goto() {
+start:
+  ret void
+bb1:
+  ret void
+bb2:
+  ret void
+bb3:
+  ret void
+bb4:
+  ret void
+  }
   define void @jump_table() { ret void }
+  define void @jump_table_pic() { ret void }
 ...
 ---
 name:computed_goto
@@ -23,98 +35,88 @@ body: |
   ; CHECK-NEXT:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, 
implicit-def de

[llvm-branch-commits] [llvm] release/20.x: [TailDuplicator] Determine if computed gotos using `blockaddress` (#132536) (PR #133082)

2025-03-26 Thread via llvm-branch-commits

llvmbot wrote:

@arsenm What do you think about merging this PR to the release branch?

https://github.com/llvm/llvm-project/pull/133082
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] cf9a10d - Revert "[MLIR][NVGPU] Use `gpu.dynamic_shared_memory` in tests (#133051)"

2025-03-26 Thread via llvm-branch-commits

Author: Karlo Basioli
Date: 2025-03-26T15:12:55Z
New Revision: cf9a10d4665d71acd86de0b98cbaf25dd9dd03cc

URL: 
https://github.com/llvm/llvm-project/commit/cf9a10d4665d71acd86de0b98cbaf25dd9dd03cc
DIFF: 
https://github.com/llvm/llvm-project/commit/cf9a10d4665d71acd86de0b98cbaf25dd9dd03cc.diff

LOG: Revert "[MLIR][NVGPU] Use `gpu.dynamic_shared_memory` in tests (#133051)"

This reverts commit 15f5a7a3ec71c624cea0cbdf02e3c5205ba81d9d.

Added: 


Modified: 
mlir/test/Integration/GPU/CUDA/sm90/gemm_f32_f16_f16_128x128x128.mlir
mlir/test/Integration/GPU/CUDA/sm90/gemm_pred_f32_f16_f16_128x128x128.mlir
mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x64_swizzle128b.mlir

Removed: 




diff  --git 
a/mlir/test/Integration/GPU/CUDA/sm90/gemm_f32_f16_f16_128x128x128.mlir 
b/mlir/test/Integration/GPU/CUDA/sm90/gemm_f32_f16_f16_128x128x128.mlir
index 07324c603012a..1c5cf73db6eba 100644
--- a/mlir/test/Integration/GPU/CUDA/sm90/gemm_f32_f16_f16_128x128x128.mlir
+++ b/mlir/test/Integration/GPU/CUDA/sm90/gemm_f32_f16_f16_128x128x128.mlir
@@ -141,18 +141,14 @@ func.func @main() {
 %c16 = arith.constant 16 : index
 %c4096 = arith.constant 4096 : index
 %c8 = arith.constant 8 : index
-%txcount = arith.constant 32768 : index
-%c24576 = arith.constant 24576 : index
-%c16384 = arith.constant 16384 : index
-%c49152 = arith.constant 49152 : index
-%c57344 = arith.constant 57344 : index
+%txcount = arith.constant 32768 : index 
 
 %tidx = gpu.thread_id  x
 %dynamicMem = memref.get_global @dynamicShmem : memref<0xf16, 3>
 %lhsShmem = memref.reinterpret_cast %dynamicMem to offset: [0], sizes: [2, 
128, 64], strides: [8192, 64, 1] : memref<0xf16, 3> to memref<2x128x64xf16, 3>
 %rhsShmem2 = memref.reinterpret_cast %dynamicMem to offset: [0], sizes: 
[4, 64, 128],  strides: [8192,128,1] : memref<0xf16, 3> to 
memref<4x64x128xf16,3>
 %rhsShmem = memref.subview %rhsShmem2[2, 0, 0][2, 64, 128][1, 1, 1] : 
memref<4x64x128xf16,3> to memref<2x64x128xf16, strided<[8192, 128, 1], offset: 
16384>, 3>
-%dynsmem = gpu.dynamic_shared_memory : memref>
+
 // Step 1. [GPU] Create Async Transactional Barriers (mbarriers)
 %barrier = nvgpu.mbarrier.create -> !barrierType
 %cnd = arith.cmpi eq, %tidx, %c0 : index
@@ -165,29 +161,31 @@ func.func @main() {
 nvgpu.tma.prefetch.descriptor %descA : !lhsTensorMap
 nvgpu.tma.prefetch.descriptor %descB : !rhsTensorMap
 
-// Step 4.1 [GPU] TMA Load Pipeline 1   
+// Step 4.1 [GPU] TMA Load Pipeline 1   
 scf.if %cnd {
   %pipe = arith.constant 0 : index
-  %lhsSlice = memref.view %dynsmem[%c0][] : memref> to memref<128x64xf16, 
#gpu.address_space>
-  %halfFirst = memref.view %dynsmem[%c16384][] : memref> to memref<64x64xf16, 
#gpu.address_space>
-  %halfSecond = memref.view %dynsmem[%c24576][] : memref> to memref<64x64xf16, 
#gpu.address_space>
+  %lhsSlice = memref.subview %lhsShmem[0, 0, 0][1, 128, 64][1, 1, 1] : 
memref<2x128x64xf16, 3> to memref<128x64xf16, 3>
+  %rhsSlice = memref.subview %rhsShmem[0, 0, 0][1, 64, 128][1, 1, 1] : 
memref<2x64x128xf16, strided<[8192, 128, 1], offset: 16384>, 3> to 
memref<64x128xf16, strided<[128, 1], offset: 16384>, 3>
+  %halfFirst = memref.subview %rhsSlice[0, 0][64, 64][1, 1] : 
memref<64x128xf16, strided<[128, 1], offset: 16384>, 3> to memref<64x64xf16, 
strided<[128, 1], offset: 16384>, 3>
+  %halfSecond = memref.subview %rhsSlice[32, 0][64, 64][1, 1] : 
memref<64x128xf16, strided<[128, 1], offset: 16384>, 3> to memref<64x64xf16, 
strided<[128, 1], offset: 20480>, 3>
   nvgpu.mbarrier.arrive.expect_tx %barrier[%pipe], %txcount : !barrierType 
   
   %dim = arith.muli %pipe, %c64 : index
-  nvgpu.tma.async.load %descA[%dim, %c0], %barrier[%pipe] to %lhsSlice : 
!lhsTensorMap, !barrierType ->  memref<128x64xf16, 
#gpu.address_space>
-  nvgpu.tma.async.load %descB[%c0, %dim], %barrier[%pipe] to %halfFirst : 
!rhsTensorMap, !barrierType -> memref<64x64xf16, #gpu.address_space>
-  nvgpu.tma.async.load %descB[%c64, %dim], %barrier[%pipe] to %halfSecond 
: !rhsTensorMap, !barrierType -> memref<64x64xf16, 
#gpu.address_space>
+  nvgpu.tma.async.load %descA[%dim, %c0], %barrier[%pipe] to %lhsSlice : 
!lhsTensorMap, !barrierType -> memref<128x64xf16, 3>
+  nvgpu.tma.async.load %descB[%c0, %dim], %barrier[%pipe] to %halfFirst : 
!rhsTensorMap, !barrierType -> memref<64x64xf16, strided<[128, 1], offset: 
16384>, 3>
+  nvgpu.tma.async.load %descB[%c64, %dim], %barrier[%pipe] to %halfSecond 
: !rhsTensorMap, !barrierType -> memref<64x64xf16, strided<[128, 1], offset: 
20480>, 3>
 }
 // Step 4.2 [GPU] TMA Load Pipeline 2
 scf.if %cnd {
   %pipe = arith.constant 1 : index
-  %lhsSlice = memref.view %dynsmem[%c32768][] : memref> to memref<128x64xf16, 
#gpu.address_space>
-  %halfFirst =

[llvm-branch-commits] [libcxx] [libc++] Remove the need for _LIBCPP_TEMPLATE_VIS (PR #133010)

2025-03-26 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-libcxx

Author: Nikolas Klauser (philnik777)


Changes

Since almost all of our symbols are explicitly annotated with visibility 
macros, this makes almost no difference to the actual visibility of symbols. 
Not having to annotate types at the cost of having a few more symbols with 
default visibility on GCC seems like the right choice to me.


---

Patch is 322.31 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/133010.diff


261 Files Affected:

- (modified) libcxx/.clang-format (-1) 
- (modified) libcxx/docs/DesignDocs/VisibilityMacros.rst (+2-14) 
- (modified) libcxx/include/__chrono/duration.h (+5-5) 
- (modified) libcxx/include/__chrono/formatter.h (+22-22) 
- (modified) libcxx/include/__chrono/parser_std_format_spec.h (+1-1) 
- (modified) libcxx/include/__chrono/time_point.h (+2-3) 
- (modified) libcxx/include/__compare/common_comparison_category.h (+1-1) 
- (modified) libcxx/include/__compare/compare_three_way.h (+1-1) 
- (modified) libcxx/include/__compare/compare_three_way_result.h (+1-2) 
- (modified) libcxx/include/__config (+6-12) 
- (modified) libcxx/include/__coroutine/coroutine_handle.h (+3-3) 
- (modified) libcxx/include/__coroutine/noop_coroutine_handle.h (+1-1) 
- (modified) libcxx/include/__format/buffer.h (+11-11) 
- (modified) libcxx/include/__format/container_adaptor.h (+4-4) 
- (modified) libcxx/include/__format/format_arg.h (+3-3) 
- (modified) libcxx/include/__format/format_arg_store.h (+1-1) 
- (modified) libcxx/include/__format/format_args.h (+1-1) 
- (modified) libcxx/include/__format/format_context.h (+3-8) 
- (modified) libcxx/include/__format/format_functions.h (+5-5) 
- (modified) libcxx/include/__format/format_parse_context.h (+1-1) 
- (modified) libcxx/include/__format/format_string.h (+1-1) 
- (modified) libcxx/include/__format/format_to_n_result.h (+1-1) 
- (modified) libcxx/include/__format/formatter.h (+1-1) 
- (modified) libcxx/include/__format/formatter_bool.h (+1-1) 
- (modified) libcxx/include/__format/formatter_char.h (+4-4) 
- (modified) libcxx/include/__format/formatter_floating_point.h (+5-5) 
- (modified) libcxx/include/__format/formatter_integer.h (+13-13) 
- (modified) libcxx/include/__format/formatter_integral.h (+3-3) 
- (modified) libcxx/include/__format/formatter_pointer.h (+4-4) 
- (modified) libcxx/include/__format/formatter_string.h (+6-6) 
- (modified) libcxx/include/__format/formatter_tuple.h (+3-5) 
- (modified) libcxx/include/__format/parser_std_format_spec.h (+1-1) 
- (modified) libcxx/include/__format/range_default_formatter.h (+7-7) 
- (modified) libcxx/include/__format/range_formatter.h (+1-1) 
- (modified) libcxx/include/__functional/binary_function.h (+1-1) 
- (modified) libcxx/include/__functional/binary_negate.h (+1-1) 
- (modified) libcxx/include/__functional/binder1st.h (+2-2) 
- (modified) libcxx/include/__functional/binder2nd.h (+2-2) 
- (modified) libcxx/include/__functional/boyer_moore_searcher.h (+2-2) 
- (modified) libcxx/include/__functional/default_searcher.h (+1-1) 
- (modified) libcxx/include/__functional/function.h (+3-3) 
- (modified) libcxx/include/__functional/hash.h (+26-26) 
- (modified) libcxx/include/__functional/mem_fun_ref.h (+8-9) 
- (modified) libcxx/include/__functional/operations.h (+38-38) 
- (modified) libcxx/include/__functional/pointer_to_binary_function.h (+1-2) 
- (modified) libcxx/include/__functional/pointer_to_unary_function.h (+1-2) 
- (modified) libcxx/include/__functional/reference_wrapper.h (+1-1) 
- (modified) libcxx/include/__functional/unary_function.h (+1-1) 
- (modified) libcxx/include/__functional/unary_negate.h (+1-2) 
- (modified) libcxx/include/__fwd/array.h (+1-1) 
- (modified) libcxx/include/__fwd/complex.h (+1-1) 
- (modified) libcxx/include/__fwd/deque.h (+1-1) 
- (modified) libcxx/include/__fwd/format.h (+3-3) 
- (modified) libcxx/include/__fwd/fstream.h (+4-4) 
- (modified) libcxx/include/__fwd/functional.h (+3-3) 
- (modified) libcxx/include/__fwd/ios.h (+1-1) 
- (modified) libcxx/include/__fwd/istream.h (+2-2) 
- (modified) libcxx/include/__fwd/map.h (+2-2) 
- (modified) libcxx/include/__fwd/memory.h (+2-2) 
- (modified) libcxx/include/__fwd/memory_resource.h (+1-1) 
- (modified) libcxx/include/__fwd/ostream.h (+1-1) 
- (modified) libcxx/include/__fwd/pair.h (+1-1) 
- (modified) libcxx/include/__fwd/queue.h (+2-2) 
- (modified) libcxx/include/__fwd/set.h (+2-2) 
- (modified) libcxx/include/__fwd/sstream.h (+4-4) 
- (modified) libcxx/include/__fwd/stack.h (+1-1) 
- (modified) libcxx/include/__fwd/streambuf.h (+1-1) 
- (modified) libcxx/include/__fwd/string.h (+2-2) 
- (modified) libcxx/include/__fwd/string_view.h (+1-1) 
- (modified) libcxx/include/__fwd/subrange.h (+1-1) 
- (modified) libcxx/include/__fwd/tuple.h (+3-3) 
- (modified) libcxx/include/__fwd/variant.h (+3-3) 
- (modified) libcxx/include/__fwd/vector.h (+1-1) 
- (modified) libcxx/include/__hash_table (+22-22) 
- (modified) libcxx/include/_

[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: Detect address materialization and arithmetics (PR #132540)

2025-03-26 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/132540

>From 774ecf8d94cb7acff292af5b16655a62b8b23201 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Thu, 20 Mar 2025 20:15:07 +0300
Subject: [PATCH] [BOLT] Gadget scanner: Detect address materialization and
 arithmetics

In addition to authenticated pointers, consider the contents of a
register safe if it was
* written by PC-relative address computation
* updated by an arithmetic instruction whose input address is safe
---
 bolt/include/bolt/Core/MCPlusBuilder.h|  16 ++
 bolt/lib/Passes/PAuthGadgetScanner.cpp|  92 +--
 .../Target/AArch64/AArch64MCPlusBuilder.cpp   |  30 +++
 .../AArch64/gs-pacret-autiasp.s   |  15 --
 .../gs-pauth-address-materialization.s| 228 ++
 .../binary-analysis/AArch64/lit.local.cfg |   3 +-
 6 files changed, 345 insertions(+), 39 deletions(-)
 create mode 100644 
bolt/test/binary-analysis/AArch64/gs-pauth-address-materialization.s

diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h 
b/bolt/include/bolt/Core/MCPlusBuilder.h
index 8b6dc14121480..e94f82d00349a 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -587,6 +587,22 @@ class MCPlusBuilder {
 return getNoRegister();
   }
 
+  virtual MCPhysReg getSafelyMaterializedAddressReg(const MCInst &Inst) const {
+llvm_unreachable("not implemented");
+return getNoRegister();
+  }
+
+  /// Analyzes if this instruction can safely perform address arithmetics.
+  ///
+  /// If the first element of the returned pair is no-register, this 
instruction
+  /// is considered unknown. Otherwise, (output, input) pair is returned,
+  /// so that output is as trusted as input is.
+  virtual std::pair
+  analyzeSafeAddressArithmetics(const MCInst &Inst) const {
+llvm_unreachable("not implemented");
+return std::make_pair(getNoRegister(), getNoRegister());
+  }
+
   virtual bool isTerminator(const MCInst &Inst) const;
 
   virtual bool isNoop(const MCInst &Inst) const {
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp 
b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index a3b320c545734..16da08551a34d 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -335,6 +335,50 @@ class PacRetAnalysis
 });
   }
 
+  BitVector getClobberedRegs(const MCInst &Point) const {
+BitVector Clobbered(NumRegs, false);
+// Assume a call can clobber all registers, including callee-saved
+// registers. There's a good chance that callee-saved registers will be
+// saved on the stack at some point during execution of the callee.
+// Therefore they should also be considered as potentially modified by an
+// attacker/written to.
+// Also, not all functions may respect the AAPCS ABI rules about
+// caller/callee-saved registers.
+if (BC.MIB->isCall(Point))
+  Clobbered.set();
+else
+  BC.MIB->getClobberedRegs(Point, Clobbered);
+return Clobbered;
+  }
+
+  // Returns all registers that can be treated as if they are written by an
+  // authentication instruction.
+  SmallVector getAuthenticatedRegs(const MCInst &Point,
+  const State &Cur) const {
+SmallVector Regs;
+const MCPhysReg NoReg = BC.MIB->getNoRegister();
+
+// A signed pointer can be authenticated, or
+ErrorOr AutReg = BC.MIB->getAuthenticatedReg(Point);
+if (AutReg && *AutReg != NoReg)
+  Regs.push_back(*AutReg);
+
+// ... a safe address can be materialized, or
+MCPhysReg NewAddrReg = BC.MIB->getSafelyMaterializedAddressReg(Point);
+if (NewAddrReg != NoReg)
+  Regs.push_back(NewAddrReg);
+
+// ... an address can be updated in a safe manner, producing the result
+// which is as trusted as the input address.
+MCPhysReg ArithResult, ArithSrc;
+std::tie(ArithResult, ArithSrc) =
+BC.MIB->analyzeSafeAddressArithmetics(Point);
+if (ArithResult != NoReg && Cur.SafeToDerefRegs[ArithSrc])
+  Regs.push_back(ArithResult);
+
+return Regs;
+  }
+
   State computeNext(const MCInst &Point, const State &Cur) {
 PacStatePrinter P(BC);
 LLVM_DEBUG({
@@ -355,19 +399,20 @@ class PacRetAnalysis
   return State();
 }
 
+// First, compute various properties of the instruction, taking the state
+// before its execution into account, if necessary.
+
+BitVector Clobbered = getClobberedRegs(Point);
+// Compute the set of registers that can be considered as written by
+// an authentication instruction. This includes operations that are
+// *strictly better* than authentication, such as materializing a
+// PC-relative constant.
+SmallVector AuthenticatedOrBetter =
+getAuthenticatedRegs(Point, Cur);
+
+// Then, compute the state after this instruction is executed.
 State Next = Cur;
-BitVector Clobbered(NumRegs, false);
-// Assume a call can clo

[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: Detect address materialization and arithmetics (PR #132540)

2025-03-26 Thread Jacob Bramley via llvm-branch-commits


@@ -0,0 +1,228 @@
+// RUN: %clang %cflags -march=armv8.3-a %s -o %t.exe
+// RUN: llvm-bolt-binary-analysis --scanners=pauth %t.exe 2>&1 | FileCheck %s
+
+// Test various patterns that should or should not be considered safe
+// materialization of PC-relative addresses.
+//
+// Note that while "instructions that write to the affected registers"
+// section of the report is still technically correct, it does not necessarily
+// mentions the instructions that are used incorrectly.
+//
+// FIXME: Switch to PAC* instructions instead of indirect tail call for testing
+//if a register is considered safe when detection of signing oracles is
+//implemented, as it is more traditional usage of PC-relative 
constants.
+//Moreover, using PAC instructions would improve test robustness, as
+//handling of *calls* can be influenced by what BOLT classifies as a
+//tail call, for example.
+
+.text
+
+// Define a function that is reachable by ADR instruction.
+.type   sym,@function
+sym:
+ret
+.size   sym, .-sym
+
+.globl  good_adr
+.type   good_adr,@function
+good_adr:
+// CHECK-NOT: good_adr
+adr x0, sym
+br  x0
+.size   good_adr, .-good_adr
+
+.globl  good_adrp
+.type   good_adrp,@function
+good_adrp:
+// CHECK-NOT: good_adrp
+adrpx0, sym
+br  x0
+.size   good_adrp, .-good_adrp
+
+.globl  good_adrp_add
+.type   good_adrp_add,@function
+good_adrp_add:
+// CHECK-NOT: good_adrp_add
+adrpx0, sym
+add x0, x0, :lo12:sym
+br  x0
+.size   good_adrp_add, .-good_adrp_add
+
+.globl  good_adrp_add_with_const_offset
+.type   good_adrp_add_with_const_offset,@function
+good_adrp_add_with_const_offset:
+// CHECK-NOT: good_adrp_add_with_const_offset
+adrpx0, sym
+add x0, x0, :lo12:sym
+add x0, x0, #8
+br  x0
+.size   good_adrp_add_with_const_offset, 
.-good_adrp_add_with_const_offset
+
+.globl  bad_adrp_with_nonconst_offset
+.type   bad_adrp_with_nonconst_offset,@function
+bad_adrp_with_nonconst_offset:
+// CHECK-LABEL: GS-PAUTH: non-protected call found in function 
bad_adrp_with_nonconst_offset, basic block {{[^,]+}}, at address
+// CHECK-NEXT:  The instruction is {{[0-9a-f]+}}:  br  x0 # 
TAILCALL
+// CHECK-NEXT:  The 1 instructions that write to the affected registers after 
any authentication are:
+// CHECK-NEXT:  1. {{[0-9a-f]+}}:  add x0, x0, x1
+// CHECK-NEXT:  This happens in the following basic block:
+// CHECK-NEXT:  {{[0-9a-f]+}}:   adrpx0, #{{.*}}
+// CHECK-NEXT:  {{[0-9a-f]+}}:   add x0, x0, x1
+// CHECK-NEXT:  {{[0-9a-f]+}}:   br  x0 # TAILCALL
+adrpx0, sym
+add x0, x0, x1
+br  x0
+.size   bad_adrp_with_nonconst_offset, .-bad_adrp_with_nonconst_offset
+
+.globl  bad_split_adrp
+.type   bad_split_adrp,@function
+bad_split_adrp:
+// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_split_adrp, 
basic block {{[^,]+}}, at address
+// CHECK-NEXT:  The instruction is {{[0-9a-f]+}}:  br  x0 # 
UNKNOWN CONTROL FLOW
+// CHECK-NEXT:  The 1 instructions that write to the affected registers after 
any authentication are:
+// CHECK-NEXT:  1. {{[0-9a-f]+}}:  add x0, x0, #0x{{[0-9a-f]+}}
+// CHECK-NEXT:  This happens in the following basic block:
+// CHECK-NEXT:  {{[0-9a-f]+}}:   add x0, x0, #0x{{[0-9a-f]+}}
+// CHECK-NEXT:  {{[0-9a-f]+}}:   br  x0 # UNKNOWN CONTROL FLOW
+cbz x2, 1f
+adrpx0, sym
+1:
+add x0, x0, :lo12:sym
+br  x0
+.size   bad_split_adrp, .-bad_split_adrp
+
+// Materialization of absolute addresses is not expected.
+
+.globl  bad_immediate_constant
+.type   bad_immediate_constant,@function
+bad_immediate_constant:
+// CHECK-LABEL: GS-PAUTH: non-protected call found in function 
bad_immediate_constant, basic block {{[^,]+}}, at address
+// CHECK-NEXT:  The instruction is {{[0-9a-f]+}}:  br  x0 # 
TAILCALL
+// CHECK-NEXT:  The 1 instructions that write to the affected registers after 
any authentication are:
+// CHECK-NEXT:  1. {{[0-9a-f]+}}:  mov x0, #{{.*}}
+// CHECK-NEXT:  This happens in the following basic block:
+// CHECK-NEXT:  {{[0-9a-f]+}}:   mov x0, #{{.*}}
+// CHECK-NEXT:  {{[0-9a-f]+}}:   br  x0 # TAILCALL
+movzx0, #1234
+br  x0

jacobbramley wrote:

I think the classification of good and bad sequences is probably a bit tricky 
in general. For example, the `#1234` is not attacker-controlled, and in some 
real code we _might_ use `movz` and `movk` to materialise a constant address.

We can surely update these tests as other cases come up, so I don't think this 
needs to change, but I wanted to acknowledge i

[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: Detect address materialization and arithmetics (PR #132540)

2025-03-26 Thread Jacob Bramley via llvm-branch-commits


@@ -0,0 +1,228 @@
+// RUN: %clang %cflags -march=armv8.3-a %s -o %t.exe
+// RUN: llvm-bolt-binary-analysis --scanners=pauth %t.exe 2>&1 | FileCheck %s
+
+// Test various patterns that should or should not be considered safe
+// materialization of PC-relative addresses.
+//
+// Note that while "instructions that write to the affected registers"
+// section of the report is still technically correct, it does not necessarily
+// mentions the instructions that are used incorrectly.

jacobbramley wrote:

s/mentions/mention/

https://github.com/llvm/llvm-project/pull/132540
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: Detect address materialization and arithmetics (PR #132540)

2025-03-26 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/132540

>From 774ecf8d94cb7acff292af5b16655a62b8b23201 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Thu, 20 Mar 2025 20:15:07 +0300
Subject: [PATCH] [BOLT] Gadget scanner: Detect address materialization and
 arithmetics

In addition to authenticated pointers, consider the contents of a
register safe if it was
* written by PC-relative address computation
* updated by an arithmetic instruction whose input address is safe
---
 bolt/include/bolt/Core/MCPlusBuilder.h|  16 ++
 bolt/lib/Passes/PAuthGadgetScanner.cpp|  92 +--
 .../Target/AArch64/AArch64MCPlusBuilder.cpp   |  30 +++
 .../AArch64/gs-pacret-autiasp.s   |  15 --
 .../gs-pauth-address-materialization.s| 228 ++
 .../binary-analysis/AArch64/lit.local.cfg |   3 +-
 6 files changed, 345 insertions(+), 39 deletions(-)
 create mode 100644 
bolt/test/binary-analysis/AArch64/gs-pauth-address-materialization.s

diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h 
b/bolt/include/bolt/Core/MCPlusBuilder.h
index 8b6dc14121480..e94f82d00349a 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -587,6 +587,22 @@ class MCPlusBuilder {
 return getNoRegister();
   }
 
+  virtual MCPhysReg getSafelyMaterializedAddressReg(const MCInst &Inst) const {
+llvm_unreachable("not implemented");
+return getNoRegister();
+  }
+
+  /// Analyzes if this instruction can safely perform address arithmetics.
+  ///
+  /// If the first element of the returned pair is no-register, this 
instruction
+  /// is considered unknown. Otherwise, (output, input) pair is returned,
+  /// so that output is as trusted as input is.
+  virtual std::pair
+  analyzeSafeAddressArithmetics(const MCInst &Inst) const {
+llvm_unreachable("not implemented");
+return std::make_pair(getNoRegister(), getNoRegister());
+  }
+
   virtual bool isTerminator(const MCInst &Inst) const;
 
   virtual bool isNoop(const MCInst &Inst) const {
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp 
b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index a3b320c545734..16da08551a34d 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -335,6 +335,50 @@ class PacRetAnalysis
 });
   }
 
+  BitVector getClobberedRegs(const MCInst &Point) const {
+BitVector Clobbered(NumRegs, false);
+// Assume a call can clobber all registers, including callee-saved
+// registers. There's a good chance that callee-saved registers will be
+// saved on the stack at some point during execution of the callee.
+// Therefore they should also be considered as potentially modified by an
+// attacker/written to.
+// Also, not all functions may respect the AAPCS ABI rules about
+// caller/callee-saved registers.
+if (BC.MIB->isCall(Point))
+  Clobbered.set();
+else
+  BC.MIB->getClobberedRegs(Point, Clobbered);
+return Clobbered;
+  }
+
+  // Returns all registers that can be treated as if they are written by an
+  // authentication instruction.
+  SmallVector getAuthenticatedRegs(const MCInst &Point,
+  const State &Cur) const {
+SmallVector Regs;
+const MCPhysReg NoReg = BC.MIB->getNoRegister();
+
+// A signed pointer can be authenticated, or
+ErrorOr AutReg = BC.MIB->getAuthenticatedReg(Point);
+if (AutReg && *AutReg != NoReg)
+  Regs.push_back(*AutReg);
+
+// ... a safe address can be materialized, or
+MCPhysReg NewAddrReg = BC.MIB->getSafelyMaterializedAddressReg(Point);
+if (NewAddrReg != NoReg)
+  Regs.push_back(NewAddrReg);
+
+// ... an address can be updated in a safe manner, producing the result
+// which is as trusted as the input address.
+MCPhysReg ArithResult, ArithSrc;
+std::tie(ArithResult, ArithSrc) =
+BC.MIB->analyzeSafeAddressArithmetics(Point);
+if (ArithResult != NoReg && Cur.SafeToDerefRegs[ArithSrc])
+  Regs.push_back(ArithResult);
+
+return Regs;
+  }
+
   State computeNext(const MCInst &Point, const State &Cur) {
 PacStatePrinter P(BC);
 LLVM_DEBUG({
@@ -355,19 +399,20 @@ class PacRetAnalysis
   return State();
 }
 
+// First, compute various properties of the instruction, taking the state
+// before its execution into account, if necessary.
+
+BitVector Clobbered = getClobberedRegs(Point);
+// Compute the set of registers that can be considered as written by
+// an authentication instruction. This includes operations that are
+// *strictly better* than authentication, such as materializing a
+// PC-relative constant.
+    SmallVector<MCPhysReg> AuthenticatedOrBetter =
+        getAuthenticatedRegs(Point, Cur);
+
+// Then, compute the state after this instruction is executed.
 State Next = Cur;
-BitVector Clobbered(NumRegs, false);
-// Assume a call can clo

[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: add RegBankLegalize rules for extends and trunc (PR #132383)

2025-03-26 Thread Nicolai Hähnle via llvm-branch-commits


@@ -251,8 +245,11 @@ body: |
 ; CHECK: liveins: $vgpr0
 ; CHECK-NEXT: {{  $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
-; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC]](s1)
+; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[COPY]], [[C]]
+; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[AND]](s32), 
[[C1]]
+; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], 
[[C1]]

nhaehnle wrote:

Similar here: This could be combined down to just a no-op -- don't combiners do 
that already? They should, and so this should probably not be handled 
separately by legalization

https://github.com/llvm/llvm-project/pull/132383
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: add RegBankLegalize rules for extends and trunc (PR #132383)

2025-03-26 Thread Nicolai Hähnle via llvm-branch-commits


@@ -269,10 +266,12 @@ body: |
 ; CHECK: liveins: $vgpr0
 ; CHECK-NEXT: {{  $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
-; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC]](s1)
-; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
-; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[ANYEXT]](s32), 
[[DEF]](s32)
+; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[COPY]], [[C]]
+; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[AND]](s32), 
[[C1]]
+; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], 
[[C1]]
+; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), 
[[C1]](s32)

nhaehnle wrote:

Could just be a single G_ANYEXT

https://github.com/llvm/llvm-project/pull/132383
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: add RegBankLegalize rules for extends and trunc (PR #132383)

2025-03-26 Thread Nicolai Hähnle via llvm-branch-commits


@@ -233,8 +222,13 @@ body: |
 ; CHECK: liveins: $vgpr0
 ; CHECK-NEXT: {{  $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
-; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s16) = G_ANYEXT [[TRUNC]](s1)
+; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[COPY]], [[C]]
+; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[AND]](s32), 
[[C1]]
+; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s16) = G_CONSTANT i16 1
+; CHECK-NEXT: [[C3:%[0-9]+]]:vgpr(s16) = G_CONSTANT i16 0
+; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s16) = G_SELECT [[ICMP]](s1), 
[[C2]], [[C3]]

nhaehnle wrote:

This is unnecessarily convoluted. A single `G_TRUNC` should do the trick. 
(Isn't that something a combiner could do?)

https://github.com/llvm/llvm-project/pull/132383
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: add RegBankLegalize rules for extends and trunc (PR #132383)

2025-03-26 Thread Nicolai Hähnle via llvm-branch-commits

https://github.com/nhaehnle edited 
https://github.com/llvm/llvm-project/pull/132383
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: add RegBankLegalize rules for extends and trunc (PR #132383)

2025-03-26 Thread Nicolai Hähnle via llvm-branch-commits


@@ -215,8 +205,7 @@ body: |
 ; CHECK: liveins: $sgpr0
 ; CHECK-NEXT: {{  $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
-; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
-; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s64) = G_ANYEXT [[TRUNC]](s1)
+; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s64) = G_ANYEXT [[COPY]](s32)

nhaehnle wrote:

Isn't this a correctness regression? I'm not entirely certain because I 
remember there was some weirdness around what G_TRUNC means semantically. Can 
you explain why there is no need for a trunc or bitwise and or something like 
that in the output?

Note that `anyext_s1_to_s32_vgpr` does leave a G_AND, so either that test shows 
a code quality issue or this test is incorrect.

https://github.com/llvm/llvm-project/pull/132383
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: add RegBankLegalize rules for extends and trunc (PR #132383)

2025-03-26 Thread Nicolai Hähnle via llvm-branch-commits


@@ -160,8 +154,7 @@ body: |
 ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
 ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], 
[[C1]]
-; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
-; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), 
[[DEF]](s32)
+; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), 
[[C1]](s32)

nhaehnle wrote:

This change is a code quality regression: the input has `G_ANYEXT`, so the high 
half can be undefined.

https://github.com/llvm/llvm-project/pull/132383
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: add RegBankLegalize rules for extends and trunc (PR #132383)

2025-03-26 Thread Nicolai Hähnle via llvm-branch-commits

https://github.com/nhaehnle commented:

I didn't look at everything, I just went through some of the tests.

https://github.com/llvm/llvm-project/pull/132383
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 1840957 - Revert "[ExecutionEngine] Avoid repeated hash lookups (NFC) (#132587)"

2025-03-26 Thread via llvm-branch-commits

Author: David Spickett
Date: 2025-03-26T15:03:58Z
New Revision: 1840957286329da64742ea50a9e28fd79e7baa56

URL: 
https://github.com/llvm/llvm-project/commit/1840957286329da64742ea50a9e28fd79e7baa56
DIFF: 
https://github.com/llvm/llvm-project/commit/1840957286329da64742ea50a9e28fd79e7baa56.diff

LOG: Revert "[ExecutionEngine] Avoid repeated hash lookups (NFC) (#132587)"

This reverts commit 0b181de20665574e086ed147868e34e8787a5286.

Added: 


Modified: 
llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h

Removed: 




diff  --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp 
b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index 6333bda0270f8..def117448ab6a 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -1515,15 +1515,15 @@ void RuntimeDyldELF::resolveAArch64Branch(unsigned 
SectionID,
   uint64_t Offset = RelI->getOffset();
   unsigned RelType = RelI->getType();
   // Look for an existing stub.
-  auto [It, Inserted] = Stubs.try_emplace(Value);
-  if (!Inserted) {
+  StubMap::const_iterator i = Stubs.find(Value);
+  if (i != Stubs.end()) {
 resolveRelocation(Section, Offset,
-  Section.getLoadAddressWithOffset(It->second), RelType, 
0);
+  Section.getLoadAddressWithOffset(i->second), RelType, 0);
 LLVM_DEBUG(dbgs() << " Stub function found\n");
   } else if (!resolveAArch64ShortBranch(SectionID, RelI, Value)) {
 // Create a new stub function.
 LLVM_DEBUG(dbgs() << " Create a new stub function\n");
-It->second = Section.getStubOffset();
+Stubs[Value] = Section.getStubOffset();
 uint8_t *StubTargetAddr = createStubFunction(
 Section.getAddressWithOffset(Section.getStubOffset()));
 
@@ -1837,15 +1837,15 @@ RuntimeDyldELF::processRelocationRef(
   SectionEntry &Section = Sections[SectionID];
 
   //  Look up for existing stub.
-  auto [It, Inserted] = Stubs.try_emplace(Value);
-  if (!Inserted) {
-RelocationEntry RE(SectionID, Offset, RelType, It->second);
+  StubMap::const_iterator i = Stubs.find(Value);
+  if (i != Stubs.end()) {
+RelocationEntry RE(SectionID, Offset, RelType, i->second);
 addRelocationForSection(RE, SectionID);
 LLVM_DEBUG(dbgs() << " Stub function found\n");
   } else {
 // Create a new stub function.
 LLVM_DEBUG(dbgs() << " Create a new stub function\n");
-It->second = Section.getStubOffset();
+Stubs[Value] = Section.getStubOffset();
 
 unsigned AbiVariant = Obj.getPlatformFlags();
 
@@ -2075,10 +2075,10 @@ RuntimeDyldELF::processRelocationRef(
 SectionEntry &Section = Sections[SectionID];
 
 // Look for an existing stub.
-auto [It, Inserted] = Stubs.try_emplace(Value);
+StubMap::const_iterator i = Stubs.find(Value);
 uintptr_t StubAddress;
-if (!Inserted) {
-  StubAddress = uintptr_t(Section.getAddressWithOffset(It->second));
+if (i != Stubs.end()) {
+  StubAddress = uintptr_t(Section.getAddressWithOffset(i->second));
   LLVM_DEBUG(dbgs() << " Stub function found\n");
 } else {
   // Create a new stub function.
@@ -2089,7 +2089,7 @@ RuntimeDyldELF::processRelocationRef(
   alignTo(BaseAddress + Section.getStubOffset(), getStubAlignment());
   unsigned StubOffset = StubAddress - BaseAddress;
 
-  It->second = StubOffset;
+  Stubs[Value] = StubOffset;
   createStubFunction((uint8_t *)StubAddress);
   RelocationEntry RE(SectionID, StubOffset + 8, ELF::R_390_64,
  Value.Offset);

diff  --git 
a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h 
b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
index e0d9f2af988fb..79b558eb7796d 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
@@ -307,14 +307,14 @@ class RuntimeDyldMachOARM
 // This is an ARM branch relocation, need to use a stub function.
 // Look up for existing stub.
 SectionEntry &Section = Sections[RE.SectionID];
-auto [It, Inserted] = Stubs.try_emplace(Value);
+RuntimeDyldMachO::StubMap::const_iterator i = Stubs.find(Value);
 uint8_t *Addr;
-if (!Inserted) {
-  Addr = Section.getAddressWithOffset(It->second);
+if (i != Stubs.end()) {
+  Addr = Section.getAddressWithOffset(i->second);
 } else {
   // Create a new stub function.
   assert(Section.getStubOffset() % 4 == 0 && "Misaligned stub");
-  It->second = Section.getStubOffset();
+  Stubs[Value] = Section.getStubOffset();
   uint32_t StubOpcode = 0;
   if (RE.RelTyp

[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: Detect address materialization and arithmetics (PR #132540)

2025-03-26 Thread Anatoly Trosinenko via llvm-branch-commits


@@ -0,0 +1,228 @@
+// RUN: %clang %cflags -march=armv8.3-a %s -o %t.exe
+// RUN: llvm-bolt-binary-analysis --scanners=pauth %t.exe 2>&1 | FileCheck %s
+
+// Test various patterns that should or should not be considered safe
+// materialization of PC-relative addresses.
+//
+// Note that while the "instructions that write to the affected registers"
+// section of the report is still technically correct, it does not necessarily
+// mention the instructions that are used incorrectly.
+//
+// FIXME: Switch to PAC* instructions instead of indirect tail call for testing
+//if a register is considered safe when detection of signing oracles is
+//implemented, as it is more traditional usage of PC-relative 
constants.
+//Moreover, using PAC instructions would improve test robustness, as
+//handling of *calls* can be influenced by what BOLT classifies as a
+//tail call, for example.
+
+.text
+
+// Define a function that is reachable by ADR instruction.
+.type   sym,@function
+sym:
+ret
+.size   sym, .-sym
+
+.globl  good_adr
+.type   good_adr,@function
+good_adr:
+// CHECK-NOT: good_adr
+adr x0, sym
+br  x0
+.size   good_adr, .-good_adr
+
+.globl  good_adrp
+.type   good_adrp,@function
+good_adrp:
+// CHECK-NOT: good_adrp
+adrpx0, sym
+br  x0
+.size   good_adrp, .-good_adrp
+
+.globl  good_adrp_add
+.type   good_adrp_add,@function
+good_adrp_add:
+// CHECK-NOT: good_adrp_add
+adrpx0, sym
+add x0, x0, :lo12:sym
+br  x0
+.size   good_adrp_add, .-good_adrp_add
+
+.globl  good_adrp_add_with_const_offset
+.type   good_adrp_add_with_const_offset,@function
+good_adrp_add_with_const_offset:
+// CHECK-NOT: good_adrp_add_with_const_offset
+adrpx0, sym
+add x0, x0, :lo12:sym
+add x0, x0, #8
+br  x0
+.size   good_adrp_add_with_const_offset, 
.-good_adrp_add_with_const_offset
+
+.globl  bad_adrp_with_nonconst_offset
+.type   bad_adrp_with_nonconst_offset,@function
+bad_adrp_with_nonconst_offset:
+// CHECK-LABEL: GS-PAUTH: non-protected call found in function 
bad_adrp_with_nonconst_offset, basic block {{[^,]+}}, at address
+// CHECK-NEXT:  The instruction is {{[0-9a-f]+}}:  br  x0 # 
TAILCALL
+// CHECK-NEXT:  The 1 instructions that write to the affected registers after 
any authentication are:
+// CHECK-NEXT:  1. {{[0-9a-f]+}}:  add x0, x0, x1
+// CHECK-NEXT:  This happens in the following basic block:
+// CHECK-NEXT:  {{[0-9a-f]+}}:   adrpx0, #{{.*}}
+// CHECK-NEXT:  {{[0-9a-f]+}}:   add x0, x0, x1
+// CHECK-NEXT:  {{[0-9a-f]+}}:   br  x0 # TAILCALL
+adrpx0, sym
+add x0, x0, x1
+br  x0
+.size   bad_adrp_with_nonconst_offset, .-bad_adrp_with_nonconst_offset
+
+.globl  bad_split_adrp
+.type   bad_split_adrp,@function
+bad_split_adrp:
+// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_split_adrp, 
basic block {{[^,]+}}, at address
+// CHECK-NEXT:  The instruction is {{[0-9a-f]+}}:  br  x0 # 
UNKNOWN CONTROL FLOW
+// CHECK-NEXT:  The 1 instructions that write to the affected registers after 
any authentication are:
+// CHECK-NEXT:  1. {{[0-9a-f]+}}:  add x0, x0, #0x{{[0-9a-f]+}}
+// CHECK-NEXT:  This happens in the following basic block:
+// CHECK-NEXT:  {{[0-9a-f]+}}:   add x0, x0, #0x{{[0-9a-f]+}}
+// CHECK-NEXT:  {{[0-9a-f]+}}:   br  x0 # UNKNOWN CONTROL FLOW
+cbz x2, 1f
+adrpx0, sym
+1:
+add x0, x0, :lo12:sym
+br  x0
+.size   bad_split_adrp, .-bad_split_adrp
+
+// Materialization of absolute addresses is not expected.
+
+.globl  bad_immediate_constant
+.type   bad_immediate_constant,@function
+bad_immediate_constant:
+// CHECK-LABEL: GS-PAUTH: non-protected call found in function 
bad_immediate_constant, basic block {{[^,]+}}, at address
+// CHECK-NEXT:  The instruction is {{[0-9a-f]+}}:  br  x0 # 
TAILCALL
+// CHECK-NEXT:  The 1 instructions that write to the affected registers after 
any authentication are:
+// CHECK-NEXT:  1. {{[0-9a-f]+}}:  mov x0, #{{.*}}
+// CHECK-NEXT:  This happens in the following basic block:
+// CHECK-NEXT:  {{[0-9a-f]+}}:   mov x0, #{{.*}}
+// CHECK-NEXT:  {{[0-9a-f]+}}:   br  x0 # TAILCALL
+movzx0, #1234
+br  x0

atrosinenko wrote:

You are right, updated the comment to clarify this.

https://github.com/llvm/llvm-project/pull/132540
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang-tools-extra] [clang] support pack expansions for trailing requires clauses (PR #133190)

2025-03-26 Thread Younan Zhang via llvm-branch-commits

zyn0217 wrote:

Looks like there are some dependencies on the implicit bool conversion. So feel 
free to drop the explicit specifier ;)

https://github.com/llvm/llvm-project/pull/133190
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [CI] Move CI over to new project computation script (PR #132642)

2025-03-26 Thread Aiden Grossman via llvm-branch-commits

https://github.com/boomanaiden154 updated 
https://github.com/llvm/llvm-project/pull/132642


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [CI] Exclude docs directories from triggering rebuilds (PR #133185)

2025-03-26 Thread Tom Stellard via llvm-branch-commits

https://github.com/tstellar approved this pull request.

Thank you.  We could probably do this for some other directories, but this is a 
good first start.

https://github.com/llvm/llvm-project/pull/133185
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [CI] Move CI over to new project computation script (PR #132642)

2025-03-26 Thread Aiden Grossman via llvm-branch-commits

https://github.com/boomanaiden154 updated 
https://github.com/llvm/llvm-project/pull/132642


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [CI] Move CI over to new project computation script (PR #132642)

2025-03-26 Thread Aiden Grossman via llvm-branch-commits

https://github.com/boomanaiden154 updated 
https://github.com/llvm/llvm-project/pull/132642


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] release/20.x: [libcxx] [test] Fix restoring LLVM_DIR and Clang_DIR (#132838) (PR #133153)

2025-03-26 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/133153

Backport 51bceb46f8eeb7c3d060387be315ca41855933c2

Requested by: @mstorsjo

>From b6cc484e46b8d837b5256a994c73e1530bbb807d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= 
Date: Wed, 26 Mar 2025 22:13:28 +0200
Subject: [PATCH] [libcxx] [test] Fix restoring LLVM_DIR and Clang_DIR
 (#132838)

In 664f345cd53d1f624d94f9889a1c9fff803e3391, a fix was introduced,
attempting to restore LLVM_DIR and Clang_DIR after doing
find_package(Clang).

However, 6775285e7695f2d45cf455f5d31b2c9fa9362d3d added a return if the
clangTidy target wasn't found. If this is hit, we don't restore LLVM_DIR
and Clang_DIR, which causes strange effects if CMake is rerun a second
time.

Move the code for restoring LLVM_DIR and Clang_DIR to directly after the
find_package calls, to make sure they are restored, regardless of the
find_package outcome.

(cherry picked from commit 51bceb46f8eeb7c3d060387be315ca41855933c2)
---
 libcxx/test/tools/clang_tidy_checks/CMakeLists.txt | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt 
b/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt
index 0f8f0e8864d0f..da045fac92ce4 100644
--- a/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt
+++ b/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt
@@ -8,6 +8,10 @@ set(Clang_DIR_SAVE ${Clang_DIR})
 # versions must match. Otherwise there likely will be ODR-violations. This had
 # led to crashes and incorrect output of the clang-tidy based checks.
 find_package(Clang ${CMAKE_CXX_COMPILER_VERSION})
+
+set(LLVM_DIR "${LLVM_DIR_SAVE}" CACHE PATH "The directory containing a CMake 
configuration file for LLVM." FORCE)
+set(Clang_DIR "${Clang_DIR_SAVE}" CACHE PATH "The directory containing a CMake 
configuration file for Clang." FORCE)
+
 if(NOT Clang_FOUND)
   message(STATUS "Clang-tidy tests are disabled since the "
  "Clang development package is unavailable.")
@@ -19,9 +23,6 @@ if(NOT TARGET clangTidy)
   return()
 endif()
 
-set(LLVM_DIR "${LLVM_DIR_SAVE}" CACHE PATH "The directory containing a CMake 
configuration file for LLVM." FORCE)
-set(Clang_DIR "${Clang_DIR_SAVE}" CACHE PATH "The directory containing a CMake 
configuration file for Clang." FORCE)
-
 message(STATUS "Found system-installed LLVM ${LLVM_PACKAGE_VERSION} with 
headers in ${LLVM_INCLUDE_DIRS}")
 
 set(CMAKE_CXX_STANDARD 20)

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: Detect address materialization and arithmetics (PR #132540)

2025-03-26 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/132540

>From 774ecf8d94cb7acff292af5b16655a62b8b23201 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Thu, 20 Mar 2025 20:15:07 +0300
Subject: [PATCH 1/2] [BOLT] Gadget scanner: Detect address materialization and
 arithmetics

In addition to authenticated pointers, consider the contents of a
register safe if it was
* written by PC-relative address computation
* updated by an arithmetic instruction whose input address is safe
---
 bolt/include/bolt/Core/MCPlusBuilder.h|  16 ++
 bolt/lib/Passes/PAuthGadgetScanner.cpp|  92 +--
 .../Target/AArch64/AArch64MCPlusBuilder.cpp   |  30 +++
 .../AArch64/gs-pacret-autiasp.s   |  15 --
 .../gs-pauth-address-materialization.s| 228 ++
 .../binary-analysis/AArch64/lit.local.cfg |   3 +-
 6 files changed, 345 insertions(+), 39 deletions(-)
 create mode 100644 
bolt/test/binary-analysis/AArch64/gs-pauth-address-materialization.s

diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h 
b/bolt/include/bolt/Core/MCPlusBuilder.h
index 8b6dc14121480..e94f82d00349a 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -587,6 +587,22 @@ class MCPlusBuilder {
 return getNoRegister();
   }
 
+  virtual MCPhysReg getSafelyMaterializedAddressReg(const MCInst &Inst) const {
+llvm_unreachable("not implemented");
+return getNoRegister();
+  }
+
+  /// Analyzes if this instruction can safely perform address arithmetics.
+  ///
+  /// If the first element of the returned pair is no-register, this 
instruction
+  /// is considered unknown. Otherwise, (output, input) pair is returned,
+  /// so that output is as trusted as input is.
+  virtual std::pair<MCPhysReg, MCPhysReg>
+  analyzeSafeAddressArithmetics(const MCInst &Inst) const {
+llvm_unreachable("not implemented");
+return std::make_pair(getNoRegister(), getNoRegister());
+  }
+
   virtual bool isTerminator(const MCInst &Inst) const;
 
   virtual bool isNoop(const MCInst &Inst) const {
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp 
b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index a3b320c545734..16da08551a34d 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -335,6 +335,50 @@ class PacRetAnalysis
 });
   }
 
+  BitVector getClobberedRegs(const MCInst &Point) const {
+BitVector Clobbered(NumRegs, false);
+// Assume a call can clobber all registers, including callee-saved
+// registers. There's a good chance that callee-saved registers will be
+// saved on the stack at some point during execution of the callee.
+// Therefore they should also be considered as potentially modified by an
+// attacker/written to.
+// Also, not all functions may respect the AAPCS ABI rules about
+// caller/callee-saved registers.
+if (BC.MIB->isCall(Point))
+  Clobbered.set();
+else
+  BC.MIB->getClobberedRegs(Point, Clobbered);
+return Clobbered;
+  }
+
+  // Returns all registers that can be treated as if they are written by an
+  // authentication instruction.
+  SmallVector<MCPhysReg> getAuthenticatedRegs(const MCInst &Point,
+                                              const State &Cur) const {
+    SmallVector<MCPhysReg> Regs;
+const MCPhysReg NoReg = BC.MIB->getNoRegister();
+
+// A signed pointer can be authenticated, or
+    ErrorOr<MCPhysReg> AutReg = BC.MIB->getAuthenticatedReg(Point);
+if (AutReg && *AutReg != NoReg)
+  Regs.push_back(*AutReg);
+
+// ... a safe address can be materialized, or
+MCPhysReg NewAddrReg = BC.MIB->getSafelyMaterializedAddressReg(Point);
+if (NewAddrReg != NoReg)
+  Regs.push_back(NewAddrReg);
+
+// ... an address can be updated in a safe manner, producing the result
+// which is as trusted as the input address.
+MCPhysReg ArithResult, ArithSrc;
+std::tie(ArithResult, ArithSrc) =
+BC.MIB->analyzeSafeAddressArithmetics(Point);
+if (ArithResult != NoReg && Cur.SafeToDerefRegs[ArithSrc])
+  Regs.push_back(ArithResult);
+
+return Regs;
+  }
+
   State computeNext(const MCInst &Point, const State &Cur) {
 PacStatePrinter P(BC);
 LLVM_DEBUG({
@@ -355,19 +399,20 @@ class PacRetAnalysis
   return State();
 }
 
+// First, compute various properties of the instruction, taking the state
+// before its execution into account, if necessary.
+
+BitVector Clobbered = getClobberedRegs(Point);
+// Compute the set of registers that can be considered as written by
+// an authentication instruction. This includes operations that are
+// *strictly better* than authentication, such as materializing a
+// PC-relative constant.
+    SmallVector<MCPhysReg> AuthenticatedOrBetter =
+        getAuthenticatedRegs(Point, Cur);
+
+// Then, compute the state after this instruction is executed.
 State Next = Cur;
-BitVector Clobbered(NumRegs, false);
-// Assume a call can

[llvm-branch-commits] [compiler-rt] [ctxprof][nfc] Move 2 implementation functions up in `CtxInstrProfiling.cpp` (PR #133146)

2025-03-26 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin ready_for_review 
https://github.com/llvm/llvm-project/pull/133146
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [compiler-rt] [llvm] RootAutodetect (PR #133147)

2025-03-26 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin created 
https://github.com/llvm/llvm-project/pull/133147

None

>From 7182baeef88e3d9448062118fd8af808a17fbcd9 Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Mon, 24 Mar 2025 12:01:10 -0700
Subject: [PATCH] RootAutodetect

---
 compiler-rt/lib/ctx_profile/CMakeLists.txt|   2 +-
 .../lib/ctx_profile/CtxInstrContextNode.h |   1 +
 .../lib/ctx_profile/CtxInstrProfiling.cpp | 119 +++---
 .../lib/ctx_profile/CtxInstrProfiling.h   |   2 +-
 .../lib/ctx_profile/RootAutoDetector.cpp  |  84 +
 .../lib/ctx_profile/RootAutoDetector.h|  29 +
 .../TestCases/generate-context.cpp|   4 +-
 .../llvm/ProfileData/CtxInstrContextNode.h|   1 +
 8 files changed, 195 insertions(+), 47 deletions(-)

diff --git a/compiler-rt/lib/ctx_profile/CMakeLists.txt 
b/compiler-rt/lib/ctx_profile/CMakeLists.txt
index bb606449c61b1..446ebc96408dd 100644
--- a/compiler-rt/lib/ctx_profile/CMakeLists.txt
+++ b/compiler-rt/lib/ctx_profile/CMakeLists.txt
@@ -27,7 +27,7 @@ endif()
 add_compiler_rt_runtime(clang_rt.ctx_profile
   STATIC
   ARCHS ${CTX_PROFILE_SUPPORTED_ARCH}
-  OBJECT_LIBS RTSanitizerCommon RTSanitizerCommonLibc
+  OBJECT_LIBS RTSanitizerCommon RTSanitizerCommonLibc 
RTSanitizerCommonSymbolizer
   CFLAGS ${EXTRA_FLAGS}
   SOURCES ${CTX_PROFILE_SOURCES}
   ADDITIONAL_HEADERS ${CTX_PROFILE_HEADERS}
diff --git a/compiler-rt/lib/ctx_profile/CtxInstrContextNode.h 
b/compiler-rt/lib/ctx_profile/CtxInstrContextNode.h
index a42bf9ebb01ea..aa052bc7eea6c 100644
--- a/compiler-rt/lib/ctx_profile/CtxInstrContextNode.h
+++ b/compiler-rt/lib/ctx_profile/CtxInstrContextNode.h
@@ -127,6 +127,7 @@ class ContextNode final {
 /// MUTEXDECL takes one parameter, the name of a field that is a mutex.
 #define CTXPROF_FUNCTION_DATA(PTRDECL, VOLATILE_PTRDECL, MUTEXDECL)
\
   PTRDECL(FunctionData, Next)  
\
+  PTRDECL(void, EntryAddress)  
\
   VOLATILE_PTRDECL(ContextRoot, CtxRoot)   
\
   VOLATILE_PTRDECL(ContextNode, FlatCtx)   
\
   MUTEXDECL(Mutex)
diff --git a/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp 
b/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp
index da291e0bbabdd..7e73214e639a3 100644
--- a/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp
+++ b/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp
@@ -7,6 +7,7 @@
 
//===--===//
 
 #include "CtxInstrProfiling.h"
+#include "RootAutoDetector.h"
 #include "sanitizer_common/sanitizer_allocator_internal.h"
 #include "sanitizer_common/sanitizer_atomic.h"
 #include "sanitizer_common/sanitizer_atomic_clang.h"
@@ -43,6 +44,12 @@ Arena *FlatCtxArena = nullptr;
 __thread bool IsUnderContext = false;
 __sanitizer::atomic_uint8_t ProfilingStarted = {};
 
+__sanitizer::atomic_uintptr_t RootDetector = {};
+RootAutoDetector *getRootDetector() {
+  return reinterpret_cast<RootAutoDetector *>(
+      __sanitizer::atomic_load_relaxed(&RootDetector));
+}
+
 // utility to taint a pointer by setting the LSB. There is an assumption
 // throughout that the addresses of contexts are even (really, they should be
 // align(8), but "even"-ness is the minimum assumption)
@@ -201,7 +208,7 @@ ContextNode *getCallsiteSlow(GUID Guid, ContextNode 
**InsertionPoint,
   return Ret;
 }
 
-ContextNode *getFlatProfile(FunctionData &Data, GUID Guid,
+ContextNode *getFlatProfile(FunctionData &Data, void *Callee, GUID Guid,
 uint32_t NumCounters) {
   if (ContextNode *Existing = Data.FlatCtx)
 return Existing;
@@ -232,6 +239,7 @@ ContextNode *getFlatProfile(FunctionData &Data, GUID Guid,
 auto *Ret = allocContextNode(AllocBuff, Guid, NumCounters, 0);
 Data.FlatCtx = Ret;
 
+Data.EntryAddress = Callee;
+    Data.Next = reinterpret_cast<FunctionData *>(
+        __sanitizer::atomic_load_relaxed(&AllFunctionsData));
 while (!__sanitizer::atomic_compare_exchange_strong(
@@ -277,8 +285,29 @@ ContextRoot *FunctionData::getOrAllocateContextRoot() {
   return Root;
 }
 
-ContextNode *getUnhandledContext(FunctionData &Data, GUID Guid,
- uint32_t NumCounters) {
+ContextNode *tryStartContextGivenRoot(ContextRoot *Root, GUID Guid,
+  uint32_t Counters, uint32_t Callsites)
+SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
+  IsUnderContext = true;
+  __sanitizer::atomic_fetch_add(&Root->TotalEntries, 1,
+__sanitizer::memory_order_relaxed);
+
+  if (!Root->FirstMemBlock) {
+setupContext(Root, Guid, Counters, Callsites);
+  }
+  if (Root->Taken.TryLock()) {
+__llvm_ctx_profile_current_context_root = Root;
+onContextEnter(*Root->FirstNode);
+return Root->FirstNode;
+  }
+  // If this thread couldn't take the lock, return scratch context.
+  __llvm_ctx_profile_current_context_root

[llvm-branch-commits] [compiler-rt] [llvm] RootAutodetect (PR #133147)

2025-03-26 Thread Mircea Trofin via llvm-branch-commits

mtrofin wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack  href="https://app.graphite.dev/github/pr/llvm/llvm-project/133147?utm_source=stack-comment-downstack-mergeability-warning";
>  >on Graphite.
> https://graphite.dev/docs/merge-pull-requests";>Learn more

* **#133147** https://app.graphite.dev/github/pr/llvm/llvm-project/133147?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/> 👈 https://app.graphite.dev/github/pr/llvm/llvm-project/133147?utm_source=stack-comment-view-in-graphite";
 target="_blank">(View in Graphite)
* **#133146** https://app.graphite.dev/github/pr/llvm/llvm-project/133146?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#133106** https://app.graphite.dev/github/pr/llvm/llvm-project/133106?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* `main`




This stack of pull requests is managed by https://graphite.dev?utm-source=stack-comment";>Graphite. Learn 
more about https://stacking.dev/?utm_source=stack-comment";>stacking.


https://github.com/llvm/llvm-project/pull/133147
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: add RegBankLegalize rules for extends and trunc (PR #132383)

2025-03-26 Thread Nicolai Hähnle via llvm-branch-commits


@@ -13,7 +12,8 @@ body: |
 ; CHECK: liveins: $sgpr0
 ; CHECK-NEXT: {{  $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
-; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s64) = G_ANYEXT [[COPY]](s32)
+; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF
+; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY]](s32), 
[[DEF]](s32)

nhaehnle wrote:

Why are we legalizing this G_ANYEXT to G_MERGE_VALUES, but in 
`anyext_s1_to_s64_scc` we generate a new `G_ANYEXT`?

https://github.com/llvm/llvm-project/pull/132383
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [CodeGen][StaticDataSplitter]Support constant pool partitioning (PR #129781)

2025-03-26 Thread Mingming Liu via llvm-branch-commits

https://github.com/mingmingl-llvm updated 
https://github.com/llvm/llvm-project/pull/129781

>From 072c44f0f9272682480cc2837196a906bd694276 Mon Sep 17 00:00:00 2001
From: mingmingl 
Date: Fri, 28 Feb 2025 14:41:56 -0800
Subject: [PATCH 1/4] [CodeGen][StaticDataSplitter]Support constant pool
 partitioning

---
 llvm/include/llvm/CodeGen/AsmPrinter.h|   8 +
 .../CodeGen/TargetLoweringObjectFileImpl.h|   6 +
 .../llvm/Target/TargetLoweringObjectFile.h|   7 +
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp|  22 ++-
 llvm/lib/CodeGen/StaticDataSplitter.cpp   |  56 +--
 .../CodeGen/TargetLoweringObjectFileImpl.cpp  |  35 +
 llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp |  10 ++
 llvm/lib/Target/TargetLoweringObjectFile.cpp  |  10 ++
 llvm/lib/Target/X86/X86AsmPrinter.cpp |  10 ++
 .../AArch64/constant-pool-partition.ll| 141 ++
 .../CodeGen/X86/constant-pool-partition.ll| 131 
 11 files changed, 422 insertions(+), 14 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/constant-pool-partition.ll
 create mode 100644 llvm/test/CodeGen/X86/constant-pool-partition.ll

diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h 
b/llvm/include/llvm/CodeGen/AsmPrinter.h
index 3da63af5ba571..2018f411be796 100644
--- a/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -18,6 +18,8 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/StaticDataProfileInfo.h"
 #include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/CodeGen/DwarfStringPoolEntry.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
@@ -132,6 +134,12 @@ class AsmPrinter : public MachineFunctionPass {
   /// default, this is equal to CurrentFnSym.
   MCSymbol *CurrentFnSymForSize = nullptr;
 
+  /// Provides the profile information for constants.
+  const StaticDataProfileInfo *SDPI = nullptr;
+
+  /// The profile summary information.
+  const ProfileSummaryInfo *PSI = nullptr;
+
   /// Map a basic block section ID to the begin and end symbols of that section
   ///  which determine the section's range.
   struct MBBSectionRange {
diff --git a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h 
b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
index 10f0594c267ae..563980fb24ab8 100644
--- a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
+++ b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
@@ -68,6 +68,12 @@ class TargetLoweringObjectFileELF : public 
TargetLoweringObjectFile {
const Constant *C,
Align &Alignment) const override;
 
+  /// Similar to the function above, but append \p SectionSuffix to the section
+  /// name.
+  MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
+   const Constant *C, Align &Alignment,
+   StringRef SectionSuffix) const override;
+
   MCSection *getExplicitSectionGlobal(const GlobalObject *GO, SectionKind Kind,
   const TargetMachine &TM) const override;
 
diff --git a/llvm/include/llvm/Target/TargetLoweringObjectFile.h 
b/llvm/include/llvm/Target/TargetLoweringObjectFile.h
index a5ed1b29dc1bc..1956748b8058b 100644
--- a/llvm/include/llvm/Target/TargetLoweringObjectFile.h
+++ b/llvm/include/llvm/Target/TargetLoweringObjectFile.h
@@ -104,6 +104,13 @@ class TargetLoweringObjectFile : public MCObjectFileInfo {
SectionKind Kind, const Constant *C,
Align &Alignment) const;
 
+  /// Similar to the function above, but append \p SectionSuffix to the section
+  /// name.
+  virtual MCSection *getSectionForConstant(const DataLayout &DL,
+   SectionKind Kind, const Constant *C,
+   Align &Alignment,
+   StringRef SectionSuffix) const;
+
   virtual MCSection *
   getSectionForMachineBasicBlock(const Function &F,
  const MachineBasicBlock &MBB,
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp 
b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 3c4280333e76d..60018afe2f8a7 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -2791,8 +2791,26 @@ void AsmPrinter::emitConstantPool() {
 if (!CPE.isMachineConstantPoolEntry())
   C = CPE.Val.ConstVal;
 
-MCSection *S = getObjFileLowering().getSectionForConstant(
-getDataLayout(), Kind, C, Alignment);
+MCSection *S = nullptr;
+if (TM.Options.EnableStaticDataPartitioning) {
+  SmallString<8> SectionNameSuffix;
+  if (C && SDPI && PSI) {
+auto Count = SDPI->getConstantProfileCount(C);
+if (Count) {
+ 

[llvm-branch-commits] [compiler-rt] [ctxprof][nfc] Move 2 implementation functions up in `CtxInstrProfiling.cpp` (PR #133146)

2025-03-26 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin created 
https://github.com/llvm/llvm-project/pull/133146

None

>From bbe97a86d8cc94b484420db54f735dac8bc818cf Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Wed, 26 Mar 2025 10:10:43 -0700
Subject: [PATCH] [ctxprof][nfc] Move 2 implementation functions up in
 `CtxInstrProfiling.cpp`

---
 .../lib/ctx_profile/CtxInstrProfiling.cpp | 66 +--
 1 file changed, 33 insertions(+), 33 deletions(-)

diff --git a/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp 
b/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp
index b0e63a8861d86..da291e0bbabdd 100644
--- a/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp
+++ b/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp
@@ -244,6 +244,39 @@ ContextNode *getFlatProfile(FunctionData &Data, GUID Guid,
   return Data.FlatCtx;
 }
 
+// This should be called once for a Root. Allocate the first arena, set up the
+// first context.
+void setupContext(ContextRoot *Root, GUID Guid, uint32_t NumCounters,
+  uint32_t NumCallsites) {
+  __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
+  &AllContextsMutex);
+  // Re-check - we got here without having had taken a lock.
+  if (Root->FirstMemBlock)
+return;
+  const auto Needed = ContextNode::getAllocSize(NumCounters, NumCallsites);
+  auto *M = Arena::allocateNewArena(getArenaAllocSize(Needed));
+  Root->FirstMemBlock = M;
+  Root->CurrentMem = M;
+  Root->FirstNode = allocContextNode(M->tryBumpAllocate(Needed), Guid,
+ NumCounters, NumCallsites);
+  AllContextRoots.PushBack(Root);
+}
+
+ContextRoot *FunctionData::getOrAllocateContextRoot() {
+  auto *Root = CtxRoot;
+  if (Root)
+return Root;
+  __sanitizer::GenericScopedLock<__sanitizer::StaticSpinMutex> L(&Mutex);
+  Root = CtxRoot;
+  if (!Root) {
+Root = new (__sanitizer::InternalAlloc(sizeof(ContextRoot))) ContextRoot();
+CtxRoot = Root;
+  }
+
+  assert(Root);
+  return Root;
+}
+
 ContextNode *getUnhandledContext(FunctionData &Data, GUID Guid,
  uint32_t NumCounters) {
 
@@ -333,39 +366,6 @@ ContextNode *__llvm_ctx_profile_get_context(FunctionData 
*Data, void *Callee,
   return Ret;
 }
 
-// This should be called once for a Root. Allocate the first arena, set up the
-// first context.
-void setupContext(ContextRoot *Root, GUID Guid, uint32_t NumCounters,
-  uint32_t NumCallsites) {
-  __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
-  &AllContextsMutex);
-  // Re-check - we got here without having had taken a lock.
-  if (Root->FirstMemBlock)
-return;
-  const auto Needed = ContextNode::getAllocSize(NumCounters, NumCallsites);
-  auto *M = Arena::allocateNewArena(getArenaAllocSize(Needed));
-  Root->FirstMemBlock = M;
-  Root->CurrentMem = M;
-  Root->FirstNode = allocContextNode(M->tryBumpAllocate(Needed), Guid,
- NumCounters, NumCallsites);
-  AllContextRoots.PushBack(Root);
-}
-
-ContextRoot *FunctionData::getOrAllocateContextRoot() {
-  auto *Root = CtxRoot;
-  if (Root)
-return Root;
-  __sanitizer::GenericScopedLock<__sanitizer::StaticSpinMutex> L(&Mutex);
-  Root = CtxRoot;
-  if (!Root) {
-Root = new (__sanitizer::InternalAlloc(sizeof(ContextRoot))) ContextRoot();
-CtxRoot = Root;
-  }
-
-  assert(Root);
-  return Root;
-}
-
 ContextNode *__llvm_ctx_profile_start_context(
 FunctionData *FData, GUID Guid, uint32_t Counters,
 uint32_t Callsites) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [CI] Move CI over to new project computation script (PR #132642)

2025-03-26 Thread Aiden Grossman via llvm-branch-commits

https://github.com/boomanaiden154 updated 
https://github.com/llvm/llvm-project/pull/132642


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [CodeGen][StaticDataSplitter]Support constant pool partitioning (PR #129781)

2025-03-26 Thread Mingming Liu via llvm-branch-commits


@@ -0,0 +1,131 @@
+target datalayout = 
"e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-grtev4-linux-gnu"
+
+; Tests that constant pool hotness is aggregated across the module. The

mingmingl-llvm wrote:

Grouped CHECK by functions and used CHECK-NEXT within a function.

Also make `@var` used by a hot basic block in `@main`.

https://github.com/llvm/llvm-project/pull/129781
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] release/20.x: [libcxx] [test] Fix restoring LLVM_DIR and Clang_DIR (#132838) (PR #133153)

2025-03-26 Thread via llvm-branch-commits

llvmbot wrote:

@mordante What do you think about merging this PR to the release branch?

https://github.com/llvm/llvm-project/pull/133153
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [CI] Move CI over to new project computation script (PR #132642)

2025-03-26 Thread Aiden Grossman via llvm-branch-commits

https://github.com/boomanaiden154 updated 
https://github.com/llvm/llvm-project/pull/132642


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang] Track final substitution for Subst* AST nodes (PR #132748)

2025-03-26 Thread Shafik Yaghmour via llvm-branch-commits


@@ -265,14 +265,14 @@ int k9 = f9(V9());
 // CHECK-ELIDE-TREE:   S9<
 // CHECK-ELIDE-TREE: [2 * ...],
 // CHECK-ELIDE-TREE: U9<
-// CHECK-ELIDE-TREE:   [(no qualifiers) != const] double>>

shafik wrote:

It is not obvious why this changed, can you explain?

https://github.com/llvm/llvm-project/pull/132748
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [compiler-rt] [llvm] RootAutodetect (PR #133147)

2025-03-26 Thread via llvm-branch-commits

github-actions[bot] wrote:




:warning: C/C++ code formatter, clang-format found issues in your code. 
:warning:



You can test this locally with the following command:


```bash
git-clang-format --diff bbe97a86d8cc94b484420db54f735dac8bc818cf 
7182baeef88e3d9448062118fd8af808a17fbcd9 --extensions cpp,h -- 
compiler-rt/lib/ctx_profile/CtxInstrContextNode.h 
compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp 
compiler-rt/lib/ctx_profile/CtxInstrProfiling.h 
compiler-rt/lib/ctx_profile/RootAutoDetector.cpp 
compiler-rt/lib/ctx_profile/RootAutoDetector.h 
compiler-rt/test/ctx_profile/TestCases/generate-context.cpp 
llvm/include/llvm/ProfileData/CtxInstrContextNode.h
```





View the diff from clang-format here.


```diff
diff --git a/compiler-rt/lib/ctx_profile/RootAutoDetector.cpp 
b/compiler-rt/lib/ctx_profile/RootAutoDetector.cpp
index 5888545a79..50b8f07655 100644
--- a/compiler-rt/lib/ctx_profile/RootAutoDetector.cpp
+++ b/compiler-rt/lib/ctx_profile/RootAutoDetector.cpp
@@ -42,7 +42,7 @@ void RootAutoDetector::start() {
   +[](void *Ctx) -> void * {
RootAutoDetector *RAD = reinterpret_cast<RootAutoDetector *>(Ctx);
 SleepForSeconds(30);
-Vector Copy;
+Vector Copy;
 {
  GenericScopedLock<SpinMutex> M(&RAD->AllSamplesMutex);
   Copy.Resize(RAD->AllSamples.Size());
@@ -72,9 +72,7 @@ void RootAutoDetector::start() {
   this);
 }
 
-void RootAutoDetector::join() {
-  pthread_join(WorkerThread, nullptr);
-}
+void RootAutoDetector::join() { pthread_join(WorkerThread, nullptr); }
 
 void RootAutoDetector::sample() {
   static thread_local bool Entered = false;
@@ -90,7 +88,8 @@ void RootAutoDetector::collectStack() {
   GET_CALLER_PC_BP;
   BufferedStackTrace CurrentStack;
   CurrentStack.Unwind(pc, bp, nullptr, false);
-  if (CurrentStack.size <= 2)  return;
+  if (CurrentStack.size <= 2)
+return;
   static thread_local PerThreadSamples *ThisThreadSamples =
   new (__sanitizer::InternalAlloc(sizeof(PerThreadSamples)))
   PerThreadSamples(*this);
diff --git a/compiler-rt/lib/ctx_profile/RootAutoDetector.h 
b/compiler-rt/lib/ctx_profile/RootAutoDetector.h
index 254a40b163..f5cecad2f3 100644
--- a/compiler-rt/lib/ctx_profile/RootAutoDetector.h
+++ b/compiler-rt/lib/ctx_profile/RootAutoDetector.h
@@ -78,7 +78,7 @@ class RootAutoDetector final {
   };
   SpinMutex AllSamplesMutex;
   SANITIZER_GUARDED_BY(AllSamplesMutex)
-  Vector AllSamples;
+  Vector AllSamples;
   atomic_uintptr_t &FunctionDataListHead;
   atomic_uintptr_t &Self;
   void collectStack();

```




https://github.com/llvm/llvm-project/pull/133147
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [compiler-rt] release/20.x: XFAIL malloc_zone.cpp for darwin/lsan (#131234) (PR #133006)

2025-03-26 Thread Jon Roelofs via llvm-branch-commits


@@ -17,6 +17,8 @@
 // UBSan does not install a malloc zone.
 // XFAIL: ubsan
 //
+// Curently fails on darwin/lsan rdar://145873843

jroelofs wrote:

Radar links are appropriate for commit messages, but the community no longer 
accepts them in comments.

https://github.com/llvm/llvm-project/pull/133006
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [CodeGen][StaticDataSplitter]Support constant pool partitioning (PR #129781)

2025-03-26 Thread Mingming Liu via llvm-branch-commits

https://github.com/mingmingl-llvm updated 
https://github.com/llvm/llvm-project/pull/129781

>From 072c44f0f9272682480cc2837196a906bd694276 Mon Sep 17 00:00:00 2001
From: mingmingl 
Date: Fri, 28 Feb 2025 14:41:56 -0800
Subject: [PATCH 1/3] [CodeGen][StaticDataSplitter]Support constant pool
 partitioning

---
 llvm/include/llvm/CodeGen/AsmPrinter.h|   8 +
 .../CodeGen/TargetLoweringObjectFileImpl.h|   6 +
 .../llvm/Target/TargetLoweringObjectFile.h|   7 +
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp|  22 ++-
 llvm/lib/CodeGen/StaticDataSplitter.cpp   |  56 +--
 .../CodeGen/TargetLoweringObjectFileImpl.cpp  |  35 +
 llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp |  10 ++
 llvm/lib/Target/TargetLoweringObjectFile.cpp  |  10 ++
 llvm/lib/Target/X86/X86AsmPrinter.cpp |  10 ++
 .../AArch64/constant-pool-partition.ll| 141 ++
 .../CodeGen/X86/constant-pool-partition.ll| 131 
 11 files changed, 422 insertions(+), 14 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/constant-pool-partition.ll
 create mode 100644 llvm/test/CodeGen/X86/constant-pool-partition.ll

diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h 
b/llvm/include/llvm/CodeGen/AsmPrinter.h
index 3da63af5ba571..2018f411be796 100644
--- a/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -18,6 +18,8 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/StaticDataProfileInfo.h"
 #include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/CodeGen/DwarfStringPoolEntry.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
@@ -132,6 +134,12 @@ class AsmPrinter : public MachineFunctionPass {
   /// default, this is equal to CurrentFnSym.
   MCSymbol *CurrentFnSymForSize = nullptr;
 
+  /// Provides the profile information for constants.
+  const StaticDataProfileInfo *SDPI = nullptr;
+
+  /// The profile summary information.
+  const ProfileSummaryInfo *PSI = nullptr;
+
   /// Map a basic block section ID to the begin and end symbols of that section
   ///  which determine the section's range.
   struct MBBSectionRange {
diff --git a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h 
b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
index 10f0594c267ae..563980fb24ab8 100644
--- a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
+++ b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
@@ -68,6 +68,12 @@ class TargetLoweringObjectFileELF : public 
TargetLoweringObjectFile {
const Constant *C,
Align &Alignment) const override;
 
+  /// Similar to the function above, but append \p SectionSuffix to the section
+  /// name.
+  MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
+   const Constant *C, Align &Alignment,
+   StringRef SectionSuffix) const override;
+
   MCSection *getExplicitSectionGlobal(const GlobalObject *GO, SectionKind Kind,
   const TargetMachine &TM) const override;
 
diff --git a/llvm/include/llvm/Target/TargetLoweringObjectFile.h 
b/llvm/include/llvm/Target/TargetLoweringObjectFile.h
index a5ed1b29dc1bc..1956748b8058b 100644
--- a/llvm/include/llvm/Target/TargetLoweringObjectFile.h
+++ b/llvm/include/llvm/Target/TargetLoweringObjectFile.h
@@ -104,6 +104,13 @@ class TargetLoweringObjectFile : public MCObjectFileInfo {
SectionKind Kind, const Constant *C,
Align &Alignment) const;
 
+  /// Similar to the function above, but append \p SectionSuffix to the section
+  /// name.
+  virtual MCSection *getSectionForConstant(const DataLayout &DL,
+   SectionKind Kind, const Constant *C,
+   Align &Alignment,
+   StringRef SectionSuffix) const;
+
   virtual MCSection *
   getSectionForMachineBasicBlock(const Function &F,
  const MachineBasicBlock &MBB,
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp 
b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 3c4280333e76d..60018afe2f8a7 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -2791,8 +2791,26 @@ void AsmPrinter::emitConstantPool() {
 if (!CPE.isMachineConstantPoolEntry())
   C = CPE.Val.ConstVal;
 
-MCSection *S = getObjFileLowering().getSectionForConstant(
-getDataLayout(), Kind, C, Alignment);
+MCSection *S = nullptr;
+if (TM.Options.EnableStaticDataPartitioning) {
+  SmallString<8> SectionNameSuffix;
+  if (C && SDPI && PSI) {
+auto Count = SDPI->getConstantProfileCount(C);
+if (Count) {
+ 

[llvm-branch-commits] [llvm] [CodeGen][StaticDataSplitter]Support constant pool partitioning (PR #129781)

2025-03-26 Thread Mingming Liu via llvm-branch-commits


@@ -0,0 +1,141 @@
+; RUN: llc -mtriple=aarch64 -enable-split-machine-functions \
+; RUN: -partition-static-data-sections=true -function-sections=true \
+; RUN: -unique-section-names=false \
+; RUN: %s -o - 2>&1 | FileCheck %s --dump-input=always
+
+; Repeat the RUN command above for big-endian systems.
+; RUN: llc -mtriple=aarch64_be -enable-split-machine-functions \
+; RUN: -partition-static-data-sections=true -function-sections=true \
+; RUN: -unique-section-names=false \
+; RUN: %s -o - 2>&1 | FileCheck %s --dump-input=always
+
+; Tests that constant pool hotness is aggregated across the module. The
+; static-data-splitter processes data from cold_func first, unprofiled_func
+; secondly, and then hot_func. Specifically, tests that
+; - If a constant is accessed by hot functions, all constant pools for this
+;   constant (e.g., from an unprofiled function, or cold function) should have
+;   `.hot` suffix.
+; - Similarly if a constant is accessed by both cold function and un-profiled
+;   function, constant pools for this constant should not have `.unlikely` 
suffix.
+
+; CHECK: .section  .rodata.cst8.hot,"aM",@progbits,8
+; CHECK: .LCPI0_0:
+; CHECK:  .xword   0x3fe5c28f5c28f5c3  // double 
0.68005
+; CHECK: .section  .rodata.cst8.unlikely,"aM",@progbits,8
+; CHECK: .LCPI0_1:
+; CHECK: .xword 0x3fe5eb851eb851ec  // double 
0.68505
+; CHECK:  .section .rodata.cst8,"aM",@progbits,8
+; CHECK: .LCPI0_2:
+; CHECK: .byte   0   // 0x0
+; CHECK: .byte   4   // 0x4
+; CHECK: .byte   8   // 0x8
+; CHECK: .byte   12  // 0xc
+; CHECK: .byte   255 // 0xff
+; CHECK: .byte   255 // 0xff
+; CHECK: .byte   255 // 0xff
+; CHECK: .byte   255 // 0xff
+
+; CHECK:  .section .rodata.cst8,"aM",@progbits,8
+; CHECK: .LCPI1_0:
+; CHECK: .byte   0   // 0x0
+; CHECK: .byte   4   // 0x4
+; CHECK: .byte   8   // 0x8
+; CHECK: .byte   12  // 0xc
+; CHECK: .byte   255 // 0xff
+; CHECK: .byte   255 // 0xff
+; CHECK: .byte   255 // 0xff
+; CHECK: .byte   255 // 0xff
+; CHECK:  .section.rodata.cst16.hot,"aM",@progbits,16
+; CHECK: .LCPI1_1:
+; CHECK:  .word   442 // 0x1ba
+; CHECK:  .word   100 // 0x64
+; CHECK:  .word   0   // 0x0
+; CHECK:  .word   0   // 0x0
+
+; CHECK:  .section.rodata.cst8.hot,"aM",@progbits,8
+; CHECK: .LCPI2_0:
+; CHECK:  .xword  0x3fe5c28f5c28f5c3  // double 
0.68005
+; CHECK:  .section.rodata.cst16.hot,"aM",@progbits,16
+; CHECK: .LCPI2_1:
+; CHECK:  .word   442 // 0x1ba
+; CHECK:  .word   100 // 0x64
+; CHECK:  .word   0   // 0x0
+; CHECK:  .word   0   // 0x0
+
+; CHECK:.section   .rodata.cst32,"aM",@progbits,32
+; CHECK:.globl val
+
+define i32 @cold_func(double %x, <16 x i8> %a, <16 x i8> %b) !prof !16 {
+  %2 = tail call i32 (...) @func_taking_arbitrary_param(double 6.80e-01)
+  %num = tail call i32 (...) @func_taking_arbitrary_param(double 6.850e-01)
+  %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, 
<8 x i8> )
+  %t2 = bitcast <8 x i8> %t1 to <2 x i32>
+  %3 = extractelement <2 x i32> %t2, i32 1
+  %sum = add i32 %2, %3
+  %ret = add i32 %sum, %num
+  ret i32 %ret
+}
+
+declare <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>)
+declare i32 @func_taking_arbitrary_param(...)
+
+define <4 x i1> @unprofiled_func(<16 x i8> %a, <16 x i8> %b) {
+  %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, 
<8 x i8> )
+  %t2 = bitcast <8 x i8> %t1 to <4 x i16>
+  %t3 = zext <4 x i16> %t2 to <4 x i32>
+  %cmp = icmp ule <4 x i32> , %t3
+  ret <4 x i1> %cmp
+}
+
+define <4 x i1> @hot_func(i32 %0, <4 x i32> %a) !prof !17 {
+  %2 = tail call i32 (...) @func_taking_arbitrary_param(double 6.80e-01)
+  %b = icmp ule <4 x i32> %a, 
+  ret <4 x i1> %b
+}
+
+@val = unnamed_addr constant i256 1

mingmingl-llvm wrote:

Updated the test case to use this `@val` in a hot basic block in `@main`. The 
section suffix remains empty (not `.hot` or `.unlikely`) though, because `@val` 
has external linkage and the static-data-splitter pass only analyzes 
local-linkage vars.

For 

[llvm-branch-commits] [llvm] [CodeGen][StaticDataSplitter]Support constant pool partitioning (PR #129781)

2025-03-26 Thread Mingming Liu via llvm-branch-commits


@@ -0,0 +1,141 @@
+; RUN: llc -mtriple=aarch64 -enable-split-machine-functions \
+; RUN: -partition-static-data-sections=true -function-sections=true \
+; RUN: -unique-section-names=false \
+; RUN: %s -o - 2>&1 | FileCheck %s --dump-input=always
+
+; Repeat the RUN command above for big-endian systems.
+; RUN: llc -mtriple=aarch64_be -enable-split-machine-functions \
+; RUN: -partition-static-data-sections=true -function-sections=true \
+; RUN: -unique-section-names=false \
+; RUN: %s -o - 2>&1 | FileCheck %s --dump-input=always
+
+; Tests that constant pool hotness is aggregated across the module. The
+; static-data-splitter processes data from cold_func first, unprofiled_func
+; secondly, and then hot_func. Specifically, tests that
+; - If a constant is accessed by hot functions, all constant pools for this
+;   constant (e.g., from an unprofiled function, or cold function) should have
+;   `.hot` suffix.
+; - Similarly if a constant is accessed by both cold function and un-profiled
+;   function, constant pools for this constant should not have `.unlikely` 
suffix.
+
+; CHECK: .section  .rodata.cst8.hot,"aM",@progbits,8
+; CHECK: .LCPI0_0:
+; CHECK:  .xword   0x3fe5c28f5c28f5c3  // double 
0.68005
+; CHECK: .section  .rodata.cst8.unlikely,"aM",@progbits,8
+; CHECK: .LCPI0_1:
+; CHECK: .xword 0x3fe5eb851eb851ec  // double 
0.68505
+; CHECK:  .section .rodata.cst8,"aM",@progbits,8
+; CHECK: .LCPI0_2:
+; CHECK: .byte   0   // 0x0
+; CHECK: .byte   4   // 0x4
+; CHECK: .byte   8   // 0x8
+; CHECK: .byte   12  // 0xc
+; CHECK: .byte   255 // 0xff
+; CHECK: .byte   255 // 0xff
+; CHECK: .byte   255 // 0xff
+; CHECK: .byte   255 // 0xff
+
+; CHECK:  .section .rodata.cst8,"aM",@progbits,8
+; CHECK: .LCPI1_0:
+; CHECK: .byte   0   // 0x0
+; CHECK: .byte   4   // 0x4
+; CHECK: .byte   8   // 0x8
+; CHECK: .byte   12  // 0xc
+; CHECK: .byte   255 // 0xff
+; CHECK: .byte   255 // 0xff
+; CHECK: .byte   255 // 0xff
+; CHECK: .byte   255 // 0xff
+; CHECK:  .section.rodata.cst16.hot,"aM",@progbits,16
+; CHECK: .LCPI1_1:
+; CHECK:  .word   442 // 0x1ba
+; CHECK:  .word   100 // 0x64
+; CHECK:  .word   0   // 0x0
+; CHECK:  .word   0   // 0x0
+
+; CHECK:  .section.rodata.cst8.hot,"aM",@progbits,8
+; CHECK: .LCPI2_0:
+; CHECK:  .xword  0x3fe5c28f5c28f5c3  // double 
0.68005
+; CHECK:  .section.rodata.cst16.hot,"aM",@progbits,16
+; CHECK: .LCPI2_1:
+; CHECK:  .word   442 // 0x1ba
+; CHECK:  .word   100 // 0x64
+; CHECK:  .word   0   // 0x0
+; CHECK:  .word   0   // 0x0
+
+; CHECK:.section   .rodata.cst32,"aM",@progbits,32
+; CHECK:.globl val
+
+define i32 @cold_func(double %x, <16 x i8> %a, <16 x i8> %b) !prof !16 {
+  %2 = tail call i32 (...) @func_taking_arbitrary_param(double 6.80e-01)
+  %num = tail call i32 (...) @func_taking_arbitrary_param(double 6.850e-01)
+  %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, 
<8 x i8> )
+  %t2 = bitcast <8 x i8> %t1 to <2 x i32>
+  %3 = extractelement <2 x i32> %t2, i32 1
+  %sum = add i32 %2, %3
+  %ret = add i32 %sum, %num
+  ret i32 %ret
+}
+
+declare <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>)
+declare i32 @func_taking_arbitrary_param(...)
+
+define <4 x i1> @unprofiled_func(<16 x i8> %a, <16 x i8> %b) {
+  %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, 
<8 x i8> )
+  %t2 = bitcast <8 x i8> %t1 to <4 x i16>
+  %t3 = zext <4 x i16> %t2 to <4 x i32>
+  %cmp = icmp ule <4 x i32> , %t3

mingmingl-llvm wrote:

Updated unprofiled_func and hot_func so each function has distinct constants. 
The common  `<442, 100, 0, 0>` is constructed to test that `LCPI1_2` and 
`LCPI2_2` have the same section name and could be merged when linking.


https://github.com/llvm/llvm-project/pull/129781
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [CodeGen][StaticDataSplitter]Support constant pool partitioning (PR #129781)

2025-03-26 Thread Mingming Liu via llvm-branch-commits


@@ -0,0 +1,141 @@
+; RUN: llc -mtriple=aarch64 -enable-split-machine-functions \

mingmingl-llvm wrote:

Currently in the target pass configuration, the data partitioning pass is added 
inside `TM->Options.EnableMachineFunctionSplitter || 
EnableMachineFunctionSplitter` 
(https://github.com/llvm/llvm-project/blob/9224165871cacc568b3895c736ff2a580e1e/llvm/lib/CodeGen/TargetPassConfig.cpp#L1243-L1262),
 initially to piggyback on the availability of MIRProfile at line 1248.

I'll send a follow-up patch to move it outside.

https://github.com/llvm/llvm-project/pull/129781
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [CI] Move CI over to new project computation script (PR #132642)

2025-03-26 Thread Aiden Grossman via llvm-branch-commits

https://github.com/boomanaiden154 updated 
https://github.com/llvm/llvm-project/pull/132642


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/20.x: [llvm] Fix crash when complex deinterleaving operates on an unrolled loop (#129735) (PR #132031)

2025-03-26 Thread Igor Kirillov via llvm-branch-commits

https://github.com/igogo-x86 approved this pull request.


https://github.com/llvm/llvm-project/pull/132031
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [compiler-rt] [llvm] [ctxprof] root autodetection mechanism (PR #133147)

2025-03-26 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin edited 
https://github.com/llvm/llvm-project/pull/133147
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [HLSL] DO NOT MERGE - Resource constructors prototype (PR #132453)

2025-03-26 Thread Helena Kotas via llvm-branch-commits

https://github.com/hekota created 
https://github.com/llvm/llvm-project/pull/132453

None

>From 47b41c88a60a7f376070b9ff779ec955eebf523a Mon Sep 17 00:00:00 2001
From: Helena Kotas 
Date: Wed, 12 Mar 2025 17:20:51 -0700
Subject: [PATCH 1/3] [HLSL] Create default resource constructor with
 BuiltinTypeMethodBuilder

---
 clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp | 126 ++
 1 file changed, 72 insertions(+), 54 deletions(-)

diff --git a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp 
b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp
index db0ed3434d837..a52c6a49264c8 100644
--- a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp
+++ b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp
@@ -89,21 +89,24 @@ struct TemplateParameterListBuilder {
 // statement (unless the last statement is already a ReturnStmt).
 struct BuiltinTypeMethodBuilder {
 private:
-  struct MethodParam {
+  struct Param {
 const IdentifierInfo &NameII;
 QualType Ty;
 HLSLParamModifierAttr::Spelling Modifier;
-MethodParam(const IdentifierInfo &NameII, QualType Ty,
-HLSLParamModifierAttr::Spelling Modifier)
+Param(const IdentifierInfo &NameII, QualType Ty,
+  HLSLParamModifierAttr::Spelling Modifier)
 : NameII(NameII), Ty(Ty), Modifier(Modifier) {}
   };
 
   BuiltinTypeDeclBuilder &DeclBuilder;
-  DeclarationNameInfo NameInfo;
+  DeclarationName Name;
   QualType ReturnTy;
+  // method or constructor declaration (CXXConstructorDecl derives from
+  // CXXMethodDecl)
   CXXMethodDecl *Method;
   bool IsConst;
-  llvm::SmallVector Params;
+  bool IsConstructor;
+  llvm::SmallVector Params;
   llvm::SmallVector StmtsList;
 
   // Argument placeholders, inspired by std::placeholder. These are the indices
@@ -122,12 +125,14 @@ struct BuiltinTypeMethodBuilder {
   friend BuiltinTypeDeclBuilder;
 
   BuiltinTypeMethodBuilder(BuiltinTypeDeclBuilder &DB, DeclarationName &Name,
-   QualType ReturnTy, bool IsConst = false)
-  : DeclBuilder(DB), NameInfo(DeclarationNameInfo(Name, SourceLocation())),
-ReturnTy(ReturnTy), Method(nullptr), IsConst(IsConst) {}
-
-  BuiltinTypeMethodBuilder(BuiltinTypeDeclBuilder &DB, StringRef Name,
-   QualType ReturnTy, bool IsConst = false);
+   QualType ReturnTy, bool IsConst = false,
+   bool IsConstructor = false)
+  : DeclBuilder(DB), Name(Name), ReturnTy(ReturnTy), Method(nullptr),
+IsConst(IsConst), IsConstructor(IsConstructor) {}
+
+  BuiltinTypeMethodBuilder(BuiltinTypeDeclBuilder &DB, StringRef NameStr,
+   QualType ReturnTy, bool IsConst = false,
+   bool IsConstructor = false);
   BuiltinTypeMethodBuilder(const BuiltinTypeMethodBuilder &Other) = delete;
 
   ~BuiltinTypeMethodBuilder() { finalizeMethod(); }
@@ -148,7 +153,14 @@ struct BuiltinTypeMethodBuilder {
   Expr *getResourceHandleExpr();
 
 private:
-  void createMethodDecl();
+  void createDecl();
+
+  // Makes sure the declaration is created; should be called before any
+  // statement added or when access to 'this' is needed.
+  void ensureCompleteDecl() {
+if (!Method)
+  createDecl();
+  }
 };
 
 TemplateParameterListBuilder::~TemplateParameterListBuilder() {
@@ -323,13 +335,26 @@ Expr 
*BuiltinTypeMethodBuilder::convertPlaceholder(PlaceHolder PH) {
 }
 
 BuiltinTypeMethodBuilder::BuiltinTypeMethodBuilder(BuiltinTypeDeclBuilder &DB,
-   StringRef Name,
+   StringRef NameStr,
QualType ReturnTy,
-   bool IsConst)
-: DeclBuilder(DB), ReturnTy(ReturnTy), Method(nullptr), IsConst(IsConst) {
-  const IdentifierInfo &II =
-  DB.SemaRef.getASTContext().Idents.get(Name, tok::TokenKind::identifier);
-  NameInfo = DeclarationNameInfo(DeclarationName(&II), SourceLocation());
+   bool IsConst,
+   bool IsConstructor)
+: DeclBuilder(DB), ReturnTy(ReturnTy), Method(nullptr), IsConst(IsConst),
+  IsConstructor(IsConstructor) {
+
+  assert((!NameStr.empty() || IsConstructor) && "method needs a name");
+  assert(((IsConstructor && !IsConst) || !IsConstructor) &&
+ "constructor cannot be const");
+
+  ASTContext &AST = DB.SemaRef.getASTContext();
+  if (IsConstructor) {
+Name = AST.DeclarationNames.getCXXConstructorName(
+DB.Record->getTypeForDecl()->getCanonicalTypeUnqualified());
+  } else {
+const IdentifierInfo &II =
+AST.Idents.get(NameStr, tok::TokenKind::identifier);
+Name = DeclarationName(&II);
+  }
 }
 
 BuiltinTypeMethodBuilder &
@@ -342,13 +367,13 @@ BuiltinTypeMethodBuilder::addParam(StringRef Name, 
QualType Ty,
   return *this;
 }
 
-void BuiltinTypeMethodBuilder::createMethodDecl(

[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: Detect address materialization and arithmetics (PR #132540)

2025-03-26 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/132540

>From 72e3de5990635d183b3b39cc55ad4dab5e104a29 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Thu, 20 Mar 2025 20:15:07 +0300
Subject: [PATCH] [BOLT] Gadget scanner: Detect address materialization and
 arithmetics

In addition to authenticated pointers, consider the contents of a
register safe if it was
* written by PC-relative address computation
* updated by an arithmetic instruction whose input address is safe
---
 bolt/include/bolt/Core/MCPlusBuilder.h|  16 ++
 bolt/lib/Passes/PAuthGadgetScanner.cpp|  92 +--
 .../Target/AArch64/AArch64MCPlusBuilder.cpp   |  30 +++
 .../AArch64/gs-pacret-autiasp.s   |  15 --
 .../gs-pauth-address-materialization.s| 228 ++
 .../binary-analysis/AArch64/lit.local.cfg |   3 +-
 6 files changed, 345 insertions(+), 39 deletions(-)
 create mode 100644 
bolt/test/binary-analysis/AArch64/gs-pauth-address-materialization.s

diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h 
b/bolt/include/bolt/Core/MCPlusBuilder.h
index 8b6dc14121480..e94f82d00349a 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -587,6 +587,22 @@ class MCPlusBuilder {
 return getNoRegister();
   }
 
+  virtual MCPhysReg getSafelyMaterializedAddressReg(const MCInst &Inst) const {
+llvm_unreachable("not implemented");
+return getNoRegister();
+  }
+
+  /// Analyzes if this instruction can safely perform address arithmetics.
+  ///
+  /// If the first element of the returned pair is no-register, this 
instruction
+  /// is considered unknown. Otherwise, (output, input) pair is returned,
+  /// so that output is as trusted as input is.
+  virtual std::pair
+  analyzeSafeAddressArithmetics(const MCInst &Inst) const {
+llvm_unreachable("not implemented");
+return std::make_pair(getNoRegister(), getNoRegister());
+  }
+
   virtual bool isTerminator(const MCInst &Inst) const;
 
   virtual bool isNoop(const MCInst &Inst) const {
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp 
b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index a3b320c545734..16da08551a34d 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -335,6 +335,50 @@ class PacRetAnalysis
 });
   }
 
+  BitVector getClobberedRegs(const MCInst &Point) const {
+BitVector Clobbered(NumRegs, false);
+// Assume a call can clobber all registers, including callee-saved
+// registers. There's a good chance that callee-saved registers will be
+// saved on the stack at some point during execution of the callee.
+// Therefore they should also be considered as potentially modified by an
+// attacker/written to.
+// Also, not all functions may respect the AAPCS ABI rules about
+// caller/callee-saved registers.
+if (BC.MIB->isCall(Point))
+  Clobbered.set();
+else
+  BC.MIB->getClobberedRegs(Point, Clobbered);
+return Clobbered;
+  }
+
+  // Returns all registers that can be treated as if they are written by an
+  // authentication instruction.
+  SmallVector getAuthenticatedRegs(const MCInst &Point,
+  const State &Cur) const {
+SmallVector Regs;
+const MCPhysReg NoReg = BC.MIB->getNoRegister();
+
+// A signed pointer can be authenticated, or
+ErrorOr AutReg = BC.MIB->getAuthenticatedReg(Point);
+if (AutReg && *AutReg != NoReg)
+  Regs.push_back(*AutReg);
+
+// ... a safe address can be materialized, or
+MCPhysReg NewAddrReg = BC.MIB->getSafelyMaterializedAddressReg(Point);
+if (NewAddrReg != NoReg)
+  Regs.push_back(NewAddrReg);
+
+// ... an address can be updated in a safe manner, producing the result
+// which is as trusted as the input address.
+MCPhysReg ArithResult, ArithSrc;
+std::tie(ArithResult, ArithSrc) =
+BC.MIB->analyzeSafeAddressArithmetics(Point);
+if (ArithResult != NoReg && Cur.SafeToDerefRegs[ArithSrc])
+  Regs.push_back(ArithResult);
+
+return Regs;
+  }
+
   State computeNext(const MCInst &Point, const State &Cur) {
 PacStatePrinter P(BC);
 LLVM_DEBUG({
@@ -355,19 +399,20 @@ class PacRetAnalysis
   return State();
 }
 
+// First, compute various properties of the instruction, taking the state
+// before its execution into account, if necessary.
+
+BitVector Clobbered = getClobberedRegs(Point);
+// Compute the set of registers that can be considered as written by
+// an authentication instruction. This includes operations that are
+// *strictly better* than authentication, such as materializing a
+// PC-relative constant.
+SmallVector AuthenticatedOrBetter =
+getAuthenticatedRegs(Point, Cur);
+
+// Then, compute the state after this instruction is executed.
 State Next = Cur;
-BitVector Clobbered(NumRegs, false);
-// Assume a call can clo

[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: Detect address materialization and arithmetics (PR #132540)

2025-03-26 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/132540

>From 72e3de5990635d183b3b39cc55ad4dab5e104a29 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Thu, 20 Mar 2025 20:15:07 +0300
Subject: [PATCH] [BOLT] Gadget scanner: Detect address materialization and
 arithmetics

In addition to authenticated pointers, consider the contents of a
register safe if it was
* written by PC-relative address computation
* updated by an arithmetic instruction whose input address is safe
---
 bolt/include/bolt/Core/MCPlusBuilder.h|  16 ++
 bolt/lib/Passes/PAuthGadgetScanner.cpp|  92 +--
 .../Target/AArch64/AArch64MCPlusBuilder.cpp   |  30 +++
 .../AArch64/gs-pacret-autiasp.s   |  15 --
 .../gs-pauth-address-materialization.s| 228 ++
 .../binary-analysis/AArch64/lit.local.cfg |   3 +-
 6 files changed, 345 insertions(+), 39 deletions(-)
 create mode 100644 
bolt/test/binary-analysis/AArch64/gs-pauth-address-materialization.s

diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h 
b/bolt/include/bolt/Core/MCPlusBuilder.h
index 8b6dc14121480..e94f82d00349a 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -587,6 +587,22 @@ class MCPlusBuilder {
 return getNoRegister();
   }
 
+  virtual MCPhysReg getSafelyMaterializedAddressReg(const MCInst &Inst) const {
+llvm_unreachable("not implemented");
+return getNoRegister();
+  }
+
+  /// Analyzes if this instruction can safely perform address arithmetics.
+  ///
+  /// If the first element of the returned pair is no-register, this 
instruction
+  /// is considered unknown. Otherwise, (output, input) pair is returned,
+  /// so that output is as trusted as input is.
+  virtual std::pair
+  analyzeSafeAddressArithmetics(const MCInst &Inst) const {
+llvm_unreachable("not implemented");
+return std::make_pair(getNoRegister(), getNoRegister());
+  }
+
   virtual bool isTerminator(const MCInst &Inst) const;
 
   virtual bool isNoop(const MCInst &Inst) const {
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp 
b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index a3b320c545734..16da08551a34d 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -335,6 +335,50 @@ class PacRetAnalysis
 });
   }
 
+  BitVector getClobberedRegs(const MCInst &Point) const {
+BitVector Clobbered(NumRegs, false);
+// Assume a call can clobber all registers, including callee-saved
+// registers. There's a good chance that callee-saved registers will be
+// saved on the stack at some point during execution of the callee.
+// Therefore they should also be considered as potentially modified by an
+// attacker/written to.
+// Also, not all functions may respect the AAPCS ABI rules about
+// caller/callee-saved registers.
+if (BC.MIB->isCall(Point))
+  Clobbered.set();
+else
+  BC.MIB->getClobberedRegs(Point, Clobbered);
+return Clobbered;
+  }
+
+  // Returns all registers that can be treated as if they are written by an
+  // authentication instruction.
+  SmallVector getAuthenticatedRegs(const MCInst &Point,
+  const State &Cur) const {
+SmallVector Regs;
+const MCPhysReg NoReg = BC.MIB->getNoRegister();
+
+// A signed pointer can be authenticated, or
+ErrorOr AutReg = BC.MIB->getAuthenticatedReg(Point);
+if (AutReg && *AutReg != NoReg)
+  Regs.push_back(*AutReg);
+
+// ... a safe address can be materialized, or
+MCPhysReg NewAddrReg = BC.MIB->getSafelyMaterializedAddressReg(Point);
+if (NewAddrReg != NoReg)
+  Regs.push_back(NewAddrReg);
+
+// ... an address can be updated in a safe manner, producing the result
+// which is as trusted as the input address.
+MCPhysReg ArithResult, ArithSrc;
+std::tie(ArithResult, ArithSrc) =
+BC.MIB->analyzeSafeAddressArithmetics(Point);
+if (ArithResult != NoReg && Cur.SafeToDerefRegs[ArithSrc])
+  Regs.push_back(ArithResult);
+
+return Regs;
+  }
+
   State computeNext(const MCInst &Point, const State &Cur) {
 PacStatePrinter P(BC);
 LLVM_DEBUG({
@@ -355,19 +399,20 @@ class PacRetAnalysis
   return State();
 }
 
+// First, compute various properties of the instruction, taking the state
+// before its execution into account, if necessary.
+
+BitVector Clobbered = getClobberedRegs(Point);
+// Compute the set of registers that can be considered as written by
+// an authentication instruction. This includes operations that are
+// *strictly better* than authentication, such as materializing a
+// PC-relative constant.
+SmallVector AuthenticatedOrBetter =
+getAuthenticatedRegs(Point, Cur);
+
+// Then, compute the state after this instruction is executed.
 State Next = Cur;
-BitVector Clobbered(NumRegs, false);
-// Assume a call can clo

[llvm-branch-commits] [llvm] [GlobalISel] Combine redundant sext_inreg (PR #131624)

2025-03-26 Thread Pierre van Houtryve via llvm-branch-commits

https://github.com/Pierre-vh updated 
https://github.com/llvm/llvm-project/pull/131624

>From f4c801437460aef9b9c2e5f49d1e98ec90fadb16 Mon Sep 17 00:00:00 2001
From: pvanhout 
Date: Mon, 17 Mar 2025 13:54:59 +0100
Subject: [PATCH 1/4] [GlobalISel] Combine redundant sext_inreg

---
 .../llvm/CodeGen/GlobalISel/CombinerHelper.h  |   3 +
 .../include/llvm/Target/GlobalISel/Combine.td |   9 +-
 .../GlobalISel/CombinerHelperCasts.cpp|  27 +++
 .../combine-redundant-sext-inreg.mir  | 164 ++
 .../combine-sext-trunc-sextinreg.mir  |  87 ++
 .../CodeGen/AMDGPU/GlobalISel/llvm.abs.ll |   5 -
 6 files changed, 289 insertions(+), 6 deletions(-)
 create mode 100644 
llvm/test/CodeGen/AMDGPU/GlobalISel/combine-redundant-sext-inreg.mir
 create mode 100644 
llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-trunc-sextinreg.mir

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h 
b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 9b78342c8fc39..5778377d125a8 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -994,6 +994,9 @@ class CombinerHelper {
   // overflow sub
   bool matchSuboCarryOut(const MachineInstr &MI, BuildFnTy &MatchInfo) const;
 
+  // (sext_inreg (sext_inreg x, K0), K1)
+  void applyRedundantSextInReg(MachineInstr &Root, MachineInstr &Other) const;
+
 private:
   /// Checks for legality of an indexed variant of \p LdSt.
   bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td 
b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 660b03080f92e..6a0ff683a4647 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1849,6 +1849,12 @@ def anyext_of_anyext : ext_of_ext_opcodes;
 def anyext_of_zext : ext_of_ext_opcodes;
 def anyext_of_sext : ext_of_ext_opcodes;
 
+def sext_inreg_of_sext_inreg : GICombineRule<
+   (defs root:$dst),
+   (match (G_SEXT_INREG $x, $src, $a):$other,
+  (G_SEXT_INREG $dst, $x, $b):$root),
+   (apply [{ Helper.applyRedundantSextInReg(*${root}, *${other}); }])>;
+
 // Push cast through build vector.
 class buildvector_of_opcode : GICombineRule <
   (defs root:$root, build_fn_matchinfo:$matchinfo),
@@ -1896,7 +1902,8 @@ def cast_of_cast_combines: GICombineGroup<[
   sext_of_anyext,
   anyext_of_anyext,
   anyext_of_zext,
-  anyext_of_sext
+  anyext_of_sext,
+  sext_inreg_of_sext_inreg,
 ]>;
 
 def cast_combines: GICombineGroup<[
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp 
b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
index 576fd5fd81703..883a62c308232 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
@@ -378,3 +378,30 @@ bool CombinerHelper::matchCastOfInteger(const MachineInstr 
&CastMI,
 return false;
   }
 }
+
+void CombinerHelper::applyRedundantSextInReg(MachineInstr &Root,
+ MachineInstr &Other) const {
+  assert(Root.getOpcode() == TargetOpcode::G_SEXT_INREG &&
+ Other.getOpcode() == TargetOpcode::G_SEXT_INREG);
+
+  unsigned RootWidth = Root.getOperand(2).getImm();
+  unsigned OtherWidth = Other.getOperand(2).getImm();
+
+  Register Dst = Root.getOperand(0).getReg();
+  Register OtherDst = Other.getOperand(0).getReg();
+  Register Src = Other.getOperand(1).getReg();
+
+  if (RootWidth >= OtherWidth) {
+// The root sext_inreg is entirely redundant because the other one
+// is narrower.
+Observer.changingAllUsesOfReg(MRI, Dst);
+MRI.replaceRegWith(Dst, OtherDst);
+Observer.finishedChangingAllUsesOfReg();
+  } else {
+// RootWidth < OtherWidth, rewrite this G_SEXT_INREG with the source of the
+// other G_SEXT_INREG.
+Builder.buildSExtInReg(Dst, Src, RootWidth);
+  }
+
+  Root.eraseFromParent();
+}
diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-redundant-sext-inreg.mir 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-redundant-sext-inreg.mir
new file mode 100644
index 0..566ee8e6c338d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-redundant-sext-inreg.mir
@@ -0,0 +1,164 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 
-run-pass=amdgpu-regbank-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: inreg8_inreg16
+tracksRegLiveness: true
+body: |
+  bb.0:
+liveins: $vgpr0
+; CHECK-LABEL: name: inreg8_inreg16
+; CHECK: liveins: $vgpr0
+; CHECK-NEXT: {{  $}}
+; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
+; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %copy, 8
+; CHECK-NEXT: $vgpr0 = COPY %inreg(s32)
+%copy:_(s32) = COPY $vgpr0
+%inreg:_(s32) = G_SEXT_INREG %copy, 8
+%inreg1:_(s32) = G_SEXT_INREG %inreg, 16
+$vgpr0 = COPY %inreg1
+...
+
+

[llvm-branch-commits] [llvm] [AMDGPU][NPM] Port AMDGPUSetWavePriority to NPM (PR #130064)

2025-03-26 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm approved this pull request.


https://github.com/llvm/llvm-project/pull/130064
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [LV] Reduce register usage for scaled reductions (PR #133090)

2025-03-26 Thread Nicholas Guy via llvm-branch-commits


@@ -2031,17 +2033,19 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
 /// scalar value.
 class VPPartialReductionRecipe : public VPSingleDefRecipe {
   unsigned Opcode;
+  unsigned ScaleFactor;

NickGuy-Arm wrote:

Nit: Could this be `VFScaleFactor` to match the equivalent in 
`VPReductionPHIRecipe`?

https://github.com/llvm/llvm-project/pull/133090
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [LV] Reduce register usage for scaled reductions (PR #133090)

2025-03-26 Thread Nicholas Guy via llvm-branch-commits


@@ -5026,10 +5026,23 @@ calculateRegisterUsage(VPlan &Plan, 
ArrayRef VFs,
 // even in the scalar case.
 RegUsage[ClassID] += 1;
   } else {
+// The output from scaled phis and scaled reductions actually have
+// fewer lanes than the VF.
+auto VF = VFs[J];
+if (auto *ReductionR = dyn_cast(R))

NickGuy-Arm wrote:

[Idle thought, feel free to ignore] 
I wonder if there's precedent to add a `getVFScaleFactor` or equivalent to the 
base recipe class (or one of the other subclasses), and allow any recipe to 
override it instead of explicitly checking for every type that could scale the 
VF. 
Likely not yet, and almost certainly not in this patch, but maybe something to 
consider in the future?

https://github.com/llvm/llvm-project/pull/133090
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] [libc++] Remove the need for _LIBCPP_TEMPLATE_VIS (PR #133010)

2025-03-26 Thread Nikolas Klauser via llvm-branch-commits

https://github.com/philnik777 ready_for_review 
https://github.com/llvm/llvm-project/pull/133010
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][NPM] Port AMDGPUSetWavePriority to NPM (PR #130064)

2025-03-26 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/130064

>From 3dcdf09b2a4635da32ff99208ddad0c27bdc621e Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Thu, 6 Mar 2025 06:07:23 +
Subject: [PATCH 1/2] [AMDGPU][NPM] Port AMDGPUSetWavePriority to NPM

---
 llvm/lib/Target/AMDGPU/AMDGPU.h   |  9 +++-
 llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def |  2 +-
 .../Target/AMDGPU/AMDGPUSetWavePriority.cpp   | 44 ++-
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |  5 +--
 llvm/test/CodeGen/AMDGPU/set-wave-priority.ll |  5 +++
 5 files changed, 48 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 00a807192eb0c..6c7f93c740cf3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -399,6 +399,13 @@ class SILateBranchLoweringPass
   static bool isRequired() { return true; }
 };
 
+class AMDGPUSetWavePriorityPass
+: public PassInfoMixin {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+MachineFunctionAnalysisManager &MFAM);
+};
+
 FunctionPass *createAMDGPUAnnotateUniformValuesLegacy();
 
 ModulePass *createAMDGPUPrintfRuntimeBinding();
@@ -511,7 +518,7 @@ void initializeGCNPreRAOptimizationsLegacyPass(PassRegistry 
&);
 extern char &GCNPreRAOptimizationsID;
 
 FunctionPass *createAMDGPUSetWavePriorityPass();
-void initializeAMDGPUSetWavePriorityPass(PassRegistry &);
+void initializeAMDGPUSetWavePriorityLegacyPass(PassRegistry &);
 
 void initializeGCNRewritePartialRegUsesLegacyPass(llvm::PassRegistry &);
 extern char &GCNRewritePartialRegUsesID;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def 
b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 6a45392b5f099..bebb69d765654 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -102,6 +102,7 @@ MACHINE_FUNCTION_PASS("amdgpu-mark-last-scratch-load", 
AMDGPUMarkLastScratchLoad
 MACHINE_FUNCTION_PASS("amdgpu-pre-ra-long-branch-reg", 
GCNPreRALongBranchRegPass())
 MACHINE_FUNCTION_PASS("amdgpu-reserve-wwm-regs", AMDGPUReserveWWMRegsPass())
 MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", 
GCNRewritePartialRegUsesPass())
+MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", AMDGPUSetWavePriorityPass())
 MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", 
GCNPreRAOptimizationsPass())
 MACHINE_FUNCTION_PASS("amdgpu-nsa-reassign", GCNNSAReassignPass())
 MACHINE_FUNCTION_PASS("gcn-create-vopd", GCNCreateVOPDPass())
@@ -133,7 +134,6 @@ MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
 #define DUMMY_MACHINE_FUNCTION_PASS(NAME, CREATE_PASS)
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", 
GCNPreRAOptimizationsPass())
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", 
GCNRewritePartialRegUsesPass())
-DUMMY_MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", 
AMDGPUSetWavePriorityPass())
 
 DUMMY_MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass())
 // TODO: Move amdgpu-preload-kern-arg-prolog to MACHINE_FUNCTION_PASS since it
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSetWavePriority.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUSetWavePriority.cpp
index ec9922db9af12..0ff6d27f0e7c9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSetWavePriority.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSetWavePriority.cpp
@@ -19,6 +19,7 @@
 #include "SIInstrInfo.h"
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachinePassManager.h"
 
 using namespace llvm;
 
@@ -40,15 +41,11 @@ struct MBBInfo {
 
 using MBBInfoSet = DenseMap;
 
-class AMDGPUSetWavePriority : public MachineFunctionPass {
+class AMDGPUSetWavePriority {
 public:
   static char ID;
 
-  AMDGPUSetWavePriority() : MachineFunctionPass(ID) {}
-
-  StringRef getPassName() const override { return "Set wave priority"; }
-
-  bool runOnMachineFunction(MachineFunction &MF) override;
+  bool run(MachineFunction &MF);
 
 private:
   MachineInstr *BuildSetprioMI(MachineBasicBlock &MBB,
@@ -58,15 +55,30 @@ class AMDGPUSetWavePriority : public MachineFunctionPass {
   const SIInstrInfo *TII;
 };
 
+class AMDGPUSetWavePriorityLegacy : public MachineFunctionPass {
+public:
+  static char ID;
+
+  AMDGPUSetWavePriorityLegacy() : MachineFunctionPass(ID) {}
+
+  StringRef getPassName() const override { return "Set wave priority"; }
+
+  bool runOnMachineFunction(MachineFunction &MF) override {
+if (skipFunction(MF.getFunction()))
+  return false;
+return AMDGPUSetWavePriority().run(MF);
+  }
+};
+
 } // End anonymous namespace.
 
-INITIALIZE_PASS(AMDGPUSetWavePriority, DEBUG_TYPE, "Set wave priority", false,
-false)
+INITIALIZE_PASS(AMDGPUSetWavePriorityLegacy, DEBUG_TYPE, "Set wave priority",
+false, false)
 
-char AMDGPUSetWavePriority::ID = 0;
+char AMDGPUSetWavePriorityLegacy::ID = 0;
 
 FunctionPass *llvm::createAMDGPUSetWavePriorityPass() {

[llvm-branch-commits] [llvm] release/20.x: [llvm-dlltool] Implement the --identify option (PR #132483)

2025-03-26 Thread Martin Storsjö via llvm-branch-commits

mstorsjo wrote:

> @mstorsjo (or anyone else). If you would like to add a note about this fix in 
> the release notes (completely optional). Please reply to this comment with a 
> one or two sentence description of the fix. When you are done, please add the 
> release:note label to this PR.

We could add this bullet to the release notes:
- Implemented the `--identify` option in llvm-dlltool

https://github.com/llvm/llvm-project/pull/132483
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/20.x: [PATCH] [clang][modules] Fix serialization and de-serialization of PCH module file refs (#105994) (#132802) (PR #133198)

2025-03-26 Thread Chuanqi Xu via llvm-branch-commits

ChuanqiXu9 wrote:

This is a simple fix to a problem with a (relatively) long history. I think it 
is good to backport this.

https://github.com/llvm/llvm-project/pull/133198
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/20.x: [HEXAGON] Fix semantics of ordered FP compares (#131089) (PR #131270)

2025-03-26 Thread Brian Cain via llvm-branch-commits

androm3da wrote:

@iajbar can you review the cherry-pick of this floating point comparison fix?

https://github.com/llvm/llvm-project/pull/131270
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang-tools-extra] [clang] support pack expansions for trailing requires clauses (PR #133190)

2025-03-26 Thread Matheus Izvekov via llvm-branch-commits

mizvekov wrote:

Actually the current users of the implicit conversion are not good at all, will 
fix them instead.

https://github.com/llvm/llvm-project/pull/133190
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/20.x: [hexagon] Enable --eh-frame-hdr (#130225) (PR #130678)

2025-03-26 Thread Brian Cain via llvm-branch-commits

androm3da wrote:

@iajbar  can you review this cherry-pick for the compiler driver?

https://github.com/llvm/llvm-project/pull/130678
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] release/20.x: [libcxx] [test] Fix restoring LLVM_DIR and Clang_DIR (#132838) (PR #133153)

2025-03-26 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-libcxx

Author: None (llvmbot)


Changes

Backport 51bceb46f8eeb7c3d060387be315ca41855933c2

Requested by: @mstorsjo

---
Full diff: https://github.com/llvm/llvm-project/pull/133153.diff


1 Files Affected:

- (modified) libcxx/test/tools/clang_tidy_checks/CMakeLists.txt (+4-3) 


``diff
diff --git a/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt 
b/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt
index 0f8f0e8864d0f..da045fac92ce4 100644
--- a/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt
+++ b/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt
@@ -8,6 +8,10 @@ set(Clang_DIR_SAVE ${Clang_DIR})
 # versions must match. Otherwise there likely will be ODR-violations. This had
 # led to crashes and incorrect output of the clang-tidy based checks.
 find_package(Clang ${CMAKE_CXX_COMPILER_VERSION})
+
+set(LLVM_DIR "${LLVM_DIR_SAVE}" CACHE PATH "The directory containing a CMake 
configuration file for LLVM." FORCE)
+set(Clang_DIR "${Clang_DIR_SAVE}" CACHE PATH "The directory containing a CMake 
configuration file for Clang." FORCE)
+
 if(NOT Clang_FOUND)
   message(STATUS "Clang-tidy tests are disabled since the "
  "Clang development package is unavailable.")
@@ -19,9 +23,6 @@ if(NOT TARGET clangTidy)
   return()
 endif()
 
-set(LLVM_DIR "${LLVM_DIR_SAVE}" CACHE PATH "The directory containing a CMake 
configuration file for LLVM." FORCE)
-set(Clang_DIR "${Clang_DIR_SAVE}" CACHE PATH "The directory containing a CMake 
configuration file for Clang." FORCE)
-
 message(STATUS "Found system-installed LLVM ${LLVM_PACKAGE_VERSION} with 
headers in ${LLVM_INCLUDE_DIRS}")
 
 set(CMAKE_CXX_STANDARD 20)

``




https://github.com/llvm/llvm-project/pull/133153
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] release/20.x: [libcxx] [test] Fix restoring LLVM_DIR and Clang_DIR (#132838) (PR #133153)

2025-03-26 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/133153
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang-tools-extra] [clang] support pack expansions for trailing requires clauses (PR #133190)

2025-03-26 Thread Matheus Izvekov via llvm-branch-commits

https://github.com/mizvekov updated 
https://github.com/llvm/llvm-project/pull/133190

>From bb164f3a8c86282ff6cb317ff10df21b33b11520 Mon Sep 17 00:00:00 2001
From: Matheus Izvekov 
Date: Wed, 26 Mar 2025 18:38:34 -0300
Subject: [PATCH] [clang] support pack expansions for trailing requires clauses

This fixes a crash when evaluating constraints from trailing
requires clauses, when these are part of a generic lambda which
is expanded.
---
 .../refactor/tweaks/ExtractVariable.cpp   |  6 +--
 clang/docs/ReleaseNotes.rst   |  2 +
 clang/include/clang/AST/ASTNodeTraverser.h|  4 +-
 clang/include/clang/AST/Decl.h| 36 ++---
 clang/include/clang/AST/DeclCXX.h | 20 
 clang/include/clang/AST/ExprCXX.h |  2 +-
 clang/include/clang/AST/RecursiveASTVisitor.h |  9 ++--
 clang/include/clang/Sema/Sema.h   | 14 +++---
 clang/lib/AST/ASTContext.cpp  |  7 ++-
 clang/lib/AST/ASTImporter.cpp |  5 +-
 clang/lib/AST/Decl.cpp| 16 +++---
 clang/lib/AST/DeclCXX.cpp | 33 ++--
 clang/lib/AST/DeclPrinter.cpp | 10 ++--
 clang/lib/AST/DeclTemplate.cpp|  6 +--
 clang/lib/AST/ExprCXX.cpp |  2 +-
 clang/lib/AST/ItaniumMangle.cpp   |  4 +-
 clang/lib/ASTMatchers/ASTMatchFinder.cpp  |  3 +-
 clang/lib/Index/IndexDecl.cpp |  4 +-
 clang/lib/Sema/SemaConcept.cpp|  6 +--
 clang/lib/Sema/SemaDecl.cpp   | 24 -
 clang/lib/Sema/SemaDeclCXX.cpp|  4 +-
 clang/lib/Sema/SemaFunctionEffects.cpp|  2 +-
 clang/lib/Sema/SemaLambda.cpp | 18 ---
 clang/lib/Sema/SemaOverload.cpp   | 12 +++--
 clang/lib/Sema/SemaTemplateDeductionGuide.cpp | 50 ---
 .../lib/Sema/SemaTemplateInstantiateDecl.cpp  |  4 +-
 clang/lib/Sema/TreeTransform.h|  7 ++-
 clang/lib/Serialization/ASTReaderDecl.cpp |  2 +-
 clang/lib/Serialization/ASTWriterDecl.cpp |  5 +-
 .../SemaCXX/fold_lambda_with_variadics.cpp|  9 
 clang/tools/libclang/CIndex.cpp   |  2 +-
 31 files changed, 191 insertions(+), 137 deletions(-)

diff --git a/clang-tools-extra/clangd/refactor/tweaks/ExtractVariable.cpp 
b/clang-tools-extra/clangd/refactor/tweaks/ExtractVariable.cpp
index d84e501b87ce7..90dac3b76c648 100644
--- a/clang-tools-extra/clangd/refactor/tweaks/ExtractVariable.cpp
+++ b/clang-tools-extra/clangd/refactor/tweaks/ExtractVariable.cpp
@@ -100,9 +100,9 @@ computeReferencedDecls(const clang::Expr *Expr) {
 TraverseLambdaCapture(LExpr, &Capture, Initializer);
   }
 
-  if (clang::Expr *const RequiresClause =
-  LExpr->getTrailingRequiresClause()) {
-TraverseStmt(RequiresClause);
+  if (const clang::Expr *RequiresClause =
+  LExpr->getTrailingRequiresClause().ConstraintExpr) {
+TraverseStmt(const_cast(RequiresClause));
   }
 
   for (auto *const TemplateParam : LExpr->getExplicitTemplateParameters())
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 962016c62fc68..9415b29191ddd 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -352,6 +352,8 @@ Bug Fixes to C++ Support
 - Improved fix for an issue with pack expansions of type constraints, where 
this
   now also works if the constraint has non-type or template template 
parameters.
   (#GH131798)
+- Fix crash when evaluating trailing requires clause of generic lambdas which 
are part of
+  a pack expansion.
 - Fixes matching of nested template template parameters. (#GH130362)
 - Correctly diagnoses template template paramters which have a pack parameter
   not in the last position.
diff --git a/clang/include/clang/AST/ASTNodeTraverser.h 
b/clang/include/clang/AST/ASTNodeTraverser.h
index f557555e96e59..b16595db07e18 100644
--- a/clang/include/clang/AST/ASTNodeTraverser.h
+++ b/clang/include/clang/AST/ASTNodeTraverser.h
@@ -534,8 +534,8 @@ class ASTNodeTraverser
   for (const auto *Parameter : D->parameters())
 Visit(Parameter);
 
-if (const Expr *TRC = D->getTrailingRequiresClause())
-  Visit(TRC);
+if (const AssociatedConstraint &TRC = D->getTrailingRequiresClause())
+  Visit(TRC.ConstraintExpr);
 
 if (Traversal == TK_IgnoreUnlessSpelledInSource && D->isDefaulted())
   return;
diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h
index 6557a4c4962ec..64ee8fcff40bc 100644
--- a/clang/include/clang/AST/Decl.h
+++ b/clang/include/clang/AST/Decl.h
@@ -81,13 +81,17 @@ enum class ImplicitParamKind;
 // Holds a constraint expression along with a pack expansion index, if
 // expanded.
 struct AssociatedConstraint {
-  const Expr *ConstraintExpr;
-  int ArgumentPackSubstitutionIndex;
+  const Expr *ConstraintExpr = nullptr;
+  int ArgumentPackSubstitutionIndex = -1;
+
+  cons

[llvm-branch-commits] [llvm] [CodeGen][StaticDataSplitter]Support constant pool partitioning (PR #129781)

2025-03-26 Thread Wei Xiao via llvm-branch-commits

https://github.com/williamweixiao approved this pull request.


https://github.com/llvm/llvm-project/pull/129781
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits