[llvm-branch-commits] [llvm] [LV] Reduce register usage for scaled reductions (PR #133090)
https://github.com/SamTebbs33 created https://github.com/llvm/llvm-project/pull/133090 This PR accounts for scaled reductions in `calculateRegisterUsage` to reflect the fact that the number of lanes in their output is smaller than the VF. >From 6193c2c846710472c7e604ef33a15cda18771328 Mon Sep 17 00:00:00 2001 From: Samuel Tebbs Date: Wed, 26 Mar 2025 14:01:59 + Subject: [PATCH] [LV] Reduce register usage for scaled reductions --- .../Transforms/Vectorize/LoopVectorize.cpp| 24 +- .../Transforms/Vectorize/VPRecipeBuilder.h| 3 +- llvm/lib/Transforms/Vectorize/VPlan.h | 14 +- .../partial-reduce-dot-product-neon.ll| 60 ++- .../AArch64/partial-reduce-dot-product.ll | 414 ++ 5 files changed, 495 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index c9f314c0ba481..da701ef9ff1a2 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5026,10 +5026,23 @@ calculateRegisterUsage(VPlan &Plan, ArrayRef VFs, // even in the scalar case. RegUsage[ClassID] += 1; } else { +// The output from scaled phis and scaled reductions actually have +// fewer lanes than the VF. +auto VF = VFs[J]; +if (auto *ReductionR = dyn_cast(R)) + VF = VF.divideCoefficientBy(ReductionR->getVFScaleFactor()); +else if (auto *PartialReductionR = + dyn_cast(R)) + VF = VF.divideCoefficientBy(PartialReductionR->getScaleFactor()); +if (VF != VFs[J]) + LLVM_DEBUG(dbgs() << "LV(REG): Scaled down VF from " << VFs[J] +<< " to " << VF << " for "; + R->dump();); + for (VPValue *DefV : R->definedValues()) { Type *ScalarTy = TypeInfo.inferScalarType(DefV); unsigned ClassID = TTI.getRegisterClassForType(true, ScalarTy); - RegUsage[ClassID] += GetRegUsage(ScalarTy, VFs[J]); + RegUsage[ClassID] += GetRegUsage(ScalarTy, VF); } } } @@ -8963,8 +8976,8 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe( if (isa(Instr) || isa(Instr)) return tryToWidenMemory(Instr, Operands, Range); - if (getScalingForReduction(Instr)) -return tryToCreatePartialReduction(Instr, Operands); + if (auto ScaleFactor = getScalingForReduction(Instr)) +return tryToCreatePartialReduction(Instr, Operands, ScaleFactor.value()); if (!shouldWiden(Instr, Range)) return nullptr; @@ -8988,7 +9001,8 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe( VPRecipeBase * VPRecipeBuilder::tryToCreatePartialReduction(Instruction *Reduction, - ArrayRef Operands) { + ArrayRef Operands, + unsigned ScaleFactor) { assert(Operands.size() == 2 && "Unexpected number of operands for partial reduction"); @@ -9021,7 +9035,7 @@ VPRecipeBuilder::tryToCreatePartialReduction(Instruction *Reduction, BinOp = Builder.createSelect(Mask, BinOp, Zero, Reduction->getDebugLoc()); } return new VPPartialReductionRecipe(ReductionOpcode, BinOp, Accumulator, - Reduction); + ScaleFactor, Reduction); } void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF, diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h index 334cfbad8bd7c..fd0064a34c4c9 100644 --- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h +++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h @@ -178,7 +178,8 @@ class VPRecipeBuilder { /// Create and return a partial reduction recipe for a reduction instruction /// along with binary operation and reduction phi operands. 
VPRecipeBase *tryToCreatePartialReduction(Instruction *Reduction, -ArrayRef Operands); +ArrayRef Operands, +unsigned ScaleFactor); /// Set the recipe created for given ingredient. void setRecipe(Instruction *I, VPRecipeBase *R) { diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 80b3d2a760293..d84efb1bd6850 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2001,6 +2001,8 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe, /// Generate the phi/select nodes. void execute(VPTransformState &State) override; + unsigned getVFScaleFactor() const { return VFScaleFactor; } + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Print the recipe. void print(ra
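For readers skimming the digest, the arithmetic behind the change is simple: a partial reduction with scale factor K produces VF/K lanes, so `calculateRegisterUsage` should charge it registers for VF/K lanes rather than for the full VF. A minimal sketch of that accounting follows, using plain unsigned lane counts in place of `llvm::ElementCount`; the function name and parameters are illustrative, not taken from the patch.

```cpp
#include <cassert>

// Sketch only: how many registers a recipe's output occupies once the
// scale factor is applied. Mirrors VF.divideCoefficientBy(ScaleFactor).
unsigned registersForRecipe(unsigned VF, unsigned ScaleFactor,
                            unsigned EltBits, unsigned RegBits) {
  assert(ScaleFactor != 0 && VF % ScaleFactor == 0 && "VF must divide evenly");
  unsigned Lanes = VF / ScaleFactor;
  // Round up: the output vector may still span several registers.
  return (Lanes * EltBits + RegBits - 1) / RegBits;
}

// A dot-product partial reduction accumulating i32 at VF=16 with scale
// factor 4 on a 128-bit-register target:
//   registersForRecipe(16, 4, 32, 128) == 1   // after this PR's accounting
//   registersForRecipe(16, 1, 32, 128) == 4   // what was charged before
```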
[llvm-branch-commits] [llvm] [LV] Reduce register usage for scaled reductions (PR #133090)
llvmbot wrote: @llvm/pr-subscribers-vectorizers Author: Sam Tebbs (SamTebbs33) Changes This PR accounts for scaled reductions in `calculateRegisterUsage` to reflect the fact that the number of lanes in their output is smaller than the VF. --- Patch is 56.56 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/133090.diff 5 Files Affected: - (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+19-5) - (modified) llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h (+2-1) - (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+10-4) - (modified) llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll (+50-10) - (modified) llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll (+414) ``diff diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index c9f314c0ba481..da701ef9ff1a2 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5026,10 +5026,23 @@ calculateRegisterUsage(VPlan &Plan, ArrayRef VFs, // even in the scalar case. RegUsage[ClassID] += 1; } else { +// The output from scaled phis and scaled reductions actually have +// fewer lanes than the VF. +auto VF = VFs[J]; +if (auto *ReductionR = dyn_cast(R)) + VF = VF.divideCoefficientBy(ReductionR->getVFScaleFactor()); +else if (auto *PartialReductionR = + dyn_cast(R)) + VF = VF.divideCoefficientBy(PartialReductionR->getScaleFactor()); +if (VF != VFs[J]) + LLVM_DEBUG(dbgs() << "LV(REG): Scaled down VF from " << VFs[J] +<< " to " << VF << " for "; + R->dump();); + for (VPValue *DefV : R->definedValues()) { Type *ScalarTy = TypeInfo.inferScalarType(DefV); unsigned ClassID = TTI.getRegisterClassForType(true, ScalarTy); - RegUsage[ClassID] += GetRegUsage(ScalarTy, VFs[J]); + RegUsage[ClassID] += GetRegUsage(ScalarTy, VF); } } } @@ -8963,8 +8976,8 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe( if (isa(Instr) || isa(Instr)) return tryToWidenMemory(Instr, Operands, Range); - if (getScalingForReduction(Instr)) -return tryToCreatePartialReduction(Instr, Operands); + if (auto ScaleFactor = getScalingForReduction(Instr)) +return tryToCreatePartialReduction(Instr, Operands, ScaleFactor.value()); if (!shouldWiden(Instr, Range)) return nullptr; @@ -8988,7 +9001,8 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe( VPRecipeBase * VPRecipeBuilder::tryToCreatePartialReduction(Instruction *Reduction, - ArrayRef Operands) { + ArrayRef Operands, + unsigned ScaleFactor) { assert(Operands.size() == 2 && "Unexpected number of operands for partial reduction"); @@ -9021,7 +9035,7 @@ VPRecipeBuilder::tryToCreatePartialReduction(Instruction *Reduction, BinOp = Builder.createSelect(Mask, BinOp, Zero, Reduction->getDebugLoc()); } return new VPPartialReductionRecipe(ReductionOpcode, BinOp, Accumulator, - Reduction); + ScaleFactor, Reduction); } void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF, diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h index 334cfbad8bd7c..fd0064a34c4c9 100644 --- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h +++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h @@ -178,7 +178,8 @@ class VPRecipeBuilder { /// Create and return a partial reduction recipe for a reduction instruction /// along with binary operation and reduction phi operands. 
VPRecipeBase *tryToCreatePartialReduction(Instruction *Reduction, -ArrayRef Operands); +ArrayRef Operands, +unsigned ScaleFactor); /// Set the recipe created for given ingredient. void setRecipe(Instruction *I, VPRecipeBase *R) { diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 80b3d2a760293..d84efb1bd6850 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2001,6 +2001,8 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe, /// Generate the phi/select nodes. void execute(VPTransformState &State) override; + unsigned getVFScaleFactor() const { return VFScaleFactor; } + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Print the recipe.
[llvm-branch-commits] [llvm] [LV] Reduce register usage for scaled reductions (PR #133090)
@@ -5026,10 +5026,23 @@ calculateRegisterUsage(VPlan &Plan, ArrayRef<ElementCount> VFs, // even in the scalar case. RegUsage[ClassID] += 1; } else { +// The output from scaled phis and scaled reductions actually have +// fewer lanes than the VF. +auto VF = VFs[J]; +if (auto *ReductionR = dyn_cast<VPReductionPHIRecipe>(R)) + VF = VF.divideCoefficientBy(ReductionR->getVFScaleFactor()); +else if (auto *PartialReductionR = + dyn_cast<VPPartialReductionRecipe>(R)) + VF = VF.divideCoefficientBy(PartialReductionR->getScaleFactor()); +if (VF != VFs[J]) NickGuy-Arm wrote: Nit: if the condition is only used for debug output, can it be moved inside the LLVM_DEBUG? https://github.com/llvm/llvm-project/pull/133090
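One way to apply the nit, sketched rather than quoting what eventually landed: `LLVM_DEBUG` accepts a braced block, so the comparison can live entirely inside it and compile away in release builds. The names below come from the quoted hunk.

```cpp
// Sketch of the suggested restructuring: the VF != VFs[J] check is only
// needed for the debug message, so it moves inside LLVM_DEBUG.
LLVM_DEBUG({
  if (VF != VFs[J]) {
    dbgs() << "LV(REG): Scaled down VF from " << VFs[J] << " to " << VF
           << " for ";
    R->dump();
  }
});
```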
[llvm-branch-commits] [libcxx] [libc++] Remove the need for _LIBCPP_TEMPLATE_VIS (PR #133010)
https://github.com/ldionne requested changes to this pull request. I am fine with the spirit of this patch. I think it needs a release note though since we're making library-wide changes. We don't think those changes are going to affect anyone, but I think it's a nice courtesy to give our users and it doesn't cost us anything to call it out. Also, I'd like this patch to be split into two. First, the changes that introduce `NAMESPACE_VIS` and that make `TEMPLATE_VIS` a no-op. Then, one or two weeks after landing the first patch, we can have a second patch that removes `_LIBCPP_TEMPLATE_VIS`, which is by then a no-op. I think this will simplify things in case we need to revert the patch for an unforeseen reason, and it'll allow people who do wonky stuff (e.g. override `_LIBCPP_TEMPLATE_VIS` for their own purposes) to apply a temporary workaround while they figure out how not to do something unsupported. Again, that's low cost to us and it'll make things much easier for everyone in case things happen to go wrong unexpectedly. https://github.com/llvm/llvm-project/pull/133010
[llvm-branch-commits] [libcxx] [libc++] Remove the need for _LIBCPP_TEMPLATE_VIS (PR #133010)
https://github.com/ldionne edited https://github.com/llvm/llvm-project/pull/133010
[llvm-branch-commits] [libcxx] [libc++] Remove the need for _LIBCPP_TEMPLATE_VIS (PR #133010)
@@ -64,25 +64,13 @@ Visibility Macros ABI, we should create a new _LIBCPP_HIDE_FROM_ABI_AFTER_XXX macro, and we can ldionne wrote: Not attached to this line: you should update the `Overview` at the beginning of this file to reflect the latest state of things. https://github.com/llvm/llvm-project/pull/133010
[llvm-branch-commits] [llvm] [LV] Reduce register usage for scaled reductions (PR #133090)
https://github.com/NickGuy-Arm commented: Looks generally good to me so far, with a few nitpicks. https://github.com/llvm/llvm-project/pull/133090
[llvm-branch-commits] [libcxx] [libc++] Remove the need for _LIBCPP_TEMPLATE_VIS (PR #133010)
ldionne wrote: The commit message should make it clear that this is a no-op on Clang. https://github.com/llvm/llvm-project/pull/133010
[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: Detect address materialization and arithmetics (PR #132540)
https://github.com/atrosinenko updated https://github.com/llvm/llvm-project/pull/132540 >From 53f6310e26cb02a18d99a9350ff8162ea0ed22b6 Mon Sep 17 00:00:00 2001 From: Anatoly Trosinenko Date: Thu, 20 Mar 2025 20:15:07 +0300 Subject: [PATCH] [BOLT] Gadget scanner: Detect address materialization and arithmetics In addition to authenticated pointers, consider the contents of a register safe if it was * written by PC-relative address computation * updated by an arithmetic instruction whose input address is safe --- bolt/include/bolt/Core/MCPlusBuilder.h| 16 ++ bolt/lib/Passes/PAuthGadgetScanner.cpp| 92 +-- .../Target/AArch64/AArch64MCPlusBuilder.cpp | 30 +++ .../AArch64/gs-pacret-autiasp.s | 15 -- .../gs-pauth-address-materialization.s| 228 ++ .../binary-analysis/AArch64/lit.local.cfg | 3 +- 6 files changed, 345 insertions(+), 39 deletions(-) create mode 100644 bolt/test/binary-analysis/AArch64/gs-pauth-address-materialization.s diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h index 8b6dc14121480..e94f82d00349a 100644 --- a/bolt/include/bolt/Core/MCPlusBuilder.h +++ b/bolt/include/bolt/Core/MCPlusBuilder.h @@ -587,6 +587,22 @@ class MCPlusBuilder { return getNoRegister(); } + virtual MCPhysReg getSafelyMaterializedAddressReg(const MCInst &Inst) const { +llvm_unreachable("not implemented"); +return getNoRegister(); + } + + /// Analyzes if this instruction can safely perform address arithmetics. + /// + /// If the first element of the returned pair is no-register, this instruction + /// is considered unknown. Otherwise, (output, input) pair is returned, + /// so that output is as trusted as input is. + virtual std::pair + analyzeSafeAddressArithmetics(const MCInst &Inst) const { +llvm_unreachable("not implemented"); +return std::make_pair(getNoRegister(), getNoRegister()); + } + virtual bool isTerminator(const MCInst &Inst) const; virtual bool isNoop(const MCInst &Inst) const { diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp index a3b320c545734..16da08551a34d 100644 --- a/bolt/lib/Passes/PAuthGadgetScanner.cpp +++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp @@ -335,6 +335,50 @@ class PacRetAnalysis }); } + BitVector getClobberedRegs(const MCInst &Point) const { +BitVector Clobbered(NumRegs, false); +// Assume a call can clobber all registers, including callee-saved +// registers. There's a good chance that callee-saved registers will be +// saved on the stack at some point during execution of the callee. +// Therefore they should also be considered as potentially modified by an +// attacker/written to. +// Also, not all functions may respect the AAPCS ABI rules about +// caller/callee-saved registers. +if (BC.MIB->isCall(Point)) + Clobbered.set(); +else + BC.MIB->getClobberedRegs(Point, Clobbered); +return Clobbered; + } + + // Returns all registers that can be treated as if they are written by an + // authentication instruction. + SmallVector getAuthenticatedRegs(const MCInst &Point, + const State &Cur) const { +SmallVector Regs; +const MCPhysReg NoReg = BC.MIB->getNoRegister(); + +// A signed pointer can be authenticated, or +ErrorOr AutReg = BC.MIB->getAuthenticatedReg(Point); +if (AutReg && *AutReg != NoReg) + Regs.push_back(*AutReg); + +// ... a safe address can be materialized, or +MCPhysReg NewAddrReg = BC.MIB->getSafelyMaterializedAddressReg(Point); +if (NewAddrReg != NoReg) + Regs.push_back(NewAddrReg); + +// ... 
an address can be updated in a safe manner, producing the result +// which is as trusted as the input address. +MCPhysReg ArithResult, ArithSrc; +std::tie(ArithResult, ArithSrc) = +BC.MIB->analyzeSafeAddressArithmetics(Point); +if (ArithResult != NoReg && Cur.SafeToDerefRegs[ArithSrc]) + Regs.push_back(ArithResult); + +return Regs; + } + State computeNext(const MCInst &Point, const State &Cur) { PacStatePrinter P(BC); LLVM_DEBUG({ @@ -355,19 +399,20 @@ class PacRetAnalysis return State(); } +// First, compute various properties of the instruction, taking the state +// before its execution into account, if necessary. + +BitVector Clobbered = getClobberedRegs(Point); +// Compute the set of registers that can be considered as written by +// an authentication instruction. This includes operations that are +// *strictly better* than authentication, such as materializing a +// PC-relative constant. +SmallVector AuthenticatedOrBetter = +getAuthenticatedRegs(Point, Cur); + +// Then, compute the state after this instruction is executed. State Next = Cur; -BitVector Clobbered(NumRegs, false); -// Assume a call can clo
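The AArch64 overrides themselves fall outside the truncated quote above (the diffstat shows them landing in AArch64MCPlusBuilder.cpp). Below is a hedged sketch of what `getSafelyMaterializedAddressReg` could look like there; the committed implementation may differ.

```cpp
// Assumption-marked sketch, not the committed code: ADR and ADRP produce
// PC-relative addresses, which an attacker cannot substitute, so their
// destination register can be treated as safely materialized.
MCPhysReg getSafelyMaterializedAddressReg(const MCInst &Inst) const override {
  switch (Inst.getOpcode()) {
  case AArch64::ADR:
  case AArch64::ADRP:
    return Inst.getOperand(0).getReg();
  default:
    return getNoRegister();
  }
}
```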
[llvm-branch-commits] [llvm] [LV] Reduce register usage for scaled reductions (PR #133090)
NickGuy-Arm wrote: Could you pre-commit this test, so we can see how the output changes before and after the changes in LoopVectorize.cpp? https://github.com/llvm/llvm-project/pull/133090
[llvm-branch-commits] [llvm] [LV] Reduce register usage for scaled reductions (PR #133090)
https://github.com/NickGuy-Arm edited https://github.com/llvm/llvm-project/pull/133090
[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: Detect address materialization and arithmetics (PR #132540)
@@ -0,0 +1,228 @@ +// RUN: %clang %cflags -march=armv8.3-a %s -o %t.exe +// RUN: llvm-bolt-binary-analysis --scanners=pauth %t.exe 2>&1 | FileCheck %s + +// Test various patterns that should or should not be considered safe +// materialization of PC-relative addresses. +// +// Note that while "instructions that write to the affected registers" +// section of the report is still technically correct, it does not necessarily +// mentions the instructions that are used incorrectly. +// +// FIXME: Switch to PAC* instructions instead of indirect tail call for testing +//if a register is considered safe when detection of signing oracles is +//implemented, as it is more traditional usage of PC-relative constants. +//Moreover, using PAC instructions would improve test robustness, as +//handling of *calls* can be influenced by what BOLT classifies as a +//tail call, for example. + +.text + atrosinenko wrote: Added `good_negative_offset` test case, thanks! https://github.com/llvm/llvm-project/pull/132540
[llvm-branch-commits] [llvm] release/20.x: [TailDuplicator] Determine if computed gotos using `blockaddress` (#132536) (PR #133082)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/133082
[llvm-branch-commits] [llvm] release/20.x: [TailDuplicator] Determine if computed gotos using `blockaddress` (#132536) (PR #133082)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/133082 Backport 66f158d91803875de63d8f2a437ce8ecb22c4141 Requested by: @dianqk >From 8241e69958028a64b9246986ee97b2d73ac48df4 Mon Sep 17 00:00:00 2001 From: dianqk Date: Wed, 26 Mar 2025 21:27:43 +0800 Subject: [PATCH] [TailDuplicator] Determine if computed gotos using `blockaddress` (#132536) Using `blockaddress` should be more reliable than determining if an operand comes from a jump table index. Alternative: Add the `MachineInstr::MIFlag::ComputedGoto` flag when lowering `indirectbr`. But I don't think this approach is suitable to backport. (cherry picked from commit 66f158d91803875de63d8f2a437ce8ecb22c4141) --- llvm/include/llvm/CodeGen/MachineBasicBlock.h | 9 + llvm/include/llvm/CodeGen/MachineInstr.h | 16 +- llvm/lib/CodeGen/TailDuplicator.cpp | 2 +- .../CodeGen/X86/tail-dup-computed-goto.mir| 265 +- 4 files changed, 203 insertions(+), 89 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h index 0b803a9724742..11efb2f656a7a 100644 --- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -311,6 +311,15 @@ class MachineBasicBlock const MachineFunction *getParent() const { return xParent; } MachineFunction *getParent() { return xParent; } + /// Returns true if the original IR terminator is an `indirectbr`. This + /// typically corresponds to a `goto` in C, rather than jump tables. + bool terminatorIsComputedGoto() const { +return back().isIndirectBranch() && + llvm::all_of(successors(), [](const MachineBasicBlock *Succ) { + return Succ->isIRBlockAddressTaken(); + }); + } + using instr_iterator = Instructions::iterator; using const_instr_iterator = Instructions::const_iterator; using reverse_instr_iterator = Instructions::reverse_iterator; diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h index b26cabe801ee8..997d6a5554e06 100644 --- a/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/llvm/include/llvm/CodeGen/MachineInstr.h @@ -994,17 +994,8 @@ class MachineInstr /// Return true if this is an indirect branch, such as a /// branch through a register. - bool isIndirectBranch(QueryType Type = AnyInBundle, -bool IncludeJumpTable = true) const { -return hasProperty(MCID::IndirectBranch, Type) && - (IncludeJumpTable || !llvm::any_of(operands(), [](const auto &Op) { - return Op.isJTI(); -})); - } - - bool isComputedGoto(QueryType Type = AnyInBundle) const { -// Jump tables are not considered computed gotos. -return isIndirectBranch(Type, /*IncludeJumpTable=*/false); + bool isIndirectBranch(QueryType Type = AnyInBundle) const { +return hasProperty(MCID::IndirectBranch, Type); } /// Return true if this is a branch which may fall @@ -2088,6 +2079,9 @@ class MachineInstr MCSymbol *PreInstrSymbol, MCSymbol *PostInstrSymbol, MDNode *HeapAllocMarker, MDNode *PCSections, uint32_t CFIType, MDNode *MMRAs); + + /// Returns true if all successors are IRBlockAddressTaken. 
+ bool jumpToIRBlockAddressTaken() const; }; /// Special DenseMapInfo traits to compare MachineInstr* by *value* of the diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp index 21f75458c90f3..b0de3c322ddd0 100644 --- a/llvm/lib/CodeGen/TailDuplicator.cpp +++ b/llvm/lib/CodeGen/TailDuplicator.cpp @@ -604,7 +604,7 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple, bool HasComputedGoto = false; if (!TailBB.empty()) { HasIndirectbr = TailBB.back().isIndirectBranch(); -HasComputedGoto = TailBB.back().isComputedGoto(); +HasComputedGoto = TailBB.terminatorIsComputedGoto(); } if (HasIndirectbr && PreRegAlloc) diff --git a/llvm/test/CodeGen/X86/tail-dup-computed-goto.mir b/llvm/test/CodeGen/X86/tail-dup-computed-goto.mir index a472dc67d8d51..17de405928d37 100644 --- a/llvm/test/CodeGen/X86/tail-dup-computed-goto.mir +++ b/llvm/test/CodeGen/X86/tail-dup-computed-goto.mir @@ -2,15 +2,27 @@ # RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=early-tailduplication -tail-dup-pred-size=1 -tail-dup-succ-size=1 %s -o - | FileCheck %s # Check that only the computed goto is not be restrict by tail-dup-pred-size and tail-dup-succ-size. --- | + @computed_goto.dispatch = constant [5 x ptr] [ptr null, ptr blockaddress(@computed_goto, %bb1), ptr blockaddress(@computed_goto, %bb2), ptr blockaddress(@computed_goto, %bb3), ptr blockaddress(@computed_goto, %bb4)] declare i64 @f0() declare i64 @f1() declare i64 @f2() declare i64 @f3() declare i64 @f4() declare i64 @f5() - @computed_goto.dispatch = external global [5 x ptr] - define void @computed_goto() { ret void } + defi
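The `blockaddress` test works because of how the two kinds of indirect branch arise from source. A minimal C++ illustration (using the GNU label-as-value extension accepted by Clang and GCC; the function bodies are made up):

```cpp
// A dense switch may lower to a jump table: an indirect branch whose
// successor blocks are ordinary, not blockaddress-taken.
int jump_table(int i) {
  switch (i) {
  case 0: return 10;
  case 1: return 11;
  case 2: return 12;
  case 3: return 13;
  default: return -1;
  }
}

// A computed goto takes the address of labels, so every successor of the
// resulting indirectbr is IRBlockAddressTaken -- exactly the property
// terminatorIsComputedGoto() now checks before allowing tail duplication
// past the usual size limits.
int computed_goto(int i) {
  static void *dispatch[] = {&&op0, &&op1};
  goto *dispatch[i & 1];
op0:
  return 0;
op1:
  return 1;
}
```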
[llvm-branch-commits] [llvm] release/20.x: [TailDuplicator] Determine if computed gotos using `blockaddress` (#132536) (PR #133082)
llvmbot wrote: @arsenm What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/133082
[llvm-branch-commits] [mlir] cf9a10d - Revert "[MLIR][NVGPU] Use `gpu.dynamic_shared_memory` in tests (#133051)"
Author: Karlo Basioli Date: 2025-03-26T15:12:55Z New Revision: cf9a10d4665d71acd86de0b98cbaf25dd9dd03cc URL: https://github.com/llvm/llvm-project/commit/cf9a10d4665d71acd86de0b98cbaf25dd9dd03cc DIFF: https://github.com/llvm/llvm-project/commit/cf9a10d4665d71acd86de0b98cbaf25dd9dd03cc.diff LOG: Revert "[MLIR][NVGPU] Use `gpu.dynamic_shared_memory` in tests (#133051)" This reverts commit 15f5a7a3ec71c624cea0cbdf02e3c5205ba81d9d. Added: Modified: mlir/test/Integration/GPU/CUDA/sm90/gemm_f32_f16_f16_128x128x128.mlir mlir/test/Integration/GPU/CUDA/sm90/gemm_pred_f32_f16_f16_128x128x128.mlir mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x64_swizzle128b.mlir Removed: diff --git a/mlir/test/Integration/GPU/CUDA/sm90/gemm_f32_f16_f16_128x128x128.mlir b/mlir/test/Integration/GPU/CUDA/sm90/gemm_f32_f16_f16_128x128x128.mlir index 07324c603012a..1c5cf73db6eba 100644 --- a/mlir/test/Integration/GPU/CUDA/sm90/gemm_f32_f16_f16_128x128x128.mlir +++ b/mlir/test/Integration/GPU/CUDA/sm90/gemm_f32_f16_f16_128x128x128.mlir @@ -141,18 +141,14 @@ func.func @main() { %c16 = arith.constant 16 : index %c4096 = arith.constant 4096 : index %c8 = arith.constant 8 : index -%txcount = arith.constant 32768 : index -%c24576 = arith.constant 24576 : index -%c16384 = arith.constant 16384 : index -%c49152 = arith.constant 49152 : index -%c57344 = arith.constant 57344 : index +%txcount = arith.constant 32768 : index %tidx = gpu.thread_id x %dynamicMem = memref.get_global @dynamicShmem : memref<0xf16, 3> %lhsShmem = memref.reinterpret_cast %dynamicMem to offset: [0], sizes: [2, 128, 64], strides: [8192, 64, 1] : memref<0xf16, 3> to memref<2x128x64xf16, 3> %rhsShmem2 = memref.reinterpret_cast %dynamicMem to offset: [0], sizes: [4, 64, 128], strides: [8192,128,1] : memref<0xf16, 3> to memref<4x64x128xf16,3> %rhsShmem = memref.subview %rhsShmem2[2, 0, 0][2, 64, 128][1, 1, 1] : memref<4x64x128xf16,3> to memref<2x64x128xf16, strided<[8192, 128, 1], offset: 16384>, 3> -%dynsmem = gpu.dynamic_shared_memory : memref> + // Step 1. 
[GPU] Create Async Transactional Barriers (mbarriers) %barrier = nvgpu.mbarrier.create -> !barrierType %cnd = arith.cmpi eq, %tidx, %c0 : index @@ -165,29 +161,31 @@ func.func @main() { nvgpu.tma.prefetch.descriptor %descA : !lhsTensorMap nvgpu.tma.prefetch.descriptor %descB : !rhsTensorMap -// Step 4.1 [GPU] TMA Load Pipeline 1 +// Step 4.1 [GPU] TMA Load Pipeline 1 scf.if %cnd { %pipe = arith.constant 0 : index - %lhsSlice = memref.view %dynsmem[%c0][] : memref> to memref<128x64xf16, #gpu.address_space> - %halfFirst = memref.view %dynsmem[%c16384][] : memref> to memref<64x64xf16, #gpu.address_space> - %halfSecond = memref.view %dynsmem[%c24576][] : memref> to memref<64x64xf16, #gpu.address_space> + %lhsSlice = memref.subview %lhsShmem[0, 0, 0][1, 128, 64][1, 1, 1] : memref<2x128x64xf16, 3> to memref<128x64xf16, 3> + %rhsSlice = memref.subview %rhsShmem[0, 0, 0][1, 64, 128][1, 1, 1] : memref<2x64x128xf16, strided<[8192, 128, 1], offset: 16384>, 3> to memref<64x128xf16, strided<[128, 1], offset: 16384>, 3> + %halfFirst = memref.subview %rhsSlice[0, 0][64, 64][1, 1] : memref<64x128xf16, strided<[128, 1], offset: 16384>, 3> to memref<64x64xf16, strided<[128, 1], offset: 16384>, 3> + %halfSecond = memref.subview %rhsSlice[32, 0][64, 64][1, 1] : memref<64x128xf16, strided<[128, 1], offset: 16384>, 3> to memref<64x64xf16, strided<[128, 1], offset: 20480>, 3> nvgpu.mbarrier.arrive.expect_tx %barrier[%pipe], %txcount : !barrierType %dim = arith.muli %pipe, %c64 : index - nvgpu.tma.async.load %descA[%dim, %c0], %barrier[%pipe] to %lhsSlice : !lhsTensorMap, !barrierType -> memref<128x64xf16, #gpu.address_space> - nvgpu.tma.async.load %descB[%c0, %dim], %barrier[%pipe] to %halfFirst : !rhsTensorMap, !barrierType -> memref<64x64xf16, #gpu.address_space> - nvgpu.tma.async.load %descB[%c64, %dim], %barrier[%pipe] to %halfSecond : !rhsTensorMap, !barrierType -> memref<64x64xf16, #gpu.address_space> + nvgpu.tma.async.load %descA[%dim, %c0], %barrier[%pipe] to %lhsSlice : !lhsTensorMap, !barrierType -> memref<128x64xf16, 3> + nvgpu.tma.async.load %descB[%c0, %dim], %barrier[%pipe] to %halfFirst : !rhsTensorMap, !barrierType -> memref<64x64xf16, strided<[128, 1], offset: 16384>, 3> + nvgpu.tma.async.load %descB[%c64, %dim], %barrier[%pipe] to %halfSecond : !rhsTensorMap, !barrierType -> memref<64x64xf16, strided<[128, 1], offset: 20480>, 3> } // Step 4.2 [GPU] TMA Load Pipeline 2 scf.if %cnd { %pipe = arith.constant 1 : index - %lhsSlice = memref.view %dynsmem[%c32768][] : memref> to memref<128x64xf16, #gpu.address_space> - %halfFirst =
[llvm-branch-commits] [libcxx] [libc++] Remove the need for _LIBCPP_TEMPLATE_VIS (PR #133010)
llvmbot wrote: @llvm/pr-subscribers-libcxx Author: Nikolas Klauser (philnik777) Changes Since almost all of our symbols are explicitly annotated with visibility macros, this makes almost no difference to the actual visibility of symbols. Not having to annotate types at the cost of having a few more symbols with default visibility on GCC seems like the right choice to me. --- Patch is 322.31 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/133010.diff 261 Files Affected: - (modified) libcxx/.clang-format (-1) - (modified) libcxx/docs/DesignDocs/VisibilityMacros.rst (+2-14) - (modified) libcxx/include/__chrono/duration.h (+5-5) - (modified) libcxx/include/__chrono/formatter.h (+22-22) - (modified) libcxx/include/__chrono/parser_std_format_spec.h (+1-1) - (modified) libcxx/include/__chrono/time_point.h (+2-3) - (modified) libcxx/include/__compare/common_comparison_category.h (+1-1) - (modified) libcxx/include/__compare/compare_three_way.h (+1-1) - (modified) libcxx/include/__compare/compare_three_way_result.h (+1-2) - (modified) libcxx/include/__config (+6-12) - (modified) libcxx/include/__coroutine/coroutine_handle.h (+3-3) - (modified) libcxx/include/__coroutine/noop_coroutine_handle.h (+1-1) - (modified) libcxx/include/__format/buffer.h (+11-11) - (modified) libcxx/include/__format/container_adaptor.h (+4-4) - (modified) libcxx/include/__format/format_arg.h (+3-3) - (modified) libcxx/include/__format/format_arg_store.h (+1-1) - (modified) libcxx/include/__format/format_args.h (+1-1) - (modified) libcxx/include/__format/format_context.h (+3-8) - (modified) libcxx/include/__format/format_functions.h (+5-5) - (modified) libcxx/include/__format/format_parse_context.h (+1-1) - (modified) libcxx/include/__format/format_string.h (+1-1) - (modified) libcxx/include/__format/format_to_n_result.h (+1-1) - (modified) libcxx/include/__format/formatter.h (+1-1) - (modified) libcxx/include/__format/formatter_bool.h (+1-1) - (modified) libcxx/include/__format/formatter_char.h (+4-4) - (modified) libcxx/include/__format/formatter_floating_point.h (+5-5) - (modified) libcxx/include/__format/formatter_integer.h (+13-13) - (modified) libcxx/include/__format/formatter_integral.h (+3-3) - (modified) libcxx/include/__format/formatter_pointer.h (+4-4) - (modified) libcxx/include/__format/formatter_string.h (+6-6) - (modified) libcxx/include/__format/formatter_tuple.h (+3-5) - (modified) libcxx/include/__format/parser_std_format_spec.h (+1-1) - (modified) libcxx/include/__format/range_default_formatter.h (+7-7) - (modified) libcxx/include/__format/range_formatter.h (+1-1) - (modified) libcxx/include/__functional/binary_function.h (+1-1) - (modified) libcxx/include/__functional/binary_negate.h (+1-1) - (modified) libcxx/include/__functional/binder1st.h (+2-2) - (modified) libcxx/include/__functional/binder2nd.h (+2-2) - (modified) libcxx/include/__functional/boyer_moore_searcher.h (+2-2) - (modified) libcxx/include/__functional/default_searcher.h (+1-1) - (modified) libcxx/include/__functional/function.h (+3-3) - (modified) libcxx/include/__functional/hash.h (+26-26) - (modified) libcxx/include/__functional/mem_fun_ref.h (+8-9) - (modified) libcxx/include/__functional/operations.h (+38-38) - (modified) libcxx/include/__functional/pointer_to_binary_function.h (+1-2) - (modified) libcxx/include/__functional/pointer_to_unary_function.h (+1-2) - (modified) libcxx/include/__functional/reference_wrapper.h (+1-1) - (modified) libcxx/include/__functional/unary_function.h (+1-1) - 
(modified) libcxx/include/__functional/unary_negate.h (+1-2) - (modified) libcxx/include/__fwd/array.h (+1-1) - (modified) libcxx/include/__fwd/complex.h (+1-1) - (modified) libcxx/include/__fwd/deque.h (+1-1) - (modified) libcxx/include/__fwd/format.h (+3-3) - (modified) libcxx/include/__fwd/fstream.h (+4-4) - (modified) libcxx/include/__fwd/functional.h (+3-3) - (modified) libcxx/include/__fwd/ios.h (+1-1) - (modified) libcxx/include/__fwd/istream.h (+2-2) - (modified) libcxx/include/__fwd/map.h (+2-2) - (modified) libcxx/include/__fwd/memory.h (+2-2) - (modified) libcxx/include/__fwd/memory_resource.h (+1-1) - (modified) libcxx/include/__fwd/ostream.h (+1-1) - (modified) libcxx/include/__fwd/pair.h (+1-1) - (modified) libcxx/include/__fwd/queue.h (+2-2) - (modified) libcxx/include/__fwd/set.h (+2-2) - (modified) libcxx/include/__fwd/sstream.h (+4-4) - (modified) libcxx/include/__fwd/stack.h (+1-1) - (modified) libcxx/include/__fwd/streambuf.h (+1-1) - (modified) libcxx/include/__fwd/string.h (+2-2) - (modified) libcxx/include/__fwd/string_view.h (+1-1) - (modified) libcxx/include/__fwd/subrange.h (+1-1) - (modified) libcxx/include/__fwd/tuple.h (+3-3) - (modified) libcxx/include/__fwd/variant.h (+3-3) - (modified) libcxx/include/__fwd/vector.h (+1-1) - (modified) libcxx/include/__hash_table (+22-22) - (modified) libcxx/include/_
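The patch body is truncated above, but the mechanism described in the review thread can be sketched. Treat every macro spelling below as an assumption; only the idea is taken from the PR: visibility is applied once, at the namespace, so per-template `_LIBCPP_TEMPLATE_VIS` annotations become redundant.

```cpp
// Hedged sketch of namespace-level type visibility; the real libc++ macros
// and conditions may differ. With __type_visibility__ (Clang) this matches
// what _LIBCPP_TEMPLATE_VIS already did, hence "no-op on Clang"; falling
// back to plain __visibility__ (GCC) exports a few more symbols with
// default visibility, as the PR description notes.
#if __has_attribute(__type_visibility__)
#  define NAMESPACE_VIS_SKETCH __attribute__((__type_visibility__("default")))
#else
#  define NAMESPACE_VIS_SKETCH __attribute__((__visibility__("default")))
#endif

namespace NAMESPACE_VIS_SKETCH std_sketch {
// Class templates declared here inherit the namespace's type visibility,
// so no per-declaration annotation is needed.
template <class T>
class vector_sketch;
} // namespace std_sketch
```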
[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: Detect address materialization and arithmetics (PR #132540)
https://github.com/atrosinenko updated https://github.com/llvm/llvm-project/pull/132540 >From 774ecf8d94cb7acff292af5b16655a62b8b23201 Mon Sep 17 00:00:00 2001 From: Anatoly Trosinenko Date: Thu, 20 Mar 2025 20:15:07 +0300 Subject: [PATCH] [BOLT] Gadget scanner: Detect address materialization and arithmetics In addition to authenticated pointers, consider the contents of a register safe if it was * written by PC-relative address computation * updated by an arithmetic instruction whose input address is safe --- bolt/include/bolt/Core/MCPlusBuilder.h| 16 ++ bolt/lib/Passes/PAuthGadgetScanner.cpp| 92 +-- .../Target/AArch64/AArch64MCPlusBuilder.cpp | 30 +++ .../AArch64/gs-pacret-autiasp.s | 15 -- .../gs-pauth-address-materialization.s| 228 ++ .../binary-analysis/AArch64/lit.local.cfg | 3 +- 6 files changed, 345 insertions(+), 39 deletions(-) create mode 100644 bolt/test/binary-analysis/AArch64/gs-pauth-address-materialization.s diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h index 8b6dc14121480..e94f82d00349a 100644 --- a/bolt/include/bolt/Core/MCPlusBuilder.h +++ b/bolt/include/bolt/Core/MCPlusBuilder.h @@ -587,6 +587,22 @@ class MCPlusBuilder { return getNoRegister(); } + virtual MCPhysReg getSafelyMaterializedAddressReg(const MCInst &Inst) const { +llvm_unreachable("not implemented"); +return getNoRegister(); + } + + /// Analyzes if this instruction can safely perform address arithmetics. + /// + /// If the first element of the returned pair is no-register, this instruction + /// is considered unknown. Otherwise, (output, input) pair is returned, + /// so that output is as trusted as input is. + virtual std::pair + analyzeSafeAddressArithmetics(const MCInst &Inst) const { +llvm_unreachable("not implemented"); +return std::make_pair(getNoRegister(), getNoRegister()); + } + virtual bool isTerminator(const MCInst &Inst) const; virtual bool isNoop(const MCInst &Inst) const { diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp index a3b320c545734..16da08551a34d 100644 --- a/bolt/lib/Passes/PAuthGadgetScanner.cpp +++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp @@ -335,6 +335,50 @@ class PacRetAnalysis }); } + BitVector getClobberedRegs(const MCInst &Point) const { +BitVector Clobbered(NumRegs, false); +// Assume a call can clobber all registers, including callee-saved +// registers. There's a good chance that callee-saved registers will be +// saved on the stack at some point during execution of the callee. +// Therefore they should also be considered as potentially modified by an +// attacker/written to. +// Also, not all functions may respect the AAPCS ABI rules about +// caller/callee-saved registers. +if (BC.MIB->isCall(Point)) + Clobbered.set(); +else + BC.MIB->getClobberedRegs(Point, Clobbered); +return Clobbered; + } + + // Returns all registers that can be treated as if they are written by an + // authentication instruction. + SmallVector getAuthenticatedRegs(const MCInst &Point, + const State &Cur) const { +SmallVector Regs; +const MCPhysReg NoReg = BC.MIB->getNoRegister(); + +// A signed pointer can be authenticated, or +ErrorOr AutReg = BC.MIB->getAuthenticatedReg(Point); +if (AutReg && *AutReg != NoReg) + Regs.push_back(*AutReg); + +// ... a safe address can be materialized, or +MCPhysReg NewAddrReg = BC.MIB->getSafelyMaterializedAddressReg(Point); +if (NewAddrReg != NoReg) + Regs.push_back(NewAddrReg); + +// ... 
an address can be updated in a safe manner, producing the result +// which is as trusted as the input address. +MCPhysReg ArithResult, ArithSrc; +std::tie(ArithResult, ArithSrc) = +BC.MIB->analyzeSafeAddressArithmetics(Point); +if (ArithResult != NoReg && Cur.SafeToDerefRegs[ArithSrc]) + Regs.push_back(ArithResult); + +return Regs; + } + State computeNext(const MCInst &Point, const State &Cur) { PacStatePrinter P(BC); LLVM_DEBUG({ @@ -355,19 +399,20 @@ class PacRetAnalysis return State(); } +// First, compute various properties of the instruction, taking the state +// before its execution into account, if necessary. + +BitVector Clobbered = getClobberedRegs(Point); +// Compute the set of registers that can be considered as written by +// an authentication instruction. This includes operations that are +// *strictly better* than authentication, such as materializing a +// PC-relative constant. +SmallVector AuthenticatedOrBetter = +getAuthenticatedRegs(Point, Cur); + +// Then, compute the state after this instruction is executed. State Next = Cur; -BitVector Clobbered(NumRegs, false); -// Assume a call can clo
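Putting the two helpers in the quoted diff together, the shape of the reworked transfer function can be sketched as below. This is a simplification: the real `computeNext` also handles register aliases and more state, and its exact bookkeeping may differ.

```cpp
// Sketch built from the helpers shown above. A write clobbers safety,
// except a write that is an authentication "or better": PC-relative
// materialization, or arithmetic over an already-safe address.
State computeNextSketch(const MCInst &Point, const State &Cur) {
  BitVector Clobbered = getClobberedRegs(Point);
  SmallVector<MCPhysReg> AuthenticatedOrBetter =
      getAuthenticatedRegs(Point, Cur);

  State Next = Cur;
  Next.SafeToDerefRegs.reset(Clobbered); // drop every clobbered register
  for (MCPhysReg Reg : AuthenticatedOrBetter)
    Next.SafeToDerefRegs.set(Reg); // re-establish the safe ones
  return Next;
}
```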
[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: Detect address materialization and arithmetics (PR #132540)
@@ -0,0 +1,228 @@ +// RUN: %clang %cflags -march=armv8.3-a %s -o %t.exe +// RUN: llvm-bolt-binary-analysis --scanners=pauth %t.exe 2>&1 | FileCheck %s + +// Test various patterns that should or should not be considered safe +// materialization of PC-relative addresses. +// +// Note that while "instructions that write to the affected registers" +// section of the report is still technically correct, it does not necessarily +// mentions the instructions that are used incorrectly. +// +// FIXME: Switch to PAC* instructions instead of indirect tail call for testing +//if a register is considered safe when detection of signing oracles is +//implemented, as it is more traditional usage of PC-relative constants. +//Moreover, using PAC instructions would improve test robustness, as +//handling of *calls* can be influenced by what BOLT classifies as a +//tail call, for example. + +.text + +// Define a function that is reachable by ADR instruction. +.type sym,@function +sym: +ret +.size sym, .-sym + +.globl good_adr +.type good_adr,@function +good_adr: +// CHECK-NOT: good_adr +adr x0, sym +br x0 +.size good_adr, .-good_adr + +.globl good_adrp +.type good_adrp,@function +good_adrp: +// CHECK-NOT: good_adrp +adrpx0, sym +br x0 +.size good_adrp, .-good_adrp + +.globl good_adrp_add +.type good_adrp_add,@function +good_adrp_add: +// CHECK-NOT: good_adrp_add +adrpx0, sym +add x0, x0, :lo12:sym +br x0 +.size good_adrp_add, .-good_adrp_add + +.globl good_adrp_add_with_const_offset +.type good_adrp_add_with_const_offset,@function +good_adrp_add_with_const_offset: +// CHECK-NOT: good_adrp_add_with_const_offset +adrpx0, sym +add x0, x0, :lo12:sym +add x0, x0, #8 +br x0 +.size good_adrp_add_with_const_offset, .-good_adrp_add_with_const_offset + +.globl bad_adrp_with_nonconst_offset +.type bad_adrp_with_nonconst_offset,@function +bad_adrp_with_nonconst_offset: +// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_adrp_with_nonconst_offset, basic block {{[^,]+}}, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: br x0 # TAILCALL +// CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are: +// CHECK-NEXT: 1. {{[0-9a-f]+}}: add x0, x0, x1 +// CHECK-NEXT: This happens in the following basic block: +// CHECK-NEXT: {{[0-9a-f]+}}: adrpx0, #{{.*}} +// CHECK-NEXT: {{[0-9a-f]+}}: add x0, x0, x1 +// CHECK-NEXT: {{[0-9a-f]+}}: br x0 # TAILCALL +adrpx0, sym +add x0, x0, x1 +br x0 +.size bad_adrp_with_nonconst_offset, .-bad_adrp_with_nonconst_offset + +.globl bad_split_adrp +.type bad_split_adrp,@function +bad_split_adrp: +// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_split_adrp, basic block {{[^,]+}}, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: br x0 # UNKNOWN CONTROL FLOW +// CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are: +// CHECK-NEXT: 1. {{[0-9a-f]+}}: add x0, x0, #0x{{[0-9a-f]+}} +// CHECK-NEXT: This happens in the following basic block: +// CHECK-NEXT: {{[0-9a-f]+}}: add x0, x0, #0x{{[0-9a-f]+}} +// CHECK-NEXT: {{[0-9a-f]+}}: br x0 # UNKNOWN CONTROL FLOW +cbz x2, 1f +adrpx0, sym +1: +add x0, x0, :lo12:sym +br x0 +.size bad_split_adrp, .-bad_split_adrp + +// Materialization of absolute addresses is not expected. 
+ +.globl bad_immediate_constant +.type bad_immediate_constant,@function +bad_immediate_constant: +// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_immediate_constant, basic block {{[^,]+}}, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: br x0 # TAILCALL +// CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are: +// CHECK-NEXT: 1. {{[0-9a-f]+}}: mov x0, #{{.*}} +// CHECK-NEXT: This happens in the following basic block: +// CHECK-NEXT: {{[0-9a-f]+}}: mov x0, #{{.*}} +// CHECK-NEXT: {{[0-9a-f]+}}: br x0 # TAILCALL +movz x0, #1234 +br x0 jacobbramley wrote: I think the classification of good and bad sequences is probably a bit tricky in general. For example, the `#1234` is not attacker-controlled, and in some real code we _might_ use `movz` and `movk` to materialise a constant address. We can surely update these tests as other cases come up, so I don't think this needs to change, but I wanted to acknowledge it.
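If `movz`/`movk` materialization of a constant address were ever deemed safe, the observation above maps naturally onto the hooks this PR introduces. The following is purely hypothetical and not part of the patch: `movz` could go through `getSafelyMaterializedAddressReg` (the immediate is encoded in the instruction, not attacker-controlled), while `movk` only refines an existing value, so it fits the (output, input) shape of the arithmetic hook.

```cpp
// Hypothetical sketch, not committed code: treat movk xD, #imm, lsl #s as
// producing a result exactly as trusted as the xD it updates.
std::pair<MCPhysReg, MCPhysReg>
analyzeSafeAddressArithmetics(const MCInst &Inst) const override {
  switch (Inst.getOpcode()) {
  case AArch64::MOVKXi: {
    MCPhysReg Reg = Inst.getOperand(0).getReg();
    return std::make_pair(Reg, Reg);
  }
  default:
    return std::make_pair(getNoRegister(), getNoRegister());
  }
}
```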
[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: Detect address materialization and arithmetics (PR #132540)
@@ -0,0 +1,228 @@
+// RUN: %clang %cflags -march=armv8.3-a %s -o %t.exe
+// RUN: llvm-bolt-binary-analysis --scanners=pauth %t.exe 2>&1 | FileCheck %s
+
+// Test various patterns that should or should not be considered safe
+// materialization of PC-relative addresses.
+//
+// Note that while "instructions that write to the affected registers"
+// section of the report is still technically correct, it does not necessarily
+// mentions the instructions that are used incorrectly.

jacobbramley wrote:

s/mentions/mention/

https://github.com/llvm/llvm-project/pull/132540
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: Detect address materialization and arithmetics (PR #132540)
https://github.com/atrosinenko updated https://github.com/llvm/llvm-project/pull/132540

From 774ecf8d94cb7acff292af5b16655a62b8b23201 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko
Date: Thu, 20 Mar 2025 20:15:07 +0300
Subject: [PATCH] [BOLT] Gadget scanner: Detect address materialization and
 arithmetics

In addition to authenticated pointers, consider the contents of a
register safe if it was
* written by PC-relative address computation
* updated by an arithmetic instruction whose input address is safe
---
 bolt/include/bolt/Core/MCPlusBuilder.h        |  16 ++
 bolt/lib/Passes/PAuthGadgetScanner.cpp        |  92 +--
 .../Target/AArch64/AArch64MCPlusBuilder.cpp   |  30 +++
 .../AArch64/gs-pacret-autiasp.s               |  15 --
 .../gs-pauth-address-materialization.s        | 228 ++
 .../binary-analysis/AArch64/lit.local.cfg     |   3 +-
 6 files changed, 345 insertions(+), 39 deletions(-)
 create mode 100644 bolt/test/binary-analysis/AArch64/gs-pauth-address-materialization.s

diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index 8b6dc14121480..e94f82d00349a 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -587,6 +587,22 @@ class MCPlusBuilder {
     return getNoRegister();
   }
 
+  virtual MCPhysReg getSafelyMaterializedAddressReg(const MCInst &Inst) const {
+    llvm_unreachable("not implemented");
+    return getNoRegister();
+  }
+
+  /// Analyzes if this instruction can safely perform address arithmetics.
+  ///
+  /// If the first element of the returned pair is no-register, this
+  /// instruction is considered unknown. Otherwise, (output, input) pair is
+  /// returned, so that output is as trusted as input is.
+  virtual std::pair<MCPhysReg, MCPhysReg>
+  analyzeSafeAddressArithmetics(const MCInst &Inst) const {
+    llvm_unreachable("not implemented");
+    return std::make_pair(getNoRegister(), getNoRegister());
+  }
+
   virtual bool isTerminator(const MCInst &Inst) const;
 
   virtual bool isNoop(const MCInst &Inst) const {
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index a3b320c545734..16da08551a34d 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -335,6 +335,50 @@ class PacRetAnalysis
     });
   }
 
+  BitVector getClobberedRegs(const MCInst &Point) const {
+    BitVector Clobbered(NumRegs, false);
+    // Assume a call can clobber all registers, including callee-saved
+    // registers. There's a good chance that callee-saved registers will be
+    // saved on the stack at some point during execution of the callee.
+    // Therefore they should also be considered as potentially modified by an
+    // attacker/written to.
+    // Also, not all functions may respect the AAPCS ABI rules about
+    // caller/callee-saved registers.
+    if (BC.MIB->isCall(Point))
+      Clobbered.set();
+    else
+      BC.MIB->getClobberedRegs(Point, Clobbered);
+    return Clobbered;
+  }
+
+  // Returns all registers that can be treated as if they are written by an
+  // authentication instruction.
+  SmallVector<MCPhysReg> getAuthenticatedRegs(const MCInst &Point,
+                                              const State &Cur) const {
+    SmallVector<MCPhysReg> Regs;
+    const MCPhysReg NoReg = BC.MIB->getNoRegister();
+
+    // A signed pointer can be authenticated, or
+    ErrorOr<MCPhysReg> AutReg = BC.MIB->getAuthenticatedReg(Point);
+    if (AutReg && *AutReg != NoReg)
+      Regs.push_back(*AutReg);
+
+    // ... a safe address can be materialized, or
+    MCPhysReg NewAddrReg = BC.MIB->getSafelyMaterializedAddressReg(Point);
+    if (NewAddrReg != NoReg)
+      Regs.push_back(NewAddrReg);
+
+    // ... an address can be updated in a safe manner, producing the result
+    // which is as trusted as the input address.
+    MCPhysReg ArithResult, ArithSrc;
+    std::tie(ArithResult, ArithSrc) =
+        BC.MIB->analyzeSafeAddressArithmetics(Point);
+    if (ArithResult != NoReg && Cur.SafeToDerefRegs[ArithSrc])
+      Regs.push_back(ArithResult);
+
+    return Regs;
+  }
+
   State computeNext(const MCInst &Point, const State &Cur) {
     PacStatePrinter P(BC);
     LLVM_DEBUG({
@@ -355,19 +399,20 @@ class PacRetAnalysis
       return State();
     }
 
+    // First, compute various properties of the instruction, taking the state
+    // before its execution into account, if necessary.
+
+    BitVector Clobbered = getClobberedRegs(Point);
+    // Compute the set of registers that can be considered as written by
+    // an authentication instruction. This includes operations that are
+    // *strictly better* than authentication, such as materializing a
+    // PC-relative constant.
+    SmallVector<MCPhysReg> AuthenticatedOrBetter =
+        getAuthenticatedRegs(Point, Cur);
+
+    // Then, compute the state after this instruction is executed.
     State Next = Cur;
-    BitVector Clobbered(NumRegs, false);
-    // Assume a call can clo
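To make the new hooks concrete: below is a minimal sketch of how a target could implement `analyzeSafeAddressArithmetics`, based on the behavior the tests above expect (an `add` with a constant offset preserves trust, an `add` with a register offset does not). The opcode choice and operand layout are assumptions for illustration; the actual AArch64MCPlusBuilder change from this patch is not shown in this excerpt.

```cpp
// Hypothetical sketch, not the patch's AArch64 implementation.
// Assumes the usual MCInst operand layout for ADDXri: (dst, src, imm, shift).
std::pair<MCPhysReg, MCPhysReg>
analyzeSafeAddressArithmetics(const MCInst &Inst) const override {
  switch (Inst.getOpcode()) {
  case AArch64::ADDXri:
    // add Xd, Xn, #imm -- the immediate is not attacker-controlled, so the
    // destination is exactly as trusted as the source address register.
    return std::make_pair(Inst.getOperand(0).getReg(),
                          Inst.getOperand(1).getReg());
  default:
    // Everything else (e.g. add Xd, Xn, Xm) is reported as unknown; the
    // scanner then treats the output register as clobbered.
    return std::make_pair(getNoRegister(), getNoRegister());
  }
}
```

This is what makes `good_adrp_add_with_const_offset` pass while `bad_adrp_with_nonconst_offset` is reported: trust only propagates when the input register is already in `SafeToDerefRegs`.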
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: add RegBankLegalize rules for extends and trunc (PR #132383)
@@ -251,8 +245,11 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 -; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) -; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC]](s1) +; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 +; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[COPY]], [[C]] +; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 +; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[AND]](s32), [[C1]] +; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] nhaehnle wrote: Similar here: This could be combined down to just a no-op -- don't combiners do that already? They should, and so this should probably not be handled separately by legalization https://github.com/llvm/llvm-project/pull/132383 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: add RegBankLegalize rules for extends and trunc (PR #132383)
@@ -269,10 +266,12 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 -; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) -; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC]](s1) -; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF -; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[ANYEXT]](s32), [[DEF]](s32) +; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 +; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[COPY]], [[C]] +; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 +; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[AND]](s32), [[C1]] +; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] +; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[C1]](s32) nhaehnle wrote: Could just be a single G_ANYEXT https://github.com/llvm/llvm-project/pull/132383 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: add RegBankLegalize rules for extends and trunc (PR #132383)
@@ -233,8 +222,13 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 -; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) -; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s16) = G_ANYEXT [[TRUNC]](s1) +; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 +; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[COPY]], [[C]] +; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 +; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[AND]](s32), [[C1]] +; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s16) = G_CONSTANT i16 1 +; CHECK-NEXT: [[C3:%[0-9]+]]:vgpr(s16) = G_CONSTANT i16 0 +; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] nhaehnle wrote: This is unnecessarily convoluted. A single `G_TRUNC` should do the trick. (Isn't that something a combiner could do?) https://github.com/llvm/llvm-project/pull/132383 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: add RegBankLegalize rules for extends and trunc (PR #132383)
https://github.com/nhaehnle edited https://github.com/llvm/llvm-project/pull/132383 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: add RegBankLegalize rules for extends and trunc (PR #132383)
@@ -215,8 +205,7 @@ body: | ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 -; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) -; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s64) = G_ANYEXT [[TRUNC]](s1) +; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s64) = G_ANYEXT [[COPY]](s32) nhaehnle wrote: Isn't this a correctness regression? I'm not entirely certain because I remember there was some weirdness around what G_TRUNC means semantically. Can you explain why there is no need for a trunc or bitwise and or something like that in the output? Note that `anyext_s1_to_s32_vgpr` does leave a G_AND, so either that test shows a code quality issue or this test is incorrect. https://github.com/llvm/llvm-project/pull/132383 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: add RegBankLegalize rules for extends and trunc (PR #132383)
@@ -160,8 +154,7 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] -; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF -; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[DEF]](s32) +; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[C1]](s32) nhaehnle wrote: This change is a code quality regression: the input has `G_ANYEXT`, so the high half can be undefined. https://github.com/llvm/llvm-project/pull/132383 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: add RegBankLegalize rules for extends and trunc (PR #132383)
https://github.com/nhaehnle commented: I didn't look at everything, I just went through some of the tests. https://github.com/llvm/llvm-project/pull/132383 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 1840957 - Revert "[ExecutionEngine] Avoid repeated hash lookups (NFC) (#132587)"
Author: David Spickett Date: 2025-03-26T15:03:58Z New Revision: 1840957286329da64742ea50a9e28fd79e7baa56 URL: https://github.com/llvm/llvm-project/commit/1840957286329da64742ea50a9e28fd79e7baa56 DIFF: https://github.com/llvm/llvm-project/commit/1840957286329da64742ea50a9e28fd79e7baa56.diff LOG: Revert "[ExecutionEngine] Avoid repeated hash lookups (NFC) (#132587)" This reverts commit 0b181de20665574e086ed147868e34e8787a5286. Added: Modified: llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h Removed: diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index 6333bda0270f8..def117448ab6a 100644 --- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -1515,15 +1515,15 @@ void RuntimeDyldELF::resolveAArch64Branch(unsigned SectionID, uint64_t Offset = RelI->getOffset(); unsigned RelType = RelI->getType(); // Look for an existing stub. - auto [It, Inserted] = Stubs.try_emplace(Value); - if (!Inserted) { + StubMap::const_iterator i = Stubs.find(Value); + if (i != Stubs.end()) { resolveRelocation(Section, Offset, - Section.getLoadAddressWithOffset(It->second), RelType, 0); + Section.getLoadAddressWithOffset(i->second), RelType, 0); LLVM_DEBUG(dbgs() << " Stub function found\n"); } else if (!resolveAArch64ShortBranch(SectionID, RelI, Value)) { // Create a new stub function. LLVM_DEBUG(dbgs() << " Create a new stub function\n"); -It->second = Section.getStubOffset(); +Stubs[Value] = Section.getStubOffset(); uint8_t *StubTargetAddr = createStubFunction( Section.getAddressWithOffset(Section.getStubOffset())); @@ -1837,15 +1837,15 @@ RuntimeDyldELF::processRelocationRef( SectionEntry &Section = Sections[SectionID]; // Look up for existing stub. - auto [It, Inserted] = Stubs.try_emplace(Value); - if (!Inserted) { -RelocationEntry RE(SectionID, Offset, RelType, It->second); + StubMap::const_iterator i = Stubs.find(Value); + if (i != Stubs.end()) { +RelocationEntry RE(SectionID, Offset, RelType, i->second); addRelocationForSection(RE, SectionID); LLVM_DEBUG(dbgs() << " Stub function found\n"); } else { // Create a new stub function. LLVM_DEBUG(dbgs() << " Create a new stub function\n"); -It->second = Section.getStubOffset(); +Stubs[Value] = Section.getStubOffset(); unsigned AbiVariant = Obj.getPlatformFlags(); @@ -2075,10 +2075,10 @@ RuntimeDyldELF::processRelocationRef( SectionEntry &Section = Sections[SectionID]; // Look for an existing stub. -auto [It, Inserted] = Stubs.try_emplace(Value); +StubMap::const_iterator i = Stubs.find(Value); uintptr_t StubAddress; -if (!Inserted) { - StubAddress = uintptr_t(Section.getAddressWithOffset(It->second)); +if (i != Stubs.end()) { + StubAddress = uintptr_t(Section.getAddressWithOffset(i->second)); LLVM_DEBUG(dbgs() << " Stub function found\n"); } else { // Create a new stub function. 
@@ -2089,7 +2089,7 @@ RuntimeDyldELF::processRelocationRef( alignTo(BaseAddress + Section.getStubOffset(), getStubAlignment()); unsigned StubOffset = StubAddress - BaseAddress; - It->second = StubOffset; + Stubs[Value] = StubOffset; createStubFunction((uint8_t *)StubAddress); RelocationEntry RE(SectionID, StubOffset + 8, ELF::R_390_64, Value.Offset); diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h index e0d9f2af988fb..79b558eb7796d 100644 --- a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h @@ -307,14 +307,14 @@ class RuntimeDyldMachOARM // This is an ARM branch relocation, need to use a stub function. // Look up for existing stub. SectionEntry &Section = Sections[RE.SectionID]; -auto [It, Inserted] = Stubs.try_emplace(Value); +RuntimeDyldMachO::StubMap::const_iterator i = Stubs.find(Value); uint8_t *Addr; -if (!Inserted) { - Addr = Section.getAddressWithOffset(It->second); +if (i != Stubs.end()) { + Addr = Section.getAddressWithOffset(i->second); } else { // Create a new stub function. assert(Section.getStubOffset() % 4 == 0 && "Misaligned stub"); - It->second = Section.getStubOffset(); + Stubs[Value] = Section.getStubOffset(); uint32_t StubOpcode = 0; if (RE.RelTyp
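For context on the pattern this revert removes: `try_emplace` folds the lookup and the insertion into a single hash probe, while the restored code probes the table twice on the miss path. A standalone sketch of the two shapes, with `std::unordered_map` standing in for BOLT's `StubMap` (illustrative types only, not the actual RuntimeDyld code):

```cpp
#include <cstdint>
#include <unordered_map>

using StubMap = std::unordered_map<uint64_t, uint64_t>;

// Shape restored by the revert: the miss path hashes Value twice.
uint64_t getStubTwoLookups(StubMap &Stubs, uint64_t Value, uint64_t NewOff) {
  auto I = Stubs.find(Value); // lookup #1
  if (I != Stubs.end())
    return I->second;
  Stubs[Value] = NewOff;      // lookup #2 (insertion)
  return NewOff;
}

// Shape from the reverted commit: one lookup serves both hit and miss.
uint64_t getStubOneLookup(StubMap &Stubs, uint64_t Value, uint64_t NewOff) {
  auto [It, Inserted] = Stubs.try_emplace(Value); // value-initialized on miss
  if (Inserted)
    It->second = NewOff; // fill in the freshly inserted slot, no re-hash
  return It->second;
}
```

Both shapes compute the same result; the difference is purely the number of hash probes on insertion.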
[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: Detect address materialization and arithmetics (PR #132540)
@@ -0,0 +1,228 @@
+// RUN: %clang %cflags -march=armv8.3-a %s -o %t.exe
+// RUN: llvm-bolt-binary-analysis --scanners=pauth %t.exe 2>&1 | FileCheck %s
+
+// Test various patterns that should or should not be considered safe
+// materialization of PC-relative addresses.
+//
+// Note that while "instructions that write to the affected registers"
+// section of the report is still technically correct, it does not necessarily
+// mentions the instructions that are used incorrectly.
+//
+// FIXME: Switch to PAC* instructions instead of indirect tail call for testing
+//        if a register is considered safe when detection of signing oracles is
+//        implemented, as it is more traditional usage of PC-relative constants.
+//        Moreover, using PAC instructions would improve test robustness, as
+//        handling of *calls* can be influenced by what BOLT classifies as a
+//        tail call, for example.
+
+        .text
+
+// Define a function that is reachable by ADR instruction.
+        .type   sym,@function
+sym:
+        ret
+        .size   sym, .-sym
+
+        .globl  good_adr
+        .type   good_adr,@function
+good_adr:
+// CHECK-NOT: good_adr
+        adr     x0, sym
+        br      x0
+        .size   good_adr, .-good_adr
+
+        .globl  good_adrp
+        .type   good_adrp,@function
+good_adrp:
+// CHECK-NOT: good_adrp
+        adrp    x0, sym
+        br      x0
+        .size   good_adrp, .-good_adrp
+
+        .globl  good_adrp_add
+        .type   good_adrp_add,@function
+good_adrp_add:
+// CHECK-NOT: good_adrp_add
+        adrp    x0, sym
+        add     x0, x0, :lo12:sym
+        br      x0
+        .size   good_adrp_add, .-good_adrp_add
+
+        .globl  good_adrp_add_with_const_offset
+        .type   good_adrp_add_with_const_offset,@function
+good_adrp_add_with_const_offset:
+// CHECK-NOT: good_adrp_add_with_const_offset
+        adrp    x0, sym
+        add     x0, x0, :lo12:sym
+        add     x0, x0, #8
+        br      x0
+        .size   good_adrp_add_with_const_offset, .-good_adrp_add_with_const_offset
+
+        .globl  bad_adrp_with_nonconst_offset
+        .type   bad_adrp_with_nonconst_offset,@function
+bad_adrp_with_nonconst_offset:
+// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_adrp_with_nonconst_offset, basic block {{[^,]+}}, at address
+// CHECK-NEXT:  The instruction is     {{[0-9a-f]+}}:      br      x0 # TAILCALL
+// CHECK-NEXT:  The 1 instructions that write to the affected registers after any authentication are:
+// CHECK-NEXT:  1.     {{[0-9a-f]+}}:      add     x0, x0, x1
+// CHECK-NEXT:  This happens in the following basic block:
+// CHECK-NEXT:    {{[0-9a-f]+}}:   adrp    x0, #{{.*}}
+// CHECK-NEXT:    {{[0-9a-f]+}}:   add     x0, x0, x1
+// CHECK-NEXT:    {{[0-9a-f]+}}:   br      x0 # TAILCALL
+        adrp    x0, sym
+        add     x0, x0, x1
+        br      x0
+        .size   bad_adrp_with_nonconst_offset, .-bad_adrp_with_nonconst_offset
+
+        .globl  bad_split_adrp
+        .type   bad_split_adrp,@function
+bad_split_adrp:
+// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_split_adrp, basic block {{[^,]+}}, at address
+// CHECK-NEXT:  The instruction is     {{[0-9a-f]+}}:      br      x0 # UNKNOWN CONTROL FLOW
+// CHECK-NEXT:  The 1 instructions that write to the affected registers after any authentication are:
+// CHECK-NEXT:  1.     {{[0-9a-f]+}}:      add     x0, x0, #0x{{[0-9a-f]+}}
+// CHECK-NEXT:  This happens in the following basic block:
+// CHECK-NEXT:    {{[0-9a-f]+}}:   add     x0, x0, #0x{{[0-9a-f]+}}
+// CHECK-NEXT:    {{[0-9a-f]+}}:   br      x0 # UNKNOWN CONTROL FLOW
+        cbz     x2, 1f
+        adrp    x0, sym
+1:
+        add     x0, x0, :lo12:sym
+        br      x0
+        .size   bad_split_adrp, .-bad_split_adrp
+
+// Materialization of absolute addresses is not expected.
+
+        .globl  bad_immediate_constant
+        .type   bad_immediate_constant,@function
+bad_immediate_constant:
+// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_immediate_constant, basic block {{[^,]+}}, at address
+// CHECK-NEXT:  The instruction is     {{[0-9a-f]+}}:      br      x0 # TAILCALL
+// CHECK-NEXT:  The 1 instructions that write to the affected registers after any authentication are:
+// CHECK-NEXT:  1.     {{[0-9a-f]+}}:      mov     x0, #{{.*}}
+// CHECK-NEXT:  This happens in the following basic block:
+// CHECK-NEXT:    {{[0-9a-f]+}}:   mov     x0, #{{.*}}
+// CHECK-NEXT:    {{[0-9a-f]+}}:   br      x0 # TAILCALL
+        movz    x0, #1234
+        br      x0

atrosinenko wrote:

You are right, updated the comment to clarify this.

https://github.com/llvm/llvm-project/pull/132540
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang-tools-extra] [clang] support pack expansions for trailing requires clauses (PR #133190)
zyn0217 wrote: Looks like there are some dependencies on the implicit bool conversion. So feel free to drop the explicit specifier ;) https://github.com/llvm/llvm-project/pull/133190 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
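A minimal illustration of the kind of dependency on the implicit bool conversion being referred to (the type and names here are hypothetical, not the class from the PR):

```cpp
// Hypothetical type: with 'explicit', only contextual conversions compile.
struct MatchResult {
  explicit operator bool() const { return Matched; }
  bool Matched = false;
};

bool anyMatch(const MatchResult &R) {
  if (R)      // OK even with 'explicit': contextual boolean conversion
    return true;
  return R;   // error with 'explicit': copy-initialization needs the
              // implicit conversion
}
```

Dropping `explicit` keeps call sites like the second return statement compiling, which is the trade-off being accepted here.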
[llvm-branch-commits] [llvm] [CI] Move CI over to new project computation script (PR #132642)
https://github.com/boomanaiden154 updated https://github.com/llvm/llvm-project/pull/132642 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [CI] Exclude docs directories from triggering rebuilds (PR #133185)
https://github.com/tstellar approved this pull request. Thank you. We could probably do this for some other directories, but this is a good first start. https://github.com/llvm/llvm-project/pull/133185 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [CI] Move CI over to new project computation script (PR #132642)
https://github.com/boomanaiden154 updated https://github.com/llvm/llvm-project/pull/132642 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [CI] Move CI over to new project computation script (PR #132642)
https://github.com/boomanaiden154 updated https://github.com/llvm/llvm-project/pull/132642 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] release/20.x: [libcxx] [test] Fix restoring LLVM_DIR and Clang_DIR (#132838) (PR #133153)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/133153 Backport 51bceb46f8eeb7c3d060387be315ca41855933c2 Requested by: @mstorsjo >From b6cc484e46b8d837b5256a994c73e1530bbb807d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Wed, 26 Mar 2025 22:13:28 +0200 Subject: [PATCH] [libcxx] [test] Fix restoring LLVM_DIR and Clang_DIR (#132838) In 664f345cd53d1f624d94f9889a1c9fff803e3391, a fix was introduced, attempting to restore LLVM_DIR and Clang_DIR after doing find_package(Clang). However, 6775285e7695f2d45cf455f5d31b2c9fa9362d3d added a return if the clangTidy target wasn't found. If this is hit, we don't restore LLVM_DIR and Clang_DIR, which causes strange effects if CMake is rerun a second time. Move the code for restoring LLVM_DIR and Clang_DIR to directly after the find_package calls, to make sure they are restored, regardless of the find_package outcome. (cherry picked from commit 51bceb46f8eeb7c3d060387be315ca41855933c2) --- libcxx/test/tools/clang_tidy_checks/CMakeLists.txt | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt b/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt index 0f8f0e8864d0f..da045fac92ce4 100644 --- a/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt +++ b/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt @@ -8,6 +8,10 @@ set(Clang_DIR_SAVE ${Clang_DIR}) # versions must match. Otherwise there likely will be ODR-violations. This had # led to crashes and incorrect output of the clang-tidy based checks. find_package(Clang ${CMAKE_CXX_COMPILER_VERSION}) + +set(LLVM_DIR "${LLVM_DIR_SAVE}" CACHE PATH "The directory containing a CMake configuration file for LLVM." FORCE) +set(Clang_DIR "${Clang_DIR_SAVE}" CACHE PATH "The directory containing a CMake configuration file for Clang." FORCE) + if(NOT Clang_FOUND) message(STATUS "Clang-tidy tests are disabled since the " "Clang development package is unavailable.") @@ -19,9 +23,6 @@ if(NOT TARGET clangTidy) return() endif() -set(LLVM_DIR "${LLVM_DIR_SAVE}" CACHE PATH "The directory containing a CMake configuration file for LLVM." FORCE) -set(Clang_DIR "${Clang_DIR_SAVE}" CACHE PATH "The directory containing a CMake configuration file for Clang." FORCE) - message(STATUS "Found system-installed LLVM ${LLVM_PACKAGE_VERSION} with headers in ${LLVM_INCLUDE_DIRS}") set(CMAKE_CXX_STANDARD 20) ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: Detect address materialization and arithmetics (PR #132540)
https://github.com/atrosinenko updated https://github.com/llvm/llvm-project/pull/132540

From 774ecf8d94cb7acff292af5b16655a62b8b23201 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko
Date: Thu, 20 Mar 2025 20:15:07 +0300
Subject: [PATCH 1/2] [BOLT] Gadget scanner: Detect address materialization and
 arithmetics

In addition to authenticated pointers, consider the contents of a
register safe if it was
* written by PC-relative address computation
* updated by an arithmetic instruction whose input address is safe
---
 bolt/include/bolt/Core/MCPlusBuilder.h        |  16 ++
 bolt/lib/Passes/PAuthGadgetScanner.cpp        |  92 +--
 .../Target/AArch64/AArch64MCPlusBuilder.cpp   |  30 +++
 .../AArch64/gs-pacret-autiasp.s               |  15 --
 .../gs-pauth-address-materialization.s        | 228 ++
 .../binary-analysis/AArch64/lit.local.cfg     |   3 +-
 6 files changed, 345 insertions(+), 39 deletions(-)
 create mode 100644 bolt/test/binary-analysis/AArch64/gs-pauth-address-materialization.s

diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index 8b6dc14121480..e94f82d00349a 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -587,6 +587,22 @@ class MCPlusBuilder {
     return getNoRegister();
   }
 
+  virtual MCPhysReg getSafelyMaterializedAddressReg(const MCInst &Inst) const {
+    llvm_unreachable("not implemented");
+    return getNoRegister();
+  }
+
+  /// Analyzes if this instruction can safely perform address arithmetics.
+  ///
+  /// If the first element of the returned pair is no-register, this
+  /// instruction is considered unknown. Otherwise, (output, input) pair is
+  /// returned, so that output is as trusted as input is.
+  virtual std::pair<MCPhysReg, MCPhysReg>
+  analyzeSafeAddressArithmetics(const MCInst &Inst) const {
+    llvm_unreachable("not implemented");
+    return std::make_pair(getNoRegister(), getNoRegister());
+  }
+
   virtual bool isTerminator(const MCInst &Inst) const;
 
   virtual bool isNoop(const MCInst &Inst) const {
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index a3b320c545734..16da08551a34d 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -335,6 +335,50 @@ class PacRetAnalysis
     });
   }
 
+  BitVector getClobberedRegs(const MCInst &Point) const {
+    BitVector Clobbered(NumRegs, false);
+    // Assume a call can clobber all registers, including callee-saved
+    // registers. There's a good chance that callee-saved registers will be
+    // saved on the stack at some point during execution of the callee.
+    // Therefore they should also be considered as potentially modified by an
+    // attacker/written to.
+    // Also, not all functions may respect the AAPCS ABI rules about
+    // caller/callee-saved registers.
+    if (BC.MIB->isCall(Point))
+      Clobbered.set();
+    else
+      BC.MIB->getClobberedRegs(Point, Clobbered);
+    return Clobbered;
+  }
+
+  // Returns all registers that can be treated as if they are written by an
+  // authentication instruction.
+  SmallVector<MCPhysReg> getAuthenticatedRegs(const MCInst &Point,
+                                              const State &Cur) const {
+    SmallVector<MCPhysReg> Regs;
+    const MCPhysReg NoReg = BC.MIB->getNoRegister();
+
+    // A signed pointer can be authenticated, or
+    ErrorOr<MCPhysReg> AutReg = BC.MIB->getAuthenticatedReg(Point);
+    if (AutReg && *AutReg != NoReg)
+      Regs.push_back(*AutReg);
+
+    // ... a safe address can be materialized, or
+    MCPhysReg NewAddrReg = BC.MIB->getSafelyMaterializedAddressReg(Point);
+    if (NewAddrReg != NoReg)
+      Regs.push_back(NewAddrReg);
+
+    // ... an address can be updated in a safe manner, producing the result
+    // which is as trusted as the input address.
+    MCPhysReg ArithResult, ArithSrc;
+    std::tie(ArithResult, ArithSrc) =
+        BC.MIB->analyzeSafeAddressArithmetics(Point);
+    if (ArithResult != NoReg && Cur.SafeToDerefRegs[ArithSrc])
+      Regs.push_back(ArithResult);
+
+    return Regs;
+  }
+
   State computeNext(const MCInst &Point, const State &Cur) {
     PacStatePrinter P(BC);
     LLVM_DEBUG({
@@ -355,19 +399,20 @@ class PacRetAnalysis
       return State();
     }
 
+    // First, compute various properties of the instruction, taking the state
+    // before its execution into account, if necessary.
+
+    BitVector Clobbered = getClobberedRegs(Point);
+    // Compute the set of registers that can be considered as written by
+    // an authentication instruction. This includes operations that are
+    // *strictly better* than authentication, such as materializing a
+    // PC-relative constant.
+    SmallVector<MCPhysReg> AuthenticatedOrBetter =
+        getAuthenticatedRegs(Point, Cur);
+
+    // Then, compute the state after this instruction is executed.
     State Next = Cur;
-    BitVector Clobbered(NumRegs, false);
-    // Assume a call can
[llvm-branch-commits] [compiler-rt] [ctxprof][nfc] Move 2 implementation functions up in `CtxInstrProfiling.cpp` (PR #133146)
https://github.com/mtrofin ready_for_review https://github.com/llvm/llvm-project/pull/133146 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] [llvm] RootAutodetect (PR #133147)
https://github.com/mtrofin created https://github.com/llvm/llvm-project/pull/133147

None

From 7182baeef88e3d9448062118fd8af808a17fbcd9 Mon Sep 17 00:00:00 2001
From: Mircea Trofin
Date: Mon, 24 Mar 2025 12:01:10 -0700
Subject: [PATCH] RootAutodetect

---
 compiler-rt/lib/ctx_profile/CMakeLists.txt    |   2 +-
 .../lib/ctx_profile/CtxInstrContextNode.h     |   1 +
 .../lib/ctx_profile/CtxInstrProfiling.cpp     | 119 +++---
 .../lib/ctx_profile/CtxInstrProfiling.h       |   2 +-
 .../lib/ctx_profile/RootAutoDetector.cpp      |  84 +
 .../lib/ctx_profile/RootAutoDetector.h        |  29 +
 .../TestCases/generate-context.cpp            |   4 +-
 .../llvm/ProfileData/CtxInstrContextNode.h    |   1 +
 8 files changed, 195 insertions(+), 47 deletions(-)

diff --git a/compiler-rt/lib/ctx_profile/CMakeLists.txt b/compiler-rt/lib/ctx_profile/CMakeLists.txt
index bb606449c61b1..446ebc96408dd 100644
--- a/compiler-rt/lib/ctx_profile/CMakeLists.txt
+++ b/compiler-rt/lib/ctx_profile/CMakeLists.txt
@@ -27,7 +27,7 @@ endif()
 add_compiler_rt_runtime(clang_rt.ctx_profile
   STATIC
   ARCHS ${CTX_PROFILE_SUPPORTED_ARCH}
-  OBJECT_LIBS RTSanitizerCommon RTSanitizerCommonLibc
+  OBJECT_LIBS RTSanitizerCommon RTSanitizerCommonLibc RTSanitizerCommonSymbolizer
   CFLAGS ${EXTRA_FLAGS}
   SOURCES ${CTX_PROFILE_SOURCES}
   ADDITIONAL_HEADERS ${CTX_PROFILE_HEADERS}
diff --git a/compiler-rt/lib/ctx_profile/CtxInstrContextNode.h b/compiler-rt/lib/ctx_profile/CtxInstrContextNode.h
index a42bf9ebb01ea..aa052bc7eea6c 100644
--- a/compiler-rt/lib/ctx_profile/CtxInstrContextNode.h
+++ b/compiler-rt/lib/ctx_profile/CtxInstrContextNode.h
@@ -127,6 +127,7 @@ class ContextNode final {
 /// MUTEXDECL takes one parameter, the name of a field that is a mutex.
 #define CTXPROF_FUNCTION_DATA(PTRDECL, VOLATILE_PTRDECL, MUTEXDECL)            \
   PTRDECL(FunctionData, Next)                                                  \
+  PTRDECL(void, EntryAddress)                                                  \
   VOLATILE_PTRDECL(ContextRoot, CtxRoot)                                       \
   VOLATILE_PTRDECL(ContextNode, FlatCtx)                                       \
   MUTEXDECL(Mutex)
diff --git a/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp b/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp
index da291e0bbabdd..7e73214e639a3 100644
--- a/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp
+++ b/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "CtxInstrProfiling.h"
+#include "RootAutoDetector.h"
 #include "sanitizer_common/sanitizer_allocator_internal.h"
 #include "sanitizer_common/sanitizer_atomic.h"
 #include "sanitizer_common/sanitizer_atomic_clang.h"
@@ -43,6 +44,12 @@ Arena *FlatCtxArena = nullptr;
 __thread bool IsUnderContext = false;
 __sanitizer::atomic_uint8_t ProfilingStarted = {};
 
+__sanitizer::atomic_uintptr_t RootDetector = {};
+RootAutoDetector *getRootDetector() {
+  return reinterpret_cast<RootAutoDetector *>(
+      __sanitizer::atomic_load_relaxed(&RootDetector));
+}
+
 // utility to taint a pointer by setting the LSB. There is an assumption
 // throughout that the addresses of contexts are even (really, they should be
 // align(8), but "even"-ness is the minimum assumption)
@@ -201,7 +208,7 @@ ContextNode *getCallsiteSlow(GUID Guid, ContextNode **InsertionPoint,
   return Ret;
 }
 
-ContextNode *getFlatProfile(FunctionData &Data, GUID Guid,
+ContextNode *getFlatProfile(FunctionData &Data, void *Callee, GUID Guid,
                             uint32_t NumCounters) {
   if (ContextNode *Existing = Data.FlatCtx)
     return Existing;
@@ -232,6 +239,7 @@ ContextNode *getFlatProfile(FunctionData &Data, GUID Guid,
     auto *Ret = allocContextNode(AllocBuff, Guid, NumCounters, 0);
     Data.FlatCtx = Ret;
+    Data.EntryAddress = Callee;
     Data.Next = reinterpret_cast<FunctionData *>(
         __sanitizer::atomic_load_relaxed(&AllFunctionsData));
     while (!__sanitizer::atomic_compare_exchange_strong(
@@ -277,8 +285,29 @@ ContextRoot *FunctionData::getOrAllocateContextRoot() {
   return Root;
 }
 
-ContextNode *getUnhandledContext(FunctionData &Data, GUID Guid,
-                                 uint32_t NumCounters) {
+ContextNode *tryStartContextGivenRoot(ContextRoot *Root, GUID Guid,
+                                      uint32_t Counters, uint32_t Callsites)
+    SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
+  IsUnderContext = true;
+  __sanitizer::atomic_fetch_add(&Root->TotalEntries, 1,
+                                __sanitizer::memory_order_relaxed);
+
+  if (!Root->FirstMemBlock) {
+    setupContext(Root, Guid, Counters, Callsites);
+  }
+  if (Root->Taken.TryLock()) {
+    __llvm_ctx_profile_current_context_root = Root;
+    onContextEnter(*Root->FirstNode);
+    return Root->FirstNode;
+  }
+  // If this thread couldn't take the lock, return scratch context.
+  __llvm_ctx_profile_current_context_root
[llvm-branch-commits] [compiler-rt] [llvm] RootAutodetect (PR #133147)
mtrofin wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite (https://graphite.dev/docs/merge-pull-requests).

* **#133147** (this PR — view in Graphite: https://app.graphite.dev/github/pr/llvm/llvm-project/133147)
* **#133146** (https://app.graphite.dev/github/pr/llvm/llvm-project/133146)
* **#133106** (https://app.graphite.dev/github/pr/llvm/llvm-project/133106)
* `main`

This stack of pull requests is managed by Graphite (https://graphite.dev). Learn more about stacking: https://stacking.dev/

https://github.com/llvm/llvm-project/pull/133147
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: add RegBankLegalize rules for extends and trunc (PR #132383)
@@ -13,7 +12,8 @@ body: | ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 -; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s64) = G_ANYEXT [[COPY]](s32) +; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF +; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY]](s32), [[DEF]](s32) nhaehnle wrote: Why are we legalizing this G_ANYEXT to G_MERGE_VALUES, but in `anyext_s1_to_s64_scc` we generate a new `G_ANYEXT`? https://github.com/llvm/llvm-project/pull/132383 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][StaticDataSplitter]Support constant pool partitioning (PR #129781)
https://github.com/mingmingl-llvm updated https://github.com/llvm/llvm-project/pull/129781 >From 072c44f0f9272682480cc2837196a906bd694276 Mon Sep 17 00:00:00 2001 From: mingmingl Date: Fri, 28 Feb 2025 14:41:56 -0800 Subject: [PATCH 1/4] [CodeGen][StaticDataSplitter]Support constant pool partitioning --- llvm/include/llvm/CodeGen/AsmPrinter.h| 8 + .../CodeGen/TargetLoweringObjectFileImpl.h| 6 + .../llvm/Target/TargetLoweringObjectFile.h| 7 + llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp| 22 ++- llvm/lib/CodeGen/StaticDataSplitter.cpp | 56 +-- .../CodeGen/TargetLoweringObjectFileImpl.cpp | 35 + llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 10 ++ llvm/lib/Target/TargetLoweringObjectFile.cpp | 10 ++ llvm/lib/Target/X86/X86AsmPrinter.cpp | 10 ++ .../AArch64/constant-pool-partition.ll| 141 ++ .../CodeGen/X86/constant-pool-partition.ll| 131 11 files changed, 422 insertions(+), 14 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/constant-pool-partition.ll create mode 100644 llvm/test/CodeGen/X86/constant-pool-partition.ll diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h index 3da63af5ba571..2018f411be796 100644 --- a/llvm/include/llvm/CodeGen/AsmPrinter.h +++ b/llvm/include/llvm/CodeGen/AsmPrinter.h @@ -18,6 +18,8 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/StaticDataProfileInfo.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/DwarfStringPoolEntry.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -132,6 +134,12 @@ class AsmPrinter : public MachineFunctionPass { /// default, this is equal to CurrentFnSym. MCSymbol *CurrentFnSymForSize = nullptr; + /// Provides the profile information for constants. + const StaticDataProfileInfo *SDPI = nullptr; + + /// The profile summary information. + const ProfileSummaryInfo *PSI = nullptr; + /// Map a basic block section ID to the begin and end symbols of that section /// which determine the section's range. struct MBBSectionRange { diff --git a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h index 10f0594c267ae..563980fb24ab8 100644 --- a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h +++ b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h @@ -68,6 +68,12 @@ class TargetLoweringObjectFileELF : public TargetLoweringObjectFile { const Constant *C, Align &Alignment) const override; + /// Similar to the function above, but append \p SectionSuffix to the section + /// name. + MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind, + const Constant *C, Align &Alignment, + StringRef SectionSuffix) const override; + MCSection *getExplicitSectionGlobal(const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const override; diff --git a/llvm/include/llvm/Target/TargetLoweringObjectFile.h b/llvm/include/llvm/Target/TargetLoweringObjectFile.h index a5ed1b29dc1bc..1956748b8058b 100644 --- a/llvm/include/llvm/Target/TargetLoweringObjectFile.h +++ b/llvm/include/llvm/Target/TargetLoweringObjectFile.h @@ -104,6 +104,13 @@ class TargetLoweringObjectFile : public MCObjectFileInfo { SectionKind Kind, const Constant *C, Align &Alignment) const; + /// Similar to the function above, but append \p SectionSuffix to the section + /// name. 
+ virtual MCSection *getSectionForConstant(const DataLayout &DL, + SectionKind Kind, const Constant *C, + Align &Alignment, + StringRef SectionSuffix) const; + virtual MCSection * getSectionForMachineBasicBlock(const Function &F, const MachineBasicBlock &MBB, diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 3c4280333e76d..60018afe2f8a7 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -2791,8 +2791,26 @@ void AsmPrinter::emitConstantPool() { if (!CPE.isMachineConstantPoolEntry()) C = CPE.Val.ConstVal; -MCSection *S = getObjFileLowering().getSectionForConstant( -getDataLayout(), Kind, C, Alignment); +MCSection *S = nullptr; +if (TM.Options.EnableStaticDataPartitioning) { + SmallString<8> SectionNameSuffix; + if (C && SDPI && PSI) { +auto Count = SDPI->getConstantProfileCount(C); +if (Count) { +
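A simplified sketch of the suffix selection that the (truncated) `AsmPrinter::emitConstantPool` hunk above performs: map the constant's aggregated profile count to a section suffix. `HotnessOracle` stands in for `ProfileSummaryInfo`, and all names below are illustrative, not the exact code from the patch.

```cpp
#include <cstdint>
#include <optional>
#include <string>

struct HotnessOracle { // stand-in for ProfileSummaryInfo's threshold queries
  uint64_t HotThreshold, ColdThreshold;
  bool isHot(uint64_t C) const { return C >= HotThreshold; }
  bool isCold(uint64_t C) const { return C <= ColdThreshold; }
};

std::string sectionSuffixForConstant(std::optional<uint64_t> Count,
                                     const HotnessOracle &PSI) {
  if (!Count)
    return "";          // unprofiled constant: default .rodata.cstN section
  if (PSI.isHot(*Count))
    return ".hot";      // e.g. .rodata.cst8.hot
  if (PSI.isCold(*Count))
    return ".unlikely"; // e.g. .rodata.cst8.unlikely
  return "";            // neither hot nor cold: default section
}
```

Because the count is aggregated across the whole module before this decision, a constant that any hot function touches keeps the `.hot` suffix even when cold or unprofiled functions also reference it, which is what the constant-pool-partition tests later in this thread assert.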
[llvm-branch-commits] [compiler-rt] [ctxprof][nfc] Move 2 implementation functions up in `CtxInstrProfiling.cpp` (PR #133146)
https://github.com/mtrofin created https://github.com/llvm/llvm-project/pull/133146 None >From bbe97a86d8cc94b484420db54f735dac8bc818cf Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Wed, 26 Mar 2025 10:10:43 -0700 Subject: [PATCH] [ctxprof][nfc] Move 2 implementation functions up in `CtxInstrProfiling.cpp` --- .../lib/ctx_profile/CtxInstrProfiling.cpp | 66 +-- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp b/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp index b0e63a8861d86..da291e0bbabdd 100644 --- a/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp +++ b/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp @@ -244,6 +244,39 @@ ContextNode *getFlatProfile(FunctionData &Data, GUID Guid, return Data.FlatCtx; } +// This should be called once for a Root. Allocate the first arena, set up the +// first context. +void setupContext(ContextRoot *Root, GUID Guid, uint32_t NumCounters, + uint32_t NumCallsites) { + __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock( + &AllContextsMutex); + // Re-check - we got here without having had taken a lock. + if (Root->FirstMemBlock) +return; + const auto Needed = ContextNode::getAllocSize(NumCounters, NumCallsites); + auto *M = Arena::allocateNewArena(getArenaAllocSize(Needed)); + Root->FirstMemBlock = M; + Root->CurrentMem = M; + Root->FirstNode = allocContextNode(M->tryBumpAllocate(Needed), Guid, + NumCounters, NumCallsites); + AllContextRoots.PushBack(Root); +} + +ContextRoot *FunctionData::getOrAllocateContextRoot() { + auto *Root = CtxRoot; + if (Root) +return Root; + __sanitizer::GenericScopedLock<__sanitizer::StaticSpinMutex> L(&Mutex); + Root = CtxRoot; + if (!Root) { +Root = new (__sanitizer::InternalAlloc(sizeof(ContextRoot))) ContextRoot(); +CtxRoot = Root; + } + + assert(Root); + return Root; +} + ContextNode *getUnhandledContext(FunctionData &Data, GUID Guid, uint32_t NumCounters) { @@ -333,39 +366,6 @@ ContextNode *__llvm_ctx_profile_get_context(FunctionData *Data, void *Callee, return Ret; } -// This should be called once for a Root. Allocate the first arena, set up the -// first context. -void setupContext(ContextRoot *Root, GUID Guid, uint32_t NumCounters, - uint32_t NumCallsites) { - __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock( - &AllContextsMutex); - // Re-check - we got here without having had taken a lock. - if (Root->FirstMemBlock) -return; - const auto Needed = ContextNode::getAllocSize(NumCounters, NumCallsites); - auto *M = Arena::allocateNewArena(getArenaAllocSize(Needed)); - Root->FirstMemBlock = M; - Root->CurrentMem = M; - Root->FirstNode = allocContextNode(M->tryBumpAllocate(Needed), Guid, - NumCounters, NumCallsites); - AllContextRoots.PushBack(Root); -} - -ContextRoot *FunctionData::getOrAllocateContextRoot() { - auto *Root = CtxRoot; - if (Root) -return Root; - __sanitizer::GenericScopedLock<__sanitizer::StaticSpinMutex> L(&Mutex); - Root = CtxRoot; - if (!Root) { -Root = new (__sanitizer::InternalAlloc(sizeof(ContextRoot))) ContextRoot(); -CtxRoot = Root; - } - - assert(Root); - return Root; -} - ContextNode *__llvm_ctx_profile_start_context( FunctionData *FData, GUID Guid, uint32_t Counters, uint32_t Callsites) SANITIZER_NO_THREAD_SAFETY_ANALYSIS { ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
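The moved `getOrAllocateContextRoot` is a double-checked initialization: an unlocked fast-path read, then a re-check under the mutex before allocating. As a self-contained sketch, with `std::atomic`/`std::mutex` standing in for the `__sanitizer` primitives and the volatile field used in the code above:

```cpp
#include <atomic>
#include <mutex>

struct ContextRoot { /* profiling state elided */ };

std::atomic<ContextRoot *> CtxRoot{nullptr};
std::mutex M;

ContextRoot *getOrAllocateContextRoot() {
  // Fast path: once initialized, the lock is never taken again.
  if (ContextRoot *Root = CtxRoot.load(std::memory_order_acquire))
    return Root;
  std::lock_guard<std::mutex> L(M);
  // Re-check under the lock: another thread may have raced us here.
  ContextRoot *Root = CtxRoot.load(std::memory_order_relaxed);
  if (!Root) {
    Root = new ContextRoot();
    CtxRoot.store(Root, std::memory_order_release);
  }
  return Root;
}
```

The re-check is what keeps the allocation single-shot without paying for the mutex on every call.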
[llvm-branch-commits] [CI] Move CI over to new project computation script (PR #132642)
https://github.com/boomanaiden154 updated https://github.com/llvm/llvm-project/pull/132642 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][StaticDataSplitter]Support constant pool partitioning (PR #129781)
@@ -0,0 +1,131 @@ +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-grtev4-linux-gnu" + +; Tests that constant pool hotness is aggregated across the module. The mingmingl-llvm wrote: Grouped CHECK by functions and used CHECK-NEXT within a function. Also make `@var` used by a hot basic block in `@main`. https://github.com/llvm/llvm-project/pull/129781 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] release/20.x: [libcxx] [test] Fix restoring LLVM_DIR and Clang_DIR (#132838) (PR #133153)
llvmbot wrote: @mordante What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/133153 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CI] Move CI over to new project computation script (PR #132642)
https://github.com/boomanaiden154 updated https://github.com/llvm/llvm-project/pull/132642 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang] Track final substitution for Subst* AST nodes (PR #132748)
@@ -265,14 +265,14 @@ int k9 = f9(V9()); // CHECK-ELIDE-TREE: S9< // CHECK-ELIDE-TREE: [2 * ...], // CHECK-ELIDE-TREE: U9< -// CHECK-ELIDE-TREE: [(no qualifiers) != const] double>> shafik wrote: It is not obvious why this changed, can you explain? https://github.com/llvm/llvm-project/pull/132748 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] [llvm] RootAutodetect (PR #133147)
github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning: You can test this locally with the following command: ``bash git-clang-format --diff bbe97a86d8cc94b484420db54f735dac8bc818cf 7182baeef88e3d9448062118fd8af808a17fbcd9 --extensions cpp,h -- compiler-rt/lib/ctx_profile/CtxInstrContextNode.h compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp compiler-rt/lib/ctx_profile/CtxInstrProfiling.h compiler-rt/lib/ctx_profile/RootAutoDetector.cpp compiler-rt/lib/ctx_profile/RootAutoDetector.h compiler-rt/test/ctx_profile/TestCases/generate-context.cpp llvm/include/llvm/ProfileData/CtxInstrContextNode.h `` View the diff from clang-format here. ``diff diff --git a/compiler-rt/lib/ctx_profile/RootAutoDetector.cpp b/compiler-rt/lib/ctx_profile/RootAutoDetector.cpp index 5888545a79..50b8f07655 100644 --- a/compiler-rt/lib/ctx_profile/RootAutoDetector.cpp +++ b/compiler-rt/lib/ctx_profile/RootAutoDetector.cpp @@ -42,7 +42,7 @@ void RootAutoDetector::start() { +[](void *Ctx) -> void * { RootAutoDetector *RAD = reinterpret_cast(Ctx); SleepForSeconds(30); -Vector Copy; +Vector Copy; { GenericScopedLock M(&RAD->AllSamplesMutex); Copy.Resize(RAD->AllSamples.Size()); @@ -72,9 +72,7 @@ void RootAutoDetector::start() { this); } -void RootAutoDetector::join() { - pthread_join(WorkerThread, nullptr); -} +void RootAutoDetector::join() { pthread_join(WorkerThread, nullptr); } void RootAutoDetector::sample() { static thread_local bool Entered = false; @@ -90,7 +88,8 @@ void RootAutoDetector::collectStack() { GET_CALLER_PC_BP; BufferedStackTrace CurrentStack; CurrentStack.Unwind(pc, bp, nullptr, false); - if (CurrentStack.size <= 2) return; + if (CurrentStack.size <= 2) +return; static thread_local PerThreadSamples *ThisThreadSamples = new (__sanitizer::InternalAlloc(sizeof(PerThreadSamples))) PerThreadSamples(*this); diff --git a/compiler-rt/lib/ctx_profile/RootAutoDetector.h b/compiler-rt/lib/ctx_profile/RootAutoDetector.h index 254a40b163..f5cecad2f3 100644 --- a/compiler-rt/lib/ctx_profile/RootAutoDetector.h +++ b/compiler-rt/lib/ctx_profile/RootAutoDetector.h @@ -78,7 +78,7 @@ class RootAutoDetector final { }; SpinMutex AllSamplesMutex; SANITIZER_GUARDED_BY(AllSamplesMutex) - Vector AllSamples; + Vector AllSamples; atomic_uintptr_t &FunctionDataListHead; atomic_uintptr_t &Self; void collectStack(); `` https://github.com/llvm/llvm-project/pull/133147 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] release/20.x: XFAIL malloc_zone.cpp for darwin/lsan (#131234) (PR #133006)
@@ -17,6 +17,8 @@ // UBSan does not install a malloc zone. // XFAIL: ubsan // +// Curently fails on darwin/lsan rdar://145873843 jroelofs wrote: Radar links are appropriate for commit messages, but the community no longer accepts them in comments. https://github.com/llvm/llvm-project/pull/133006 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][StaticDataSplitter]Support constant pool partitioning (PR #129781)
https://github.com/mingmingl-llvm updated https://github.com/llvm/llvm-project/pull/129781 >From 072c44f0f9272682480cc2837196a906bd694276 Mon Sep 17 00:00:00 2001 From: mingmingl Date: Fri, 28 Feb 2025 14:41:56 -0800 Subject: [PATCH 1/3] [CodeGen][StaticDataSplitter]Support constant pool partitioning --- llvm/include/llvm/CodeGen/AsmPrinter.h| 8 + .../CodeGen/TargetLoweringObjectFileImpl.h| 6 + .../llvm/Target/TargetLoweringObjectFile.h| 7 + llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp| 22 ++- llvm/lib/CodeGen/StaticDataSplitter.cpp | 56 +-- .../CodeGen/TargetLoweringObjectFileImpl.cpp | 35 + llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 10 ++ llvm/lib/Target/TargetLoweringObjectFile.cpp | 10 ++ llvm/lib/Target/X86/X86AsmPrinter.cpp | 10 ++ .../AArch64/constant-pool-partition.ll| 141 ++ .../CodeGen/X86/constant-pool-partition.ll| 131 11 files changed, 422 insertions(+), 14 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/constant-pool-partition.ll create mode 100644 llvm/test/CodeGen/X86/constant-pool-partition.ll diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h index 3da63af5ba571..2018f411be796 100644 --- a/llvm/include/llvm/CodeGen/AsmPrinter.h +++ b/llvm/include/llvm/CodeGen/AsmPrinter.h @@ -18,6 +18,8 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/StaticDataProfileInfo.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/DwarfStringPoolEntry.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -132,6 +134,12 @@ class AsmPrinter : public MachineFunctionPass { /// default, this is equal to CurrentFnSym. MCSymbol *CurrentFnSymForSize = nullptr; + /// Provides the profile information for constants. + const StaticDataProfileInfo *SDPI = nullptr; + + /// The profile summary information. + const ProfileSummaryInfo *PSI = nullptr; + /// Map a basic block section ID to the begin and end symbols of that section /// which determine the section's range. struct MBBSectionRange { diff --git a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h index 10f0594c267ae..563980fb24ab8 100644 --- a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h +++ b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h @@ -68,6 +68,12 @@ class TargetLoweringObjectFileELF : public TargetLoweringObjectFile { const Constant *C, Align &Alignment) const override; + /// Similar to the function above, but append \p SectionSuffix to the section + /// name. + MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind, + const Constant *C, Align &Alignment, + StringRef SectionSuffix) const override; + MCSection *getExplicitSectionGlobal(const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const override; diff --git a/llvm/include/llvm/Target/TargetLoweringObjectFile.h b/llvm/include/llvm/Target/TargetLoweringObjectFile.h index a5ed1b29dc1bc..1956748b8058b 100644 --- a/llvm/include/llvm/Target/TargetLoweringObjectFile.h +++ b/llvm/include/llvm/Target/TargetLoweringObjectFile.h @@ -104,6 +104,13 @@ class TargetLoweringObjectFile : public MCObjectFileInfo { SectionKind Kind, const Constant *C, Align &Alignment) const; + /// Similar to the function above, but append \p SectionSuffix to the section + /// name. 
+ virtual MCSection *getSectionForConstant(const DataLayout &DL, + SectionKind Kind, const Constant *C, + Align &Alignment, + StringRef SectionSuffix) const; + virtual MCSection * getSectionForMachineBasicBlock(const Function &F, const MachineBasicBlock &MBB, diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 3c4280333e76d..60018afe2f8a7 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -2791,8 +2791,26 @@ void AsmPrinter::emitConstantPool() { if (!CPE.isMachineConstantPoolEntry()) C = CPE.Val.ConstVal; -MCSection *S = getObjFileLowering().getSectionForConstant( -getDataLayout(), Kind, C, Alignment); +MCSection *S = nullptr; +if (TM.Options.EnableStaticDataPartitioning) { + SmallString<8> SectionNameSuffix; + if (C && SDPI && PSI) { +auto Count = SDPI->getConstantProfileCount(C); +if (Count) { +
[llvm-branch-commits] [llvm] [CodeGen][StaticDataSplitter]Support constant pool partitioning (PR #129781)
@@ -0,0 +1,141 @@ +; RUN: llc -mtriple=aarch64 -enable-split-machine-functions \ +; RUN: -partition-static-data-sections=true -function-sections=true \ +; RUN: -unique-section-names=false \ +; RUN: %s -o - 2>&1 | FileCheck %s --dump-input=always + +; Repeat the RUN command above for big-endian systems. +; RUN: llc -mtriple=aarch64_be -enable-split-machine-functions \ +; RUN: -partition-static-data-sections=true -function-sections=true \ +; RUN: -unique-section-names=false \ +; RUN: %s -o - 2>&1 | FileCheck %s --dump-input=always + +; Tests that constant pool hotness is aggregated across the module. The +; static-data-splitter processes data from cold_func first, unprofiled_func +; secondly, and then hot_func. Specifically, tests that +; - If a constant is accessed by hot functions, all constant pools for this +; constant (e.g., from an unprofiled function, or cold function) should have +; `.hot` suffix. +; - Similarly if a constant is accessed by both cold function and un-profiled +; function, constant pools for this constant should not have `.unlikely` suffix. + +; CHECK: .section .rodata.cst8.hot,"aM",@progbits,8 +; CHECK: .LCPI0_0: +; CHECK: .xword 0x3fe5c28f5c28f5c3 // double 0.68005 +; CHECK: .section .rodata.cst8.unlikely,"aM",@progbits,8 +; CHECK: .LCPI0_1: +; CHECK: .xword 0x3fe5eb851eb851ec // double 0.68505 +; CHECK: .section .rodata.cst8,"aM",@progbits,8 +; CHECK: .LCPI0_2: +; CHECK: .byte 0 // 0x0 +; CHECK: .byte 4 // 0x4 +; CHECK: .byte 8 // 0x8 +; CHECK: .byte 12 // 0xc +; CHECK: .byte 255 // 0xff +; CHECK: .byte 255 // 0xff +; CHECK: .byte 255 // 0xff +; CHECK: .byte 255 // 0xff + +; CHECK: .section .rodata.cst8,"aM",@progbits,8 +; CHECK: .LCPI1_0: +; CHECK: .byte 0 // 0x0 +; CHECK: .byte 4 // 0x4 +; CHECK: .byte 8 // 0x8 +; CHECK: .byte 12 // 0xc +; CHECK: .byte 255 // 0xff +; CHECK: .byte 255 // 0xff +; CHECK: .byte 255 // 0xff +; CHECK: .byte 255 // 0xff +; CHECK: .section.rodata.cst16.hot,"aM",@progbits,16 +; CHECK: .LCPI1_1: +; CHECK: .word 442 // 0x1ba +; CHECK: .word 100 // 0x64 +; CHECK: .word 0 // 0x0 +; CHECK: .word 0 // 0x0 + +; CHECK: .section.rodata.cst8.hot,"aM",@progbits,8 +; CHECK: .LCPI2_0: +; CHECK: .xword 0x3fe5c28f5c28f5c3 // double 0.68005 +; CHECK: .section.rodata.cst16.hot,"aM",@progbits,16 +; CHECK: .LCPI2_1: +; CHECK: .word 442 // 0x1ba +; CHECK: .word 100 // 0x64 +; CHECK: .word 0 // 0x0 +; CHECK: .word 0 // 0x0 + +; CHECK:.section .rodata.cst32,"aM",@progbits,32 +; CHECK:.globl val + +define i32 @cold_func(double %x, <16 x i8> %a, <16 x i8> %b) !prof !16 { + %2 = tail call i32 (...) @func_taking_arbitrary_param(double 6.80e-01) + %num = tail call i32 (...) @func_taking_arbitrary_param(double 6.850e-01) + %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> ) + %t2 = bitcast <8 x i8> %t1 to <2 x i32> + %3 = extractelement <2 x i32> %t2, i32 1 + %sum = add i32 %2, %3 + %ret = add i32 %sum, %num + ret i32 %ret +} + +declare <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>) +declare i32 @func_taking_arbitrary_param(...) + +define <4 x i1> @unprofiled_func(<16 x i8> %a, <16 x i8> %b) { + %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> ) + %t2 = bitcast <8 x i8> %t1 to <4 x i16> + %t3 = zext <4 x i16> %t2 to <4 x i32> + %cmp = icmp ule <4 x i32> , %t3 + ret <4 x i1> %cmp +} + +define <4 x i1> @hot_func(i32 %0, <4 x i32> %a) !prof !17 { + %2 = tail call i32 (...) 
@func_taking_arbitrary_param(double 6.80e-01) + %b = icmp ule <4 x i32> %a, + ret <4 x i1> %b +} + +@val = unnamed_addr constant i256 1 mingmingl-llvm wrote: Updated the test case to use this `@val` in a hot basic block in `@main`. The section suffix remains empty (not `.hot` or `.unlikely`) though, because `@val` has external linkage and the static-data-splitter pass only analyzes local-linkage vars.
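That local-linkage restriction is the crux of the observed behavior. Below is a minimal sketch of the kind of eligibility guard involved; it is purely illustrative, `isPartitioningCandidate` is a made-up name, and the actual StaticDataSplitter code may differ:

```cpp
#include "llvm/IR/GlobalVariable.h"
using namespace llvm;

// Sketch (assumption): only module-internal globals are candidates, because
// every reference to them is visible to the pass. An external-linkage global
// like @val may be referenced from other TUs whose hotness is unknown, so it
// keeps the default (suffix-less) section.
static bool isPartitioningCandidate(const GlobalVariable &GV) {
  return GV.hasLocalLinkage() && GV.hasInitializer();
}
```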
[llvm-branch-commits] [llvm] [CodeGen][StaticDataSplitter]Support constant pool partitioning (PR #129781)
@@ -0,0 +1,141 @@ +; RUN: llc -mtriple=aarch64 -enable-split-machine-functions \ +; RUN: -partition-static-data-sections=true -function-sections=true \ +; RUN: -unique-section-names=false \ +; RUN: %s -o - 2>&1 | FileCheck %s --dump-input=always + +; Repeat the RUN command above for big-endian systems. +; RUN: llc -mtriple=aarch64_be -enable-split-machine-functions \ +; RUN: -partition-static-data-sections=true -function-sections=true \ +; RUN: -unique-section-names=false \ +; RUN: %s -o - 2>&1 | FileCheck %s --dump-input=always + +; Tests that constant pool hotness is aggregated across the module. The +; static-data-splitter processes data from cold_func first, unprofiled_func +; second, and then hot_func. Specifically, tests that +; - If a constant is accessed by hot functions, all constant pools for this +; constant (e.g., from an unprofiled function, or cold function) should have +; `.hot` suffix. +; - Similarly if a constant is accessed by both cold function and un-profiled +; function, constant pools for this constant should not have `.unlikely` suffix. + +; CHECK: .section .rodata.cst8.hot,"aM",@progbits,8 +; CHECK: .LCPI0_0: +; CHECK: .xword 0x3fe5c28f5c28f5c3 // double 0.68005 +; CHECK: .section .rodata.cst8.unlikely,"aM",@progbits,8 +; CHECK: .LCPI0_1: +; CHECK: .xword 0x3fe5eb851eb851ec // double 0.68505 +; CHECK: .section .rodata.cst8,"aM",@progbits,8 +; CHECK: .LCPI0_2: +; CHECK: .byte 0 // 0x0 +; CHECK: .byte 4 // 0x4 +; CHECK: .byte 8 // 0x8 +; CHECK: .byte 12 // 0xc +; CHECK: .byte 255 // 0xff +; CHECK: .byte 255 // 0xff +; CHECK: .byte 255 // 0xff +; CHECK: .byte 255 // 0xff + +; CHECK: .section .rodata.cst8,"aM",@progbits,8 +; CHECK: .LCPI1_0: +; CHECK: .byte 0 // 0x0 +; CHECK: .byte 4 // 0x4 +; CHECK: .byte 8 // 0x8 +; CHECK: .byte 12 // 0xc +; CHECK: .byte 255 // 0xff +; CHECK: .byte 255 // 0xff +; CHECK: .byte 255 // 0xff +; CHECK: .byte 255 // 0xff +; CHECK: .section .rodata.cst16.hot,"aM",@progbits,16 +; CHECK: .LCPI1_1: +; CHECK: .word 442 // 0x1ba +; CHECK: .word 100 // 0x64 +; CHECK: .word 0 // 0x0 +; CHECK: .word 0 // 0x0 + +; CHECK: .section .rodata.cst8.hot,"aM",@progbits,8 +; CHECK: .LCPI2_0: +; CHECK: .xword 0x3fe5c28f5c28f5c3 // double 0.68005 +; CHECK: .section .rodata.cst16.hot,"aM",@progbits,16 +; CHECK: .LCPI2_1: +; CHECK: .word 442 // 0x1ba +; CHECK: .word 100 // 0x64 +; CHECK: .word 0 // 0x0 +; CHECK: .word 0 // 0x0 + +; CHECK: .section .rodata.cst32,"aM",@progbits,32 +; CHECK: .globl val + +define i32 @cold_func(double %x, <16 x i8> %a, <16 x i8> %b) !prof !16 { + %2 = tail call i32 (...) @func_taking_arbitrary_param(double 6.80e-01) + %num = tail call i32 (...) @func_taking_arbitrary_param(double 6.850e-01) + %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> ) + %t2 = bitcast <8 x i8> %t1 to <2 x i32> + %3 = extractelement <2 x i32> %t2, i32 1 + %sum = add i32 %2, %3 + %ret = add i32 %sum, %num + ret i32 %ret +} + +declare <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>) +declare i32 @func_taking_arbitrary_param(...) + +define <4 x i1> @unprofiled_func(<16 x i8> %a, <16 x i8> %b) { + %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> ) + %t2 = bitcast <8 x i8> %t1 to <4 x i16> + %t3 = zext <4 x i16> %t2 to <4 x i32> + %cmp = icmp ule <4 x i32> , %t3 mingmingl-llvm wrote: Updated unprofiled_func and hot_func so each function has distinct constants.
The common `<442, 100, 0, 0>` constant is constructed to test that `LCPI1_2` and `LCPI2_2` get the same section name and can therefore be merged at link time. https://github.com/llvm/llvm-project/pull/129781
[llvm-branch-commits] [llvm] [CodeGen][StaticDataSplitter]Support constant pool partitioning (PR #129781)
@@ -0,0 +1,141 @@ +; RUN: llc -mtriple=aarch64 -enable-split-machine-functions \ mingmingl-llvm wrote: Currently in the target pass configuration, the data partitioning pass is added inside the `if (TM->Options.EnableMachineFunctionSplitter || EnableMachineFunctionSplitter)` block (https://github.com/llvm/llvm-project/blob/9224165871cacc568b3895c736ff2a580e1e/llvm/lib/CodeGen/TargetPassConfig.cpp#L1243-L1262), initially to piggyback on the availability of MIRProfile at line 1248. I'll send a follow-up patch to move it outside. https://github.com/llvm/llvm-project/pull/129781
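A sketch of what that follow-up might look like inside `TargetPassConfig::addMachinePasses()`. The flag name `EnableStaticDataPartitioning` and the exact hook point are assumptions for illustration, not the contents of the promised patch:

```cpp
// Sketch (assumption): register the splitter on its own toggle, so constant
// pool / static data partitioning no longer depends on the machine function
// splitter being enabled.
if (EnableStaticDataPartitioning)
  addPass(createStaticDataSplitterPass());

if (TM->Options.EnableMachineFunctionSplitter ||
    EnableMachineFunctionSplitter) {
  // ... MFS-only setup stays here ...
  addPass(createMachineFunctionSplitterPass());
}
```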
[llvm-branch-commits] [CI] Move CI over to new project computation script (PR #132642)
https://github.com/boomanaiden154 updated https://github.com/llvm/llvm-project/pull/132642
[llvm-branch-commits] [llvm] release/20.x: [llvm] Fix crash when complex deinterleaving operates on an unrolled loop (#129735) (PR #132031)
https://github.com/igogo-x86 approved this pull request. https://github.com/llvm/llvm-project/pull/132031
[llvm-branch-commits] [compiler-rt] [llvm] [ctxprof] root autodetection mechanism (PR #133147)
https://github.com/mtrofin edited https://github.com/llvm/llvm-project/pull/133147
[llvm-branch-commits] [clang] [HLSL] DO NOT MERGE - Resource constructors prototype (PR #132453)
https://github.com/hekota created https://github.com/llvm/llvm-project/pull/132453 None >From 47b41c88a60a7f376070b9ff779ec955eebf523a Mon Sep 17 00:00:00 2001 From: Helena Kotas Date: Wed, 12 Mar 2025 17:20:51 -0700 Subject: [PATCH 1/3] [HLSL] Create default resource constructor with BuiltinTypeMethodBuilder --- clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp | 126 ++ 1 file changed, 72 insertions(+), 54 deletions(-) diff --git a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp index db0ed3434d837..a52c6a49264c8 100644 --- a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp +++ b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp @@ -89,21 +89,24 @@ struct TemplateParameterListBuilder { // statement (unless the last statement is already a ReturnStmt). struct BuiltinTypeMethodBuilder { private: - struct MethodParam { + struct Param { const IdentifierInfo &NameII; QualType Ty; HLSLParamModifierAttr::Spelling Modifier; -MethodParam(const IdentifierInfo &NameII, QualType Ty, -HLSLParamModifierAttr::Spelling Modifier) +Param(const IdentifierInfo &NameII, QualType Ty, + HLSLParamModifierAttr::Spelling Modifier) : NameII(NameII), Ty(Ty), Modifier(Modifier) {} }; BuiltinTypeDeclBuilder &DeclBuilder; - DeclarationNameInfo NameInfo; + DeclarationName Name; QualType ReturnTy; + // method or constructor declaration (CXXConstructorDecl derives from + // CXXMethodDecl) CXXMethodDecl *Method; bool IsConst; - llvm::SmallVector Params; + bool IsConstructor; + llvm::SmallVector Params; llvm::SmallVector StmtsList; // Argument placeholders, inspired by std::placeholder. These are the indices @@ -122,12 +125,14 @@ struct BuiltinTypeMethodBuilder { friend BuiltinTypeDeclBuilder; BuiltinTypeMethodBuilder(BuiltinTypeDeclBuilder &DB, DeclarationName &Name, - QualType ReturnTy, bool IsConst = false) - : DeclBuilder(DB), NameInfo(DeclarationNameInfo(Name, SourceLocation())), -ReturnTy(ReturnTy), Method(nullptr), IsConst(IsConst) {} - - BuiltinTypeMethodBuilder(BuiltinTypeDeclBuilder &DB, StringRef Name, - QualType ReturnTy, bool IsConst = false); + QualType ReturnTy, bool IsConst = false, + bool IsConstructor = false) + : DeclBuilder(DB), Name(Name), ReturnTy(ReturnTy), Method(nullptr), +IsConst(IsConst), IsConstructor(IsConstructor) {} + + BuiltinTypeMethodBuilder(BuiltinTypeDeclBuilder &DB, StringRef NameStr, + QualType ReturnTy, bool IsConst = false, + bool IsConstructor = false); BuiltinTypeMethodBuilder(const BuiltinTypeMethodBuilder &Other) = delete; ~BuiltinTypeMethodBuilder() { finalizeMethod(); } @@ -148,7 +153,14 @@ struct BuiltinTypeMethodBuilder { Expr *getResourceHandleExpr(); private: - void createMethodDecl(); + void createDecl(); + + // Makes sure the declaration is created; should be called before any + // statement added or when access to 'this' is needed. 
+ void ensureCompleteDecl() { +if (!Method) + createDecl(); + } }; TemplateParameterListBuilder::~TemplateParameterListBuilder() { @@ -323,13 +335,26 @@ Expr *BuiltinTypeMethodBuilder::convertPlaceholder(PlaceHolder PH) { } BuiltinTypeMethodBuilder::BuiltinTypeMethodBuilder(BuiltinTypeDeclBuilder &DB, - StringRef Name, + StringRef NameStr, QualType ReturnTy, - bool IsConst) -: DeclBuilder(DB), ReturnTy(ReturnTy), Method(nullptr), IsConst(IsConst) { - const IdentifierInfo &II = - DB.SemaRef.getASTContext().Idents.get(Name, tok::TokenKind::identifier); - NameInfo = DeclarationNameInfo(DeclarationName(&II), SourceLocation()); + bool IsConst, + bool IsConstructor) +: DeclBuilder(DB), ReturnTy(ReturnTy), Method(nullptr), IsConst(IsConst), + IsConstructor(IsConstructor) { + + assert((!NameStr.empty() || IsConstructor) && "method needs a name"); + assert(((IsConstructor && !IsConst) || !IsConstructor) && + "constructor cannot be const"); + + ASTContext &AST = DB.SemaRef.getASTContext(); + if (IsConstructor) { +Name = AST.DeclarationNames.getCXXConstructorName( +DB.Record->getTypeForDecl()->getCanonicalTypeUnqualified()); + } else { +const IdentifierInfo &II = +AST.Idents.get(NameStr, tok::TokenKind::identifier); +Name = DeclarationName(&II); + } } BuiltinTypeMethodBuilder & @@ -342,13 +367,13 @@ BuiltinTypeMethodBuilder::addParam(StringRef Name, QualType Ty, return *this; } -void BuiltinTypeMethodBuilder::createMethodDecl(
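The message is clipped above, but the shape of the new API is visible in the hunks: the builder now takes an `IsConstructor` flag and synthesizes a `CXXConstructorName` instead of an identifier name. A rough usage sketch under those assumptions (the argument values are illustrative, not taken from the patch):

```cpp
// Sketch (assumption): declaring a parameterless default constructor on the
// record under construction. DB is the enclosing BuiltinTypeDeclBuilder and
// AST its ASTContext, both assumed in scope. The name is empty because
// constructors are named after the type; finalizeMethod() runs when the
// builder goes out of scope.
{
  BuiltinTypeMethodBuilder MMB(DB, /*NameStr=*/"", AST.VoidTy,
                               /*IsConst=*/false, /*IsConstructor=*/true);
  // ...addParam(...) / statement-building calls would go here...
}
```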
[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: Detect address materialization and arithmetics (PR #132540)
https://github.com/atrosinenko updated https://github.com/llvm/llvm-project/pull/132540 >From 72e3de5990635d183b3b39cc55ad4dab5e104a29 Mon Sep 17 00:00:00 2001 From: Anatoly Trosinenko Date: Thu, 20 Mar 2025 20:15:07 +0300 Subject: [PATCH] [BOLT] Gadget scanner: Detect address materialization and arithmetics In addition to authenticated pointers, consider the contents of a register safe if it was * written by PC-relative address computation * updated by an arithmetic instruction whose input address is safe --- bolt/include/bolt/Core/MCPlusBuilder.h| 16 ++ bolt/lib/Passes/PAuthGadgetScanner.cpp| 92 +-- .../Target/AArch64/AArch64MCPlusBuilder.cpp | 30 +++ .../AArch64/gs-pacret-autiasp.s | 15 -- .../gs-pauth-address-materialization.s| 228 ++ .../binary-analysis/AArch64/lit.local.cfg | 3 +- 6 files changed, 345 insertions(+), 39 deletions(-) create mode 100644 bolt/test/binary-analysis/AArch64/gs-pauth-address-materialization.s diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h index 8b6dc14121480..e94f82d00349a 100644 --- a/bolt/include/bolt/Core/MCPlusBuilder.h +++ b/bolt/include/bolt/Core/MCPlusBuilder.h @@ -587,6 +587,22 @@ class MCPlusBuilder { return getNoRegister(); } + virtual MCPhysReg getSafelyMaterializedAddressReg(const MCInst &Inst) const { +llvm_unreachable("not implemented"); +return getNoRegister(); + } + + /// Analyzes if this instruction can safely perform address arithmetics. + /// + /// If the first element of the returned pair is no-register, this instruction + /// is considered unknown. Otherwise, (output, input) pair is returned, + /// so that output is as trusted as input is. + virtual std::pair + analyzeSafeAddressArithmetics(const MCInst &Inst) const { +llvm_unreachable("not implemented"); +return std::make_pair(getNoRegister(), getNoRegister()); + } + virtual bool isTerminator(const MCInst &Inst) const; virtual bool isNoop(const MCInst &Inst) const { diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp index a3b320c545734..16da08551a34d 100644 --- a/bolt/lib/Passes/PAuthGadgetScanner.cpp +++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp @@ -335,6 +335,50 @@ class PacRetAnalysis }); } + BitVector getClobberedRegs(const MCInst &Point) const { +BitVector Clobbered(NumRegs, false); +// Assume a call can clobber all registers, including callee-saved +// registers. There's a good chance that callee-saved registers will be +// saved on the stack at some point during execution of the callee. +// Therefore they should also be considered as potentially modified by an +// attacker/written to. +// Also, not all functions may respect the AAPCS ABI rules about +// caller/callee-saved registers. +if (BC.MIB->isCall(Point)) + Clobbered.set(); +else + BC.MIB->getClobberedRegs(Point, Clobbered); +return Clobbered; + } + + // Returns all registers that can be treated as if they are written by an + // authentication instruction. + SmallVector getAuthenticatedRegs(const MCInst &Point, + const State &Cur) const { +SmallVector Regs; +const MCPhysReg NoReg = BC.MIB->getNoRegister(); + +// A signed pointer can be authenticated, or +ErrorOr AutReg = BC.MIB->getAuthenticatedReg(Point); +if (AutReg && *AutReg != NoReg) + Regs.push_back(*AutReg); + +// ... a safe address can be materialized, or +MCPhysReg NewAddrReg = BC.MIB->getSafelyMaterializedAddressReg(Point); +if (NewAddrReg != NoReg) + Regs.push_back(NewAddrReg); + +// ... 
an address can be updated in a safe manner, producing the result +// which is as trusted as the input address. +MCPhysReg ArithResult, ArithSrc; +std::tie(ArithResult, ArithSrc) = +BC.MIB->analyzeSafeAddressArithmetics(Point); +if (ArithResult != NoReg && Cur.SafeToDerefRegs[ArithSrc]) + Regs.push_back(ArithResult); + +return Regs; + } + State computeNext(const MCInst &Point, const State &Cur) { PacStatePrinter P(BC); LLVM_DEBUG({ @@ -355,19 +399,20 @@ class PacRetAnalysis return State(); } +// First, compute various properties of the instruction, taking the state +// before its execution into account, if necessary. + +BitVector Clobbered = getClobberedRegs(Point); +// Compute the set of registers that can be considered as written by +// an authentication instruction. This includes operations that are +// *strictly better* than authentication, such as materializing a +// PC-relative constant. +SmallVector AuthenticatedOrBetter = +getAuthenticatedRegs(Point, Cur); + +// Then, compute the state after this instruction is executed. State Next = Cur; -BitVector Clobbered(NumRegs, false); -// Assume a call can clo
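To make the two hooks concrete, here is a minimal sketch of how an AArch64 backend could implement them. The opcode choices are illustrative assumptions, not the patch's actual `AArch64MCPlusBuilder` code:

```cpp
// Sketch (assumption): PC-relative address computation cannot be controlled
// by an attacker, so ADR/ADRP results are safe in their destination register.
MCPhysReg getSafelyMaterializedAddressReg(const MCInst &Inst) const override {
  switch (Inst.getOpcode()) {
  case AArch64::ADR:
  case AArch64::ADRP:
    return Inst.getOperand(0).getReg();
  default:
    return getNoRegister();
  }
}

// Sketch (assumption): "add Xd, Xn, #imm" derives Xd purely from Xn, so Xd is
// exactly as trusted as Xn; report the (output, input) pair and let the
// dataflow analysis decide whether the input was safe.
std::pair<MCPhysReg, MCPhysReg>
analyzeSafeAddressArithmetics(const MCInst &Inst) const override {
  if (Inst.getOpcode() == AArch64::ADDXri)
    return {Inst.getOperand(0).getReg(), Inst.getOperand(1).getReg()};
  return {getNoRegister(), getNoRegister()};
}
```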
[llvm-branch-commits] [llvm] [GlobalISel] Combine redundant sext_inreg (PR #131624)
https://github.com/Pierre-vh updated https://github.com/llvm/llvm-project/pull/131624 >From f4c801437460aef9b9c2e5f49d1e98ec90fadb16 Mon Sep 17 00:00:00 2001 From: pvanhout Date: Mon, 17 Mar 2025 13:54:59 +0100 Subject: [PATCH 1/4] [GlobalISel] Combine redundant sext_inreg --- .../llvm/CodeGen/GlobalISel/CombinerHelper.h | 3 + .../include/llvm/Target/GlobalISel/Combine.td | 9 +- .../GlobalISel/CombinerHelperCasts.cpp| 27 +++ .../combine-redundant-sext-inreg.mir | 164 ++ .../combine-sext-trunc-sextinreg.mir | 87 ++ .../CodeGen/AMDGPU/GlobalISel/llvm.abs.ll | 5 - 6 files changed, 289 insertions(+), 6 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/combine-redundant-sext-inreg.mir create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-trunc-sextinreg.mir diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index 9b78342c8fc39..5778377d125a8 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -994,6 +994,9 @@ class CombinerHelper { // overflow sub bool matchSuboCarryOut(const MachineInstr &MI, BuildFnTy &MatchInfo) const; + // (sext_inreg (sext_inreg x, K0), K1) + void applyRedundantSextInReg(MachineInstr &Root, MachineInstr &Other) const; + private: /// Checks for legality of an indexed variant of \p LdSt. bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const; diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 660b03080f92e..6a0ff683a4647 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -1849,6 +1849,12 @@ def anyext_of_anyext : ext_of_ext_opcodes; def anyext_of_zext : ext_of_ext_opcodes; def anyext_of_sext : ext_of_ext_opcodes; +def sext_inreg_of_sext_inreg : GICombineRule< + (defs root:$dst), + (match (G_SEXT_INREG $x, $src, $a):$other, + (G_SEXT_INREG $dst, $x, $b):$root), + (apply [{ Helper.applyRedundantSextInReg(*${root}, *${other}); }])>; + // Push cast through build vector. class buildvector_of_opcode : GICombineRule < (defs root:$root, build_fn_matchinfo:$matchinfo), @@ -1896,7 +1902,8 @@ def cast_of_cast_combines: GICombineGroup<[ sext_of_anyext, anyext_of_anyext, anyext_of_zext, - anyext_of_sext + anyext_of_sext, + sext_inreg_of_sext_inreg, ]>; def cast_combines: GICombineGroup<[ diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp index 576fd5fd81703..883a62c308232 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp @@ -378,3 +378,30 @@ bool CombinerHelper::matchCastOfInteger(const MachineInstr &CastMI, return false; } } + +void CombinerHelper::applyRedundantSextInReg(MachineInstr &Root, + MachineInstr &Other) const { + assert(Root.getOpcode() == TargetOpcode::G_SEXT_INREG && + Other.getOpcode() == TargetOpcode::G_SEXT_INREG); + + unsigned RootWidth = Root.getOperand(2).getImm(); + unsigned OtherWidth = Other.getOperand(2).getImm(); + + Register Dst = Root.getOperand(0).getReg(); + Register OtherDst = Other.getOperand(0).getReg(); + Register Src = Other.getOperand(1).getReg(); + + if (RootWidth >= OtherWidth) { +// The root sext_inreg is entirely redundant because the other one +// is narrower. 
+Observer.changingAllUsesOfReg(MRI, Dst); +MRI.replaceRegWith(Dst, OtherDst); +Observer.finishedChangingAllUsesOfReg(); + } else { +// RootWidth < OtherWidth, rewrite this G_SEXT_INREG with the source of the +// other G_SEXT_INREG. +Builder.buildSExtInReg(Dst, Src, RootWidth); + } + + Root.eraseFromParent(); +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-redundant-sext-inreg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-redundant-sext-inreg.mir new file mode 100644 index 0..566ee8e6c338d --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-redundant-sext-inreg.mir @@ -0,0 +1,164 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -run-pass=amdgpu-regbank-combiner -verify-machineinstrs %s -o - | FileCheck %s + +--- +name: inreg8_inreg16 +tracksRegLiveness: true +body: | + bb.0: +liveins: $vgpr0 +; CHECK-LABEL: name: inreg8_inreg16 +; CHECK: liveins: $vgpr0 +; CHECK-NEXT: {{ $}} +; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0 +; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %copy, 8 +; CHECK-NEXT: $vgpr0 = COPY %inreg(s32) +%copy:_(s32) = COPY $vgpr0 +%inreg:_(s32) = G_SEXT_INREG %copy, 8 +%inreg1:_(s32) = G_SEXT_INREG %inreg, 16 +$vgpr0 = COPY %inreg1 +... + +
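The combine's two cases are easy to sanity-check with plain integer arithmetic. A self-contained demonstration (my own illustration; `sextInReg` mirrors the scalar semantics of `G_SEXT_INREG`):

```cpp
#include <cassert>
#include <cstdint>

// Sign-extend the low 'width' bits of x to a full 32-bit value, i.e. the
// scalar semantics of G_SEXT_INREG.
int32_t sextInReg(int32_t x, unsigned width) {
  unsigned shift = 32 - width;
  return (int32_t)((uint32_t)x << shift) >> shift;
}

int main() {
  for (int32_t x : {0, 1, 0x7f, 0x80, 0xff, 0x1234, -1, -128}) {
    // Outer width >= inner width: the outer op is a no-op, so the root
    // G_SEXT_INREG is entirely redundant.
    assert(sextInReg(sextInReg(x, 8), 16) == sextInReg(x, 8));
    // Outer width < inner width: the pair collapses to the narrower
    // extension, applied directly to the original source.
    assert(sextInReg(sextInReg(x, 16), 8) == sextInReg(x, 8));
  }
}
```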
[llvm-branch-commits] [llvm] [AMDGPU][NPM] Port AMDGPUSetWavePriority to NPM (PR #130064)
https://github.com/arsenm approved this pull request. https://github.com/llvm/llvm-project/pull/130064
[llvm-branch-commits] [llvm] [LV] Reduce register usage for scaled reductions (PR #133090)
@@ -2031,17 +2033,19 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe, /// scalar value. class VPPartialReductionRecipe : public VPSingleDefRecipe { unsigned Opcode; + unsigned ScaleFactor; NickGuy-Arm wrote: Nit: Could this be `VFScaleFactor` to match the equivalent in `VPReductionPHIRecipe`? https://github.com/llvm/llvm-project/pull/133090
[llvm-branch-commits] [llvm] [LV] Reduce register usage for scaled reductions (PR #133090)
@@ -5026,10 +5026,23 @@ calculateRegisterUsage(VPlan &Plan, ArrayRef VFs, // even in the scalar case. RegUsage[ClassID] += 1; } else { +// The output from scaled phis and scaled reductions actually have +// fewer lanes than the VF. +auto VF = VFs[J]; +if (auto *ReductionR = dyn_cast(R)) NickGuy-Arm wrote: [Idle thought, feel free to ignore] I wonder if there's precedent to add a `getVFScaleFactor` or equivalent to the base recipe class (or one of the other subclasses), and allow any recipe to override it instead of explicitly checking for every type that could scale the VF. Likely not yet, and almost certainly not in this patch, but maybe something to consider in the future? https://github.com/llvm/llvm-project/pull/133090
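For what it's worth, the suggestion sketches out naturally. A self-contained illustration of the shape such a hook could take, assuming a virtual query with a default of 1; the names follow the review comment, and none of this is in the patch:

```cpp
#include <cassert>

// Sketch (assumption): a base-class query with a neutral default, overridden
// only by recipes whose output has fewer lanes than the VF.
struct RecipeBase {
  virtual ~RecipeBase() = default;
  virtual unsigned getVFScaleFactor() const { return 1; }
};

struct PartialReductionRecipe : RecipeBase {
  unsigned VFScaleFactor;
  explicit PartialReductionRecipe(unsigned S) : VFScaleFactor(S) {}
  unsigned getVFScaleFactor() const override { return VFScaleFactor; }
};

// calculateRegisterUsage could then divide uniformly, with no dyn_casts:
unsigned effectiveLanes(const RecipeBase &R, unsigned VF) {
  return VF / R.getVFScaleFactor();
}

int main() {
  PartialReductionRecipe PR(/*ScaleFactor=*/4);
  RecipeBase Plain;
  assert(effectiveLanes(PR, 16) == 4);     // scaled reduction: fewer lanes
  assert(effectiveLanes(Plain, 16) == 16); // ordinary recipe: full VF
}
```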
[llvm-branch-commits] [libcxx] [libc++] Remove the need for _LIBCPP_TEMPLATE_VIS (PR #133010)
https://github.com/philnik777 ready_for_review https://github.com/llvm/llvm-project/pull/133010
[llvm-branch-commits] [llvm] [AMDGPU][NPM] Port AMDGPUSetWavePriority to NPM (PR #130064)
https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/130064 >From 3dcdf09b2a4635da32ff99208ddad0c27bdc621e Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Thu, 6 Mar 2025 06:07:23 + Subject: [PATCH 1/2] [AMDGPU][NPM] Port AMDGPUSetWavePriority to NPM --- llvm/lib/Target/AMDGPU/AMDGPU.h | 9 +++- llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def | 2 +- .../Target/AMDGPU/AMDGPUSetWavePriority.cpp | 44 ++- .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 5 +-- llvm/test/CodeGen/AMDGPU/set-wave-priority.ll | 5 +++ 5 files changed, 48 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 00a807192eb0c..6c7f93c740cf3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -399,6 +399,13 @@ class SILateBranchLoweringPass static bool isRequired() { return true; } }; +class AMDGPUSetWavePriorityPass +: public PassInfoMixin { +public: + PreservedAnalyses run(MachineFunction &MF, +MachineFunctionAnalysisManager &MFAM); +}; + FunctionPass *createAMDGPUAnnotateUniformValuesLegacy(); ModulePass *createAMDGPUPrintfRuntimeBinding(); @@ -511,7 +518,7 @@ void initializeGCNPreRAOptimizationsLegacyPass(PassRegistry &); extern char &GCNPreRAOptimizationsID; FunctionPass *createAMDGPUSetWavePriorityPass(); -void initializeAMDGPUSetWavePriorityPass(PassRegistry &); +void initializeAMDGPUSetWavePriorityLegacyPass(PassRegistry &); void initializeGCNRewritePartialRegUsesLegacyPass(llvm::PassRegistry &); extern char &GCNRewritePartialRegUsesID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def index 6a45392b5f099..bebb69d765654 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def @@ -102,6 +102,7 @@ MACHINE_FUNCTION_PASS("amdgpu-mark-last-scratch-load", AMDGPUMarkLastScratchLoad MACHINE_FUNCTION_PASS("amdgpu-pre-ra-long-branch-reg", GCNPreRALongBranchRegPass()) MACHINE_FUNCTION_PASS("amdgpu-reserve-wwm-regs", AMDGPUReserveWWMRegsPass()) MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass()) +MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", AMDGPUSetWavePriorityPass()) MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizationsPass()) MACHINE_FUNCTION_PASS("amdgpu-nsa-reassign", GCNNSAReassignPass()) MACHINE_FUNCTION_PASS("gcn-create-vopd", GCNCreateVOPDPass()) @@ -133,7 +134,6 @@ MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass()) #define DUMMY_MACHINE_FUNCTION_PASS(NAME, CREATE_PASS) DUMMY_MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizationsPass()) DUMMY_MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass()) -DUMMY_MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", AMDGPUSetWavePriorityPass()) DUMMY_MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass()) // TODO: Move amdgpu-preload-kern-arg-prolog to MACHINE_FUNCTION_PASS since it diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSetWavePriority.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSetWavePriority.cpp index ec9922db9af12..0ff6d27f0e7c9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSetWavePriority.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSetWavePriority.cpp @@ -19,6 +19,7 @@ #include "SIInstrInfo.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachinePassManager.h" using namespace llvm; @@ -40,15 +41,11 @@ struct MBBInfo { using MBBInfoSet = DenseMap; -class 
AMDGPUSetWavePriority : public MachineFunctionPass { +class AMDGPUSetWavePriority { public: static char ID; - AMDGPUSetWavePriority() : MachineFunctionPass(ID) {} - - StringRef getPassName() const override { return "Set wave priority"; } - - bool runOnMachineFunction(MachineFunction &MF) override; + bool run(MachineFunction &MF); private: MachineInstr *BuildSetprioMI(MachineBasicBlock &MBB, @@ -58,15 +55,30 @@ class AMDGPUSetWavePriority : public MachineFunctionPass { const SIInstrInfo *TII; }; +class AMDGPUSetWavePriorityLegacy : public MachineFunctionPass { +public: + static char ID; + + AMDGPUSetWavePriorityLegacy() : MachineFunctionPass(ID) {} + + StringRef getPassName() const override { return "Set wave priority"; } + + bool runOnMachineFunction(MachineFunction &MF) override { +if (skipFunction(MF.getFunction())) + return false; +return AMDGPUSetWavePriority().run(MF); + } +}; + } // End anonymous namespace. -INITIALIZE_PASS(AMDGPUSetWavePriority, DEBUG_TYPE, "Set wave priority", false, -false) +INITIALIZE_PASS(AMDGPUSetWavePriorityLegacy, DEBUG_TYPE, "Set wave priority", +false, false) -char AMDGPUSetWavePriority::ID = 0; +char AMDGPUSetWavePriorityLegacy::ID = 0; FunctionPass *llvm::createAMDGPUSetWavePriorityPass() {
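The diff is clipped above, but these ports follow a common pattern: the pass logic lives in a plain class, and both the legacy wrapper and the new pass manager entry point delegate to it. A generic sketch of the NPM side under that assumption (this is the idiom, not the missing remainder of the patch):

```cpp
// Sketch (assumption): delegate to the shared implementation class and
// report preserved analyses based on whether the MIR changed.
PreservedAnalyses
AMDGPUSetWavePriorityPass::run(MachineFunction &MF,
                               MachineFunctionAnalysisManager &MFAM) {
  if (!AMDGPUSetWavePriority().run(MF))
    return PreservedAnalyses::all();
  auto PA = getMachineFunctionPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>(); // the pass only inserts setprio instructions
  return PA;
}
```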
[llvm-branch-commits] [llvm] release/20.x: [llvm-dlltool] Implement the --identify option (PR #132483)
mstorsjo wrote: > @mstorsjo (or anyone else). If you would like to add a note about this fix in > the release notes (completely optional). Please reply to this comment with a > one or two sentence description of the fix. When you are done, please add the > release:note label to this PR. We could add this bullet to the release notes: - Implemented the `--identify` option in llvm-dlltool https://github.com/llvm/llvm-project/pull/132483
[llvm-branch-commits] [clang] release/20.x: [PATCH] [clang][modules] Fix serialization and de-serialization of PCH module file refs (#105994) (#132802) (PR #133198)
ChuanqiXu9 wrote: This is a simple fix to a problem with a (relatively) long history. I think it is good to backport this. https://github.com/llvm/llvm-project/pull/133198
[llvm-branch-commits] [llvm] release/20.x: [HEXAGON] Fix semantics of ordered FP compares (#131089) (PR #131270)
androm3da wrote: @iajbar can you review the cherry-pick of this floating point comparison fix? https://github.com/llvm/llvm-project/pull/131270
[llvm-branch-commits] [clang] [clang-tools-extra] [clang] support pack expansions for trailing requires clauses (PR #133190)
mizvekov wrote: Actually, the current users of the implicit conversion are not good at all; I'll fix them instead. https://github.com/llvm/llvm-project/pull/133190
[llvm-branch-commits] [clang] release/20.x: [hexagon] Enable --eh-frame-hdr (#130225) (PR #130678)
androm3da wrote: @iajbar can you review this cherry-pick for the compiler driver? https://github.com/llvm/llvm-project/pull/130678
[llvm-branch-commits] [libcxx] release/20.x: [libcxx] [test] Fix restoring LLVM_DIR and Clang_DIR (#132838) (PR #133153)
llvmbot wrote: @llvm/pr-subscribers-libcxx Author: None (llvmbot) Changes Backport 51bceb46f8eeb7c3d060387be315ca41855933c2 Requested by: @mstorsjo --- Full diff: https://github.com/llvm/llvm-project/pull/133153.diff 1 Files Affected: - (modified) libcxx/test/tools/clang_tidy_checks/CMakeLists.txt (+4-3) ```diff diff --git a/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt b/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt index 0f8f0e8864d0f..da045fac92ce4 100644 --- a/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt +++ b/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt @@ -8,6 +8,10 @@ set(Clang_DIR_SAVE ${Clang_DIR}) # versions must match. Otherwise there likely will be ODR-violations. This had # led to crashes and incorrect output of the clang-tidy based checks. find_package(Clang ${CMAKE_CXX_COMPILER_VERSION}) + +set(LLVM_DIR "${LLVM_DIR_SAVE}" CACHE PATH "The directory containing a CMake configuration file for LLVM." FORCE) +set(Clang_DIR "${Clang_DIR_SAVE}" CACHE PATH "The directory containing a CMake configuration file for Clang." FORCE) + if(NOT Clang_FOUND) message(STATUS "Clang-tidy tests are disabled since the " "Clang development package is unavailable.") @@ -19,9 +23,6 @@ if(NOT TARGET clangTidy) return() endif() -set(LLVM_DIR "${LLVM_DIR_SAVE}" CACHE PATH "The directory containing a CMake configuration file for LLVM." FORCE) -set(Clang_DIR "${Clang_DIR_SAVE}" CACHE PATH "The directory containing a CMake configuration file for Clang." FORCE) - message(STATUS "Found system-installed LLVM ${LLVM_PACKAGE_VERSION} with headers in ${LLVM_INCLUDE_DIRS}") set(CMAKE_CXX_STANDARD 20) ``` https://github.com/llvm/llvm-project/pull/133153
[llvm-branch-commits] [libcxx] release/20.x: [libcxx] [test] Fix restoring LLVM_DIR and Clang_DIR (#132838) (PR #133153)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/133153
[llvm-branch-commits] [clang] [clang-tools-extra] [clang] support pack expansions for trailing requires clauses (PR #133190)
https://github.com/mizvekov updated https://github.com/llvm/llvm-project/pull/133190 >From bb164f3a8c86282ff6cb317ff10df21b33b11520 Mon Sep 17 00:00:00 2001 From: Matheus Izvekov Date: Wed, 26 Mar 2025 18:38:34 -0300 Subject: [PATCH] [clang] support pack expansions for trailing requires clauses This fixes a crash when evaluating constraints from trailing requires clauses, when these are part of a generic lambda which is expanded. --- .../refactor/tweaks/ExtractVariable.cpp | 6 +-- clang/docs/ReleaseNotes.rst | 2 + clang/include/clang/AST/ASTNodeTraverser.h| 4 +- clang/include/clang/AST/Decl.h| 36 ++--- clang/include/clang/AST/DeclCXX.h | 20 clang/include/clang/AST/ExprCXX.h | 2 +- clang/include/clang/AST/RecursiveASTVisitor.h | 9 ++-- clang/include/clang/Sema/Sema.h | 14 +++--- clang/lib/AST/ASTContext.cpp | 7 ++- clang/lib/AST/ASTImporter.cpp | 5 +- clang/lib/AST/Decl.cpp| 16 +++--- clang/lib/AST/DeclCXX.cpp | 33 ++-- clang/lib/AST/DeclPrinter.cpp | 10 ++-- clang/lib/AST/DeclTemplate.cpp| 6 +-- clang/lib/AST/ExprCXX.cpp | 2 +- clang/lib/AST/ItaniumMangle.cpp | 4 +- clang/lib/ASTMatchers/ASTMatchFinder.cpp | 3 +- clang/lib/Index/IndexDecl.cpp | 4 +- clang/lib/Sema/SemaConcept.cpp| 6 +-- clang/lib/Sema/SemaDecl.cpp | 24 - clang/lib/Sema/SemaDeclCXX.cpp| 4 +- clang/lib/Sema/SemaFunctionEffects.cpp| 2 +- clang/lib/Sema/SemaLambda.cpp | 18 --- clang/lib/Sema/SemaOverload.cpp | 12 +++-- clang/lib/Sema/SemaTemplateDeductionGuide.cpp | 50 --- .../lib/Sema/SemaTemplateInstantiateDecl.cpp | 4 +- clang/lib/Sema/TreeTransform.h| 7 ++- clang/lib/Serialization/ASTReaderDecl.cpp | 2 +- clang/lib/Serialization/ASTWriterDecl.cpp | 5 +- .../SemaCXX/fold_lambda_with_variadics.cpp| 9 clang/tools/libclang/CIndex.cpp | 2 +- 31 files changed, 191 insertions(+), 137 deletions(-) diff --git a/clang-tools-extra/clangd/refactor/tweaks/ExtractVariable.cpp b/clang-tools-extra/clangd/refactor/tweaks/ExtractVariable.cpp index d84e501b87ce7..90dac3b76c648 100644 --- a/clang-tools-extra/clangd/refactor/tweaks/ExtractVariable.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/ExtractVariable.cpp @@ -100,9 +100,9 @@ computeReferencedDecls(const clang::Expr *Expr) { TraverseLambdaCapture(LExpr, &Capture, Initializer); } - if (clang::Expr *const RequiresClause = - LExpr->getTrailingRequiresClause()) { -TraverseStmt(RequiresClause); + if (const clang::Expr *RequiresClause = + LExpr->getTrailingRequiresClause().ConstraintExpr) { +TraverseStmt(const_cast(RequiresClause)); } for (auto *const TemplateParam : LExpr->getExplicitTemplateParameters()) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 962016c62fc68..9415b29191ddd 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -352,6 +352,8 @@ Bug Fixes to C++ Support - Improved fix for an issue with pack expansions of type constraints, where this now also works if the constraint has non-type or template template parameters. (#GH131798) +- Fix crash when evaluating trailing requires clause of generic lambdas which are part of + a pack expansion. - Fixes matching of nested template template parameters. (#GH130362) - Correctly diagnoses template template paramters which have a pack parameter not in the last position. 
diff --git a/clang/include/clang/AST/ASTNodeTraverser.h b/clang/include/clang/AST/ASTNodeTraverser.h index f557555e96e59..b16595db07e18 100644 --- a/clang/include/clang/AST/ASTNodeTraverser.h +++ b/clang/include/clang/AST/ASTNodeTraverser.h @@ -534,8 +534,8 @@ class ASTNodeTraverser for (const auto *Parameter : D->parameters()) Visit(Parameter); -if (const Expr *TRC = D->getTrailingRequiresClause()) - Visit(TRC); +if (const AssociatedConstraint &TRC = D->getTrailingRequiresClause()) + Visit(TRC.ConstraintExpr); if (Traversal == TK_IgnoreUnlessSpelledInSource && D->isDefaulted()) return; diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h index 6557a4c4962ec..64ee8fcff40bc 100644 --- a/clang/include/clang/AST/Decl.h +++ b/clang/include/clang/AST/Decl.h @@ -81,13 +81,17 @@ enum class ImplicitParamKind; // Holds a constraint expression along with a pack expansion index, if // expanded. struct AssociatedConstraint { - const Expr *ConstraintExpr; - int ArgumentPackSubstitutionIndex; + const Expr *ConstraintExpr = nullptr; + int ArgumentPackSubstitutionIndex = -1; + + cons
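For readers without the test file at hand, the crash scenario has roughly the following shape. This is a hedged reconstruction modeled on the `fold_lambda_with_variadics.cpp` test named in the diffstat, not code copied from the patch:

```cpp
// Each expansion of the pack Ts produces a generic lambda whose trailing
// requires clause must be evaluated with the correct pack substitution
// index; before this fix, evaluating those constraints could crash.
template <class... Ts>
constexpr int sum() {
  return ([]<class U>(U u)
            requires (sizeof(Ts) >= 1)  // trailing requires clause,
          { return (int)u; }(Ts{})      // inside a pack expansion
          + ... + 0);
}

static_assert(sum<int, char>() == 0);
```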
[llvm-branch-commits] [llvm] [CodeGen][StaticDataSplitter]Support constant pool partitioning (PR #129781)
https://github.com/williamweixiao approved this pull request. https://github.com/llvm/llvm-project/pull/129781