[llvm-branch-commits] [llvm] DAG: Use modf vector libcalls through RuntimeLibcalls (PR #166986)
https://github.com/RKSimon approved this pull request. https://github.com/llvm/llvm-project/pull/166986 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] DAG: Move expandMultipleResultFPLibCall to TargetLowering (NFC) (PR #166988)
https://github.com/paulwalker-arm approved this pull request. https://github.com/llvm/llvm-project/pull/166988 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SPIRV][SPIRVPrepareGlobals] Map AMD's dynamic LDS 0-element globals to arrays with UINT32_MAX elements (PR #166952)
https://github.com/jmmartinez updated
https://github.com/llvm/llvm-project/pull/166952
From 8cbc3fd6403aef2cff7dbe585aa8d6762a011ef4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?=
Date: Fri, 7 Nov 2025 12:24:18 +0100
Subject: [PATCH 1/2] [SPIRV][SPIRVPrepareGlobals] Map AMD's dynamic LDS
0-element globals to arrays with UINT32_MAX elements
In HIP, dynamic LDS globals are represented using 0-element global
arrays in the __shared__ language address space.
extern __shared__ LDS[];
These are not representable in SPIRV directly.
To represent them, for AMD, we use an array with UINT32_MAX-elements.
These are reverse translated to 0-element arrays later in AMD's SPIRV runtime
pipeline.
---
llvm/lib/Target/SPIRV/SPIRVPrepareGlobals.cpp | 27 +++
llvm/test/CodeGen/SPIRV/hip_dyn_lds.ll| 20 ++
2 files changed, 47 insertions(+)
create mode 100644 llvm/test/CodeGen/SPIRV/hip_dyn_lds.ll
diff --git a/llvm/lib/Target/SPIRV/SPIRVPrepareGlobals.cpp
b/llvm/lib/Target/SPIRV/SPIRVPrepareGlobals.cpp
index c44c53129f1e0..42a9577bb2054 100644
--- a/llvm/lib/Target/SPIRV/SPIRVPrepareGlobals.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVPrepareGlobals.cpp
@@ -13,6 +13,7 @@
#include "SPIRV.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Module.h"
using namespace llvm;
@@ -43,6 +44,29 @@ bool tryExtendLLVMBitcodeMarker(GlobalVariable &Bitcode) {
return true;
}
+bool tryExtendDynamicLDSGlobal(GlobalVariable &GV) {
+ constexpr unsigned WorkgroupAS = 3;
+ const bool IsWorkgroupExternal =
+ GV.hasExternalLinkage() && GV.getAddressSpace() == WorkgroupAS;
+ if (!IsWorkgroupExternal)
+return false;
+
+ const ArrayType *AT = dyn_cast(GV.getValueType());
+ if (!AT || AT->getNumElements() != 0)
+return false;
+
+ constexpr auto Magic = std::numeric_limits::max();
+ ArrayType *NewAT = ArrayType::get(AT->getElementType(), Magic);
+ GlobalVariable *NewGV = new GlobalVariable(
+ *GV.getParent(), NewAT, GV.isConstant(), GV.getLinkage(), nullptr, "",
+ &GV, GV.getThreadLocalMode(), WorkgroupAS, GV.isExternallyInitialized());
+ NewGV->takeName(&GV);
+ GV.replaceAllUsesWith(NewGV);
+ GV.eraseFromParent();
+
+ return true;
+}
+
bool SPIRVPrepareGlobals::runOnModule(Module &M) {
const bool IsAMD = M.getTargetTriple().getVendor() == Triple::AMD;
if (!IsAMD)
@@ -52,6 +76,9 @@ bool SPIRVPrepareGlobals::runOnModule(Module &M) {
if (GlobalVariable *Bitcode = M.getNamedGlobal("llvm.embedded.module"))
Changed |= tryExtendLLVMBitcodeMarker(*Bitcode);
+ for (GlobalVariable &GV : make_early_inc_range(M.globals()))
+Changed |= tryExtendDynamicLDSGlobal(GV);
+
return Changed;
}
char SPIRVPrepareGlobals::ID = 0;
diff --git a/llvm/test/CodeGen/SPIRV/hip_dyn_lds.ll
b/llvm/test/CodeGen/SPIRV/hip_dyn_lds.ll
new file mode 100644
index 0..f0acfdfdede9d
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/hip_dyn_lds.ll
@@ -0,0 +1,20 @@
+; RUN: llc -verify-machineinstrs -mtriple=spirv64-amd-amdhsa %s -o - |
FileCheck %s
+; RUN: %if spirv-tools %{ llc -mtriple=spirv64-amd-amdhsa %s -o -
-filetype=obj | spirv-val %}
+
+; CHECK: OpName %[[#LDS:]] "lds"
+; CHECK: OpDecorate %[[#LDS]] LinkageAttributes "lds" Import
+; CHECK: %[[#UINT:]] = OpTypeInt 32 0
+; CHECK: %[[#UINT_MAX:]] = OpConstant %[[#UINT]] 4294967295
+; CHECK: %[[#LDS_ARR_TY:]] = OpTypeArray %[[#UINT]] %[[#UINT_MAX]]
+; CHECK: %[[#LDS_ARR_PTR_WG:]] = OpTypePointer Workgroup %[[#LDS_ARR_TY]]
+; CHECK: %[[#LDS]] = OpVariable %[[#LDS_ARR_PTR_WG]] Workgroup
+
+@lds = external addrspace(3) global [0 x i32]
+
+define spir_kernel void @foo(ptr addrspace(4) %in, ptr addrspace(4) %out) {
+entry:
+ %val = load i32, ptr addrspace(4) %in
+ %add = add i32 %val, 1
+ store i32 %add, ptr addrspace(4) %out
+ ret void
+}
From 0376c3e6457061bea6ec16cb9df6789b93cac69f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?=
Date: Mon, 10 Nov 2025 11:08:22 +0100
Subject: [PATCH 2/2] [Review] Rename Magic->UInt32Max
---
llvm/lib/Target/SPIRV/SPIRVPrepareGlobals.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/SPIRV/SPIRVPrepareGlobals.cpp
b/llvm/lib/Target/SPIRV/SPIRVPrepareGlobals.cpp
index 42a9577bb2054..2b4349e5d9e39 100644
--- a/llvm/lib/Target/SPIRV/SPIRVPrepareGlobals.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVPrepareGlobals.cpp
@@ -55,8 +55,8 @@ bool tryExtendDynamicLDSGlobal(GlobalVariable &GV) {
if (!AT || AT->getNumElements() != 0)
return false;
- constexpr auto Magic = std::numeric_limits::max();
- ArrayType *NewAT = ArrayType::get(AT->getElementType(), Magic);
+ constexpr auto UInt32Max = std::numeric_limits::max();
+ ArrayType *NewAT = ArrayType::get(AT->getElementType(), UInt32Max);
GlobalVariable *NewGV = new GlobalVariable(
*GV.getParent(), NewAT, GV.isConstant(), GV.getLinkage(), nullptr, "",
&GV, GV.getThreadLocalMode(), WorkgroupAS, GV.isExternallyInit
[llvm-branch-commits] [llvm] [LoopVectorize][NFC] Refactor widening decision logic (PR #140722)
@@ -5617,14 +5618,8 @@ void
LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
}
// We assume that widening is the best solution when possible.
- if (memoryInstructionCanBeWidened(&I, VF)) {
-InstructionCost Cost = getConsecutiveMemOpCost(&I, VF);
-int ConsecutiveStride = Legal->isConsecutivePtr(
-getLoadStoreType(&I), getLoadStorePointerOperand(&I));
-assert((ConsecutiveStride == 1 || ConsecutiveStride == -1) &&
- "Expected consecutive stride.");
-InstWidening Decision =
-ConsecutiveStride == 1 ? CM_Widen : CM_Widen_Reverse;
+ if (auto Decision = memoryInstructionCanBeWidened(&I, VF)) {
david-arm wrote:
I'm a bit worried about assuming CM_Unknown is always 0. I think it's better to
be more explicit, i.e.
```
InstWidening Decision = memoryInstructionCanBeWidened(&I, VF);
if (Decision != CM_Unknown) {
...
```
https://github.com/llvm/llvm-project/pull/140722
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Only fold flat offsets if they are inbounds PTRADDs (PR #165427)
ritter-x2a wrote: Ping. https://github.com/llvm/llvm-project/pull/165427 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] DAG: Handle AssertNoFPClass in computeKnownBits (PR #167289)
@@ -4121,6 +4121,27 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op,
const APInt &DemandedElts,
Known.One.clearLowBits(LogOfAlign);
break;
}
+ case ISD::AssertNoFPClass: {
+Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
dtcxzyw wrote:
Does SDAG's version of `computeKnownBits` also work for floating-point values?
https://github.com/llvm/llvm-project/pull/167289
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] DAG: Handle AssertNoFPClass in computeKnownBits (PR #167289)
@@ -4121,6 +4121,27 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op,
const APInt &DemandedElts,
Known.One.clearLowBits(LogOfAlign);
break;
}
+ case ISD::AssertNoFPClass: {
+Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+
+FPClassTest NoFPClass =
+static_cast(Op.getConstantOperandVal(1));
+const FPClassTest NegativeTestMask = fcNan | fcNegative;
+if ((NoFPClass & NegativeTestMask) == NegativeTestMask) {
+ // Cannot be negative.
+ Known.Zero.setSignBit();
dtcxzyw wrote:
Use `makeNegative/makeNonNegative`. If there are conflict bits the value is
poison.
https://github.com/llvm/llvm-project/pull/167289
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add baseline tests for copysign with known signmask input (PR #167265)
https://github.com/dtcxzyw approved this pull request. https://github.com/llvm/llvm-project/pull/167265 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] RuntimeLibcalls: Add definitions for vector math functions (PR #167026)
@@ -182,10 +182,63 @@ foreach FPTy = ["F32", "F64", "F80", "F128", "PPCF128"]
in {
def MODF_#FPTy : RuntimeLibcall;
}
-foreach VecTy = ["V4F32", "V2F64", "NXV4F32", "NXV2F64"] in {
- def MODF_#VecTy : RuntimeLibcall;
- def SINCOS_#VecTy : RuntimeLibcall;
- def SINCOSPI_#VecTy : RuntimeLibcall;
+defvar F32VectorSuffixes = ["V2F32", "V4F32", "V8F32", "V16F32", "NXV4F32"];
+defvar F64VectorSuffixes = ["V2F64", "V4F64", "V8F64", "NXV2F64"];
paulwalker-arm wrote:
What's gained from adding so many incomplete definitions though? This might
cause a lot of churn, as RuntimeLibcall capabilities that require many of these
definitions would need to be updated. Does it hamper your progress if, say, a
couple of each type is added instead?
https://github.com/llvm/llvm-project/pull/167026
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] RuntimeLibcalls: Add definitions for vector math functions (PR #167026)
https://github.com/paulwalker-arm edited https://github.com/llvm/llvm-project/pull/167026 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] RuntimeLibcalls: Add entries for vector sincospi functions (PR #166981)
@@ -151,9 +188,69 @@ RuntimeLibcallsInfo::getFunctionTy(LLVMContext &Ctx, const
Triple &TT,
fcNegNormal));
return {FuncTy, Attrs};
}
+ case RTLIB::impl__ZGVnN4vl4l4_sincospif:
+ case RTLIB::impl__ZGVnN2vl8l8_sincospi:
+ case RTLIB::impl__ZGVsNxvl4l4_sincospif:
+ case RTLIB::impl__ZGVsNxvl8l8_sincospi:
+ case RTLIB::impl_armpl_vsincospiq_f32:
+ case RTLIB::impl_armpl_vsincospiq_f64:
+ case RTLIB::impl_armpl_svsincospi_f32_x:
+ case RTLIB::impl_armpl_svsincospi_f64_x: {
+AttrBuilder FuncAttrBuilder(Ctx);
+
+bool IsF32 = LibcallImpl == RTLIB::impl__ZGVnN4vl4l4_sincospif ||
+ LibcallImpl == RTLIB::impl__ZGVsNxvl4l4_sincospif ||
+ LibcallImpl == RTLIB::impl_armpl_vsincospiq_f32 ||
+ LibcallImpl == RTLIB::impl_armpl_svsincospi_f32_x;
+Type *ScalarTy = IsF32 ? Type::getFloatTy(Ctx) : Type::getDoubleTy(Ctx);
+unsigned EC = IsF32 ? 4 : 2;
+
+bool IsScalable = LibcallImpl == RTLIB::impl__ZGVsNxvl4l4_sincospif ||
+ LibcallImpl == RTLIB::impl__ZGVsNxvl8l8_sincospi ||
+ LibcallImpl == RTLIB::impl_armpl_svsincospi_f32_x ||
+ LibcallImpl == RTLIB::impl_armpl_svsincospi_f64_x;
+Type *VecTy =
+IsScalable ? static_cast(ScalableVectorType::get(ScalarTy, EC))
+ : static_cast(FixedVectorType::get(ScalarTy, EC));
paulwalker-arm wrote:
```suggestion
VectorType *VecTy = VectorType::get(ScalarTy, EC, IsScalable);
```
assuming EC is an unsigned rather than ElementCount because it makes things less
readable?
https://github.com/llvm/llvm-project/pull/166981
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] RuntimeLibcalls: Add entries for vector sincospi functions (PR #166981)
@@ -151,9 +188,69 @@ RuntimeLibcallsInfo::getFunctionTy(LLVMContext &Ctx, const
Triple &TT,
fcNegNormal));
return {FuncTy, Attrs};
}
+ case RTLIB::impl__ZGVnN4vl4l4_sincospif:
+ case RTLIB::impl__ZGVnN2vl8l8_sincospi:
+ case RTLIB::impl__ZGVsNxvl4l4_sincospif:
+ case RTLIB::impl__ZGVsNxvl8l8_sincospi:
+ case RTLIB::impl_armpl_vsincospiq_f32:
+ case RTLIB::impl_armpl_vsincospiq_f64:
+ case RTLIB::impl_armpl_svsincospi_f32_x:
+ case RTLIB::impl_armpl_svsincospi_f64_x: {
+AttrBuilder FuncAttrBuilder(Ctx);
+
+bool IsF32 = LibcallImpl == RTLIB::impl__ZGVnN4vl4l4_sincospif ||
+ LibcallImpl == RTLIB::impl__ZGVsNxvl4l4_sincospif ||
+ LibcallImpl == RTLIB::impl_armpl_vsincospiq_f32 ||
+ LibcallImpl == RTLIB::impl_armpl_svsincospi_f32_x;
+Type *ScalarTy = IsF32 ? Type::getFloatTy(Ctx) : Type::getDoubleTy(Ctx);
+unsigned EC = IsF32 ? 4 : 2;
+
+bool IsScalable = LibcallImpl == RTLIB::impl__ZGVsNxvl4l4_sincospif ||
+ LibcallImpl == RTLIB::impl__ZGVsNxvl8l8_sincospi ||
+ LibcallImpl == RTLIB::impl_armpl_svsincospi_f32_x ||
+ LibcallImpl == RTLIB::impl_armpl_svsincospi_f64_x;
+Type *VecTy =
+IsScalable ? static_cast(ScalableVectorType::get(ScalarTy, EC))
+ : static_cast(FixedVectorType::get(ScalarTy, EC));
+
+for (Attribute::AttrKind Attr : CommonFnAttrs)
+ FuncAttrBuilder.addAttribute(Attr);
+FuncAttrBuilder.addMemoryAttr(MemoryEffects::argMemOnly(ModRefInfo::Mod));
+
+AttributeList Attrs;
+Attrs = Attrs.addFnAttributes(Ctx, FuncAttrBuilder);
+
+{
+ AttrBuilder ArgAttrBuilder(Ctx);
+ for (Attribute::AttrKind AK : CommonPtrArgAttrs)
+ArgAttrBuilder.addAttribute(AK);
+ ArgAttrBuilder.addAlignmentAttr(DL.getABITypeAlign(VecTy));
+ Attrs = Attrs.addParamAttributes(Ctx, 1, ArgAttrBuilder);
+ Attrs = Attrs.addParamAttributes(Ctx, 2, ArgAttrBuilder);
+}
+
+PointerType *PtrTy = PointerType::get(Ctx, 0);
+SmallVector ArgTys = {VecTy, PtrTy, PtrTy};
+if (IsScalable && hasVectorMaskArgument(LibcallImpl))
+ ArgTys.push_back(ScalableVectorType::get(Type::getInt1Ty(Ctx), EC));
+
+return {FunctionType::get(Type::getVoidTy(Ctx), ArgTys, false), Attrs};
+ }
default:
return {};
}
return {};
}
+
+bool RuntimeLibcallsInfo::hasVectorMaskArgument(RTLIB::LibcallImpl Impl) {
+ /// FIXME: This should be generated by tablegen and support the argument at
an
+ /// arbitrary position
paulwalker-arm wrote:
The library functions within TargetLibraryInfo include a VFABI string to
provide this information. Perhaps that's not a good fit? not least because in
order to decode a full FunctionType requires the scalar function declaration
the vector function is a variant of. I just figured it's worth mentioning so
that we can share any good bits.
https://github.com/llvm/llvm-project/pull/166981
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] RuntimeLibcalls: Add entries for vector sincospi functions (PR #166981)
https://github.com/paulwalker-arm edited https://github.com/llvm/llvm-project/pull/166981 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] RuntimeLibcalls: Add entries for vector sincospi functions (PR #166981)
https://github.com/paulwalker-arm commented: Does RuntimeLibcalls include calling convention information? ArmPL's NEON functions (those prefixed by `armpl_v`) do not use the standard calling convention. They are functionally backwards compatible but they'll be a performance hit. This is not necessarily an issue for this patch, assuming it doesn't affect the existing TargetLibraryInfo lookups, but I'll feel happier knowing what the support/plan is for different calling conventions. https://github.com/llvm/llvm-project/pull/166981 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] RuntimeLibcalls: Add entries for vector sincospi functions (PR #166981)
@@ -151,9 +188,69 @@ RuntimeLibcallsInfo::getFunctionTy(LLVMContext &Ctx, const
Triple &TT,
fcNegNormal));
return {FuncTy, Attrs};
}
+ case RTLIB::impl__ZGVnN4vl4l4_sincospif:
+ case RTLIB::impl__ZGVnN2vl8l8_sincospi:
+ case RTLIB::impl__ZGVsNxvl4l4_sincospif:
+ case RTLIB::impl__ZGVsNxvl8l8_sincospi:
+ case RTLIB::impl_armpl_vsincospiq_f32:
+ case RTLIB::impl_armpl_vsincospiq_f64:
+ case RTLIB::impl_armpl_svsincospi_f32_x:
+ case RTLIB::impl_armpl_svsincospi_f64_x: {
+AttrBuilder FuncAttrBuilder(Ctx);
+
+bool IsF32 = LibcallImpl == RTLIB::impl__ZGVnN4vl4l4_sincospif ||
+ LibcallImpl == RTLIB::impl__ZGVsNxvl4l4_sincospif ||
+ LibcallImpl == RTLIB::impl_armpl_vsincospiq_f32 ||
+ LibcallImpl == RTLIB::impl_armpl_svsincospi_f32_x;
+Type *ScalarTy = IsF32 ? Type::getFloatTy(Ctx) : Type::getDoubleTy(Ctx);
+unsigned EC = IsF32 ? 4 : 2;
+
+bool IsScalable = LibcallImpl == RTLIB::impl__ZGVsNxvl4l4_sincospif ||
+ LibcallImpl == RTLIB::impl__ZGVsNxvl8l8_sincospi ||
+ LibcallImpl == RTLIB::impl_armpl_svsincospi_f32_x ||
+ LibcallImpl == RTLIB::impl_armpl_svsincospi_f64_x;
+Type *VecTy =
+IsScalable ? static_cast(ScalableVectorType::get(ScalarTy, EC))
+ : static_cast(FixedVectorType::get(ScalarTy, EC));
+
+for (Attribute::AttrKind Attr : CommonFnAttrs)
+ FuncAttrBuilder.addAttribute(Attr);
+FuncAttrBuilder.addMemoryAttr(MemoryEffects::argMemOnly(ModRefInfo::Mod));
+
+AttributeList Attrs;
+Attrs = Attrs.addFnAttributes(Ctx, FuncAttrBuilder);
+
+{
+ AttrBuilder ArgAttrBuilder(Ctx);
+ for (Attribute::AttrKind AK : CommonPtrArgAttrs)
+ArgAttrBuilder.addAttribute(AK);
+ ArgAttrBuilder.addAlignmentAttr(DL.getABITypeAlign(VecTy));
+ Attrs = Attrs.addParamAttributes(Ctx, 1, ArgAttrBuilder);
+ Attrs = Attrs.addParamAttributes(Ctx, 2, ArgAttrBuilder);
+}
+
+PointerType *PtrTy = PointerType::get(Ctx, 0);
+SmallVector ArgTys = {VecTy, PtrTy, PtrTy};
+if (IsScalable && hasVectorMaskArgument(LibcallImpl))
+ ArgTys.push_back(ScalableVectorType::get(Type::getInt1Ty(Ctx), EC));
paulwalker-arm wrote:
```suggestion
if (hasVectorMaskArgument(LibcallImpl))
ArgTys.push_back(VectorType::get(Type::getInt1Ty(Ctx), EC, IsScalable));
```
https://github.com/llvm/llvm-project/pull/166981
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LoopVectorize][NFC] Refactor widening decision logic (PR #140722)
@@ -5617,14 +5618,8 @@ void
LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
}
// We assume that widening is the best solution when possible.
- if (memoryInstructionCanBeWidened(&I, VF)) {
-InstructionCost Cost = getConsecutiveMemOpCost(&I, VF);
-int ConsecutiveStride = Legal->isConsecutivePtr(
-getLoadStoreType(&I), getLoadStorePointerOperand(&I));
-assert((ConsecutiveStride == 1 || ConsecutiveStride == -1) &&
david-arm wrote:
This assert can be moved to `memoryInstructionCanBeWidened`.
https://github.com/llvm/llvm-project/pull/140722
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LoopVectorize][NFC] Refactor widening decision logic (PR #140722)
@@ -5183,17 +5187,15 @@
LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
return Cost;
}
-InstructionCost
-LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
-ElementCount VF) {
+InstructionCost LoopVectorizationCostModel::getConsecutiveMemOpCost(
+Instruction *I, ElementCount VF, InstWidening Decision) {
Type *ValTy = getLoadStoreType(I);
auto *VectorTy = cast(toVectorTy(ValTy, VF));
- Value *Ptr = getLoadStorePointerOperand(I);
unsigned AS = getLoadStoreAddressSpace(I);
- int ConsecutiveStride = Legal->isConsecutivePtr(ValTy, Ptr);
+ enum TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
david-arm wrote:
I don't think this is correct because we sometimes optimise for size. You can
delete this line, since LoopVectorizationCostModel already has a CostKind
member.
https://github.com/llvm/llvm-project/pull/140722
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LoopVectorize][NFC] Refactor widening decision logic (PR #140722)
@@ -1240,9 +1240,10 @@ class LoopVectorizationCostModel {
getDivRemSpeculationCost(Instruction *I,
ElementCount VF) const;
- /// Returns true if \p I is a memory instruction with consecutive memory
- /// access that can be widened.
- bool memoryInstructionCanBeWidened(Instruction *I, ElementCount VF);
+ /// Returns widening decision (CM_Widen or CM_Widen_Reverse) if \p I is a
+ /// memory instruction with consecutive access that can be widened, or
+ /// CM_Unknown otherwise.
+ InstWidening memoryInstructionCanBeWidened(Instruction *I, ElementCount VF);
david-arm wrote:
This is just a suggestion, but given this function is only called from one
place - getConsecutiveMemOpCost - I wonder if it's worth deleting and simply
moving the logic directly into getConsecutiveMemOpCost?
https://github.com/llvm/llvm-project/pull/140722
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LoopVectorize][NFC] Refactor widening decision logic (PR #140722)
@@ -2988,30 +2990,32 @@ bool
LoopVectorizationCostModel::interleavedAccessCanBeWidened(
: TTI.isLegalMaskedStore(Ty, Alignment, AS);
}
-bool LoopVectorizationCostModel::memoryInstructionCanBeWidened(
-Instruction *I, ElementCount VF) {
+LoopVectorizationCostModel::InstWidening
+LoopVectorizationCostModel::memoryInstructionCanBeWidened(Instruction *I,
+ ElementCount VF) {
// Get and ensure we have a valid memory instruction.
assert((isa(I)) && "Invalid memory instruction");
auto *Ptr = getLoadStorePointerOperand(I);
auto *ScalarTy = getLoadStoreType(I);
// In order to be widened, the pointer should be consecutive, first of all.
- if (!Legal->isConsecutivePtr(ScalarTy, Ptr))
-return false;
+ auto Stride = Legal->isConsecutivePtr(ScalarTy, Ptr);
+ if (!Stride)
+return CM_Unknown;
// If the instruction is a store located in a predicated block, it will be
// scalarized.
if (isScalarWithPredication(I, VF))
-return false;
+return CM_Unknown;
// If the instruction's allocated size doesn't equal it's type size, it
// requires padding and will be scalarized.
auto &DL = I->getDataLayout();
if (hasIrregularType(ScalarTy, DL))
-return false;
+return CM_Unknown;
- return true;
+ return Stride == 1 ? CM_Widen : CM_Widen_Reverse;
david-arm wrote:
Is it worth adding an assert that Stride is 1 or -1, since you're assuming that
when the stride is not 1 it must be -1?
https://github.com/llvm/llvm-project/pull/140722
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Improve InsertNegateRAStatePass::inferUnknownStates (PR #163381)
https://github.com/bgergely0 updated
https://github.com/llvm/llvm-project/pull/163381
From 6219f74fb2d51008bc1ff953822a7e446eea2e09 Mon Sep 17 00:00:00 2001
From: Gergely Balint
Date: Tue, 7 Oct 2025 14:01:47 +
Subject: [PATCH 1/4] [BOLT] Improve
InsertNegateRAStatePass::inferUnknownStates
Previous implementation used a simple heuristic. This can be improved in
several ways:
- If a BasicBlock has instructions both with known RAState and unknown RAState,
use the known states to work out the unknown ones.
- If a BasicBlock only consists of instructions with unknown RAState,
use the last known RAState from its predecessors, or the first known
from its successors to set the RAStates in the BasicBlock. This includes
error checking: all predecessors/successors should have the same RAState.
- Some BasicBlocks may only contain instructions with unknown RAState,
and have no CFG neighbors. These already have incorrect unwind info.
For these, we copy the last known RAState based on the layout order.
Updated bolt/docs/PacRetDesign.md to reflect changes.
---
bolt/docs/PacRetDesign.md | 23 +-
.../bolt/Passes/InsertNegateRAStatePass.h | 34 ++-
bolt/lib/Passes/InsertNegateRAStatePass.cpp | 221 --
3 files changed, 252 insertions(+), 26 deletions(-)
diff --git a/bolt/docs/PacRetDesign.md b/bolt/docs/PacRetDesign.md
index f3fe5fbd522cb..c7c76cac3a100 100644
--- a/bolt/docs/PacRetDesign.md
+++ b/bolt/docs/PacRetDesign.md
@@ -200,16 +200,29 @@ This pass runs after optimizations. It performns the
_inverse_ of MarkRAState pa
Some BOLT passes can add new Instructions. In InsertNegateRAStatePass, we have
to know what RA state these have.
-The current solution has the `inferUnknownStates` function to cover these,
using
-a fairly simple strategy: unknown states inherit the last known state.
-
-This will be updated to a more robust solution.
-
> [!important]
> As issue #160989 describes, unwind info is incorrect in stubs with multiple
> callers.
> For this same reason, we cannot generate correct pac-specific unwind info:
> the signess
> of the _incorrect_ return address is meaningless.
+Assignment of RAStates to newly generated instructions is done in
`inferUnknownStates`.
+We have three different cases to cover:
+
+1. If a BasicBlock has some instructions with known RA state, and some
without, we
+ can copy the RAState of known instructions to the unknown ones. As the
control
+ flow only changes between BasicBlocks, instructions in the same BasicBlock
have the
+ same return address.
+
+2. If all instructions in a BasicBlock are unknown, we can look at all CFG
neighbors
+ (that is predecessors/successors). The RAState should be the same as of the
+ neighboring blocks. Conflicting RAStates in neighbors indicate an error.
Such
+ functions should be ignored.
+
+3. If a BasicBlock has no CFG neighbors, we have to copy the RAState of the
previous
+BasicBlock in layout order.
+
+If any BasicBlocks remain with unknown instructions, the function will be
ignored.
+
### Optimizations requiring special attention
Marking states before optimizations ensure that instructions can be moved
around
diff --git a/bolt/include/bolt/Passes/InsertNegateRAStatePass.h
b/bolt/include/bolt/Passes/InsertNegateRAStatePass.h
index 836948bf5e9c0..b4b428207b657 100644
--- a/bolt/include/bolt/Passes/InsertNegateRAStatePass.h
+++ b/bolt/include/bolt/Passes/InsertNegateRAStatePass.h
@@ -1,4 +1,4 @@
-//===- bolt/Passes/InsertNegateRAStatePass.cpp
===//
+//===- bolt/Passes/InsertNegateRAStatePass.h
--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -30,9 +30,39 @@ class InsertNegateRAState : public BinaryFunctionPass {
private:
/// Because states are tracked as MCAnnotations on individual instructions,
/// newly inserted instructions do not have a state associated with them.
- /// New states are "inherited" from the last known state.
void inferUnknownStates(BinaryFunction &BF);
+ /// Simple case: copy RAStates to unknown insts from previous inst.
+ /// Account for signing and authenticating insts.
+ void fillUnknownStateInBB(BinaryContext &BC, BinaryBasicBlock &BB);
+
+ /// Fill unknown RAStates in BBs with no successors/predecessors. These are
+ /// Stubs inserted by LongJmp. As of #160989, we have to copy the RAState
from
+ /// the previous BB in the layout, because CFIs are already incorrect here.
+ void fillUnknownStubs(BinaryFunction &BF);
+
+ /// Fills unknowns RAStates of BBs with successors/predecessors. Uses
+ /// getRAStateByCFG to determine the RAState. Does more than one iteration if
+ /// needed. Reports an error, if it cannot find the RAState for all BBs with
+ /// predecessors/successors.
+ void fillUnknownBlocksInCFG(BinaryFunction &BF);
+
+ /// For
[llvm-branch-commits] [llvm] [BOLT][NFC] Rename Pointer Auth DWARF rewriter passes (PR #164622)
https://github.com/bgergely0 updated
https://github.com/llvm/llvm-project/pull/164622
From 4745d5d3bc440547c3b9559b0a083f8638413266 Mon Sep 17 00:00:00 2001
From: Gergely Balint
Date: Wed, 22 Oct 2025 12:44:37 +
Subject: [PATCH 1/3] [BOLT][NFC] Rename Pointer Auth DWARF rewriter passes
Original names were "working titles". After initial patches are merged,
I'd like to rename these passes to names that reflect their intent
better and show their relationship to each other:
InsertNegateRAStatePass renamed to PointerAuthCFIFixup,
MarkRAStates renamed to PointerAuthCFIAnalyzer.
---
bolt/docs/PacRetDesign.md | 23 ++---
...arkRAStates.h => PointerAuthCFIAnalyzer.h} | 14
...ateRAStatePass.h => PointerAuthCFIFixup.h} | 14
bolt/lib/Core/Exceptions.cpp | 8 ++---
bolt/lib/Passes/CMakeLists.txt| 4 +--
...AStates.cpp => PointerAuthCFIAnalyzer.cpp} | 16 +-
...AStatePass.cpp => PointerAuthCFIFixup.cpp} | 32 +--
bolt/lib/Rewrite/BinaryPassManager.cpp| 8 ++---
bolt/test/AArch64/negate-ra-state-incorrect.s | 2 +-
bolt/test/AArch64/negate-ra-state.s | 8 ++---
bolt/test/AArch64/pacret-split-funcs.s| 4 +--
bolt/unittests/Passes/CMakeLists.txt | 2 +-
...ateRAState.cpp => PointerAuthCFIFixup.cpp} | 6 ++--
.../gn/secondary/bolt/lib/Passes/BUILD.gn | 4 +--
14 files changed, 73 insertions(+), 72 deletions(-)
rename bolt/include/bolt/Passes/{MarkRAStates.h => PointerAuthCFIAnalyzer.h}
(63%)
rename bolt/include/bolt/Passes/{InsertNegateRAStatePass.h =>
PointerAuthCFIFixup.h} (87%)
rename bolt/lib/Passes/{MarkRAStates.cpp => PointerAuthCFIAnalyzer.cpp} (91%)
rename bolt/lib/Passes/{InsertNegateRAStatePass.cpp =>
PointerAuthCFIFixup.cpp} (91%)
rename bolt/unittests/Passes/{InsertNegateRAState.cpp =>
PointerAuthCFIFixup.cpp} (97%)
diff --git a/bolt/docs/PacRetDesign.md b/bolt/docs/PacRetDesign.md
index c7c76cac3a100..0de2da50f8fd6 100644
--- a/bolt/docs/PacRetDesign.md
+++ b/bolt/docs/PacRetDesign.md
@@ -104,9 +104,9 @@ negate-ra-state CFIs will become invalid during BasicBlock
reordering.
## Solution design
The implementation introduces two new passes:
-1. `MarkRAStatesPass`: assigns the RA state to each instruction based on the
CFIs
-in the input binary
-2. `InsertNegateRAStatePass`: reads those assigned instruction RA states after
+1. `PointerAuthCFIAnalyzer`: assigns the RA state to each instruction based on
+the CFI in the input binary
+2. `PointerAuthCFIFixup`: reads those assigned instruction RA states after
optimizations, and emits `DW_CFA_AARCH64_negate_ra_state` CFIs at the
correct
places: wherever there is a state change between two consecutive
instructions
in the layout order.
@@ -129,7 +129,7 @@ instruction.
This special case is handled by adding an `initialRAState` bool to each
BinaryFunction.
If the `Offset` the CFI refers to is zero, we don't store an annotation, but
set
the `initialRAState` in `FillCFIInfoFor`. This information is then used in
-`MarkRAStates`.
+`PointerAuthCFIAnalyzer`.
### Binaries without DWARF info
@@ -146,7 +146,7 @@ In summary:
- pointer auth is used, and we have DWARF CFIs: passes run, and rewrite the
negate-ra-state CFI.
-### MarkRAStates pass
+### PointerAuthCFIAnalyzer pass
This pass runs before optimizations reorder anything.
@@ -173,9 +173,9 @@ what we have before the pass, and after it.
| autiasp | negate-ra-state | signed |
| ret | | unsigned |
-# Error handling in MarkRAState Pass:
+# Error handling in PointerAuthCFIAnalyzer pass:
-Whenever the MarkRAStates pass finds inconsistencies in the current
+Whenever the PointerAuthCFIAnalyzer pass finds inconsistencies in the current
BinaryFunction, it marks the function as ignored using `BF.setIgnored()`. BOLT
will not optimize this function but will emit it unchanged in the original
section
(`.bolt.org.text`).
@@ -188,16 +188,17 @@ The inconsistencies are as follows:
Users will be informed about the number of ignored functions in the pass, the
exact functions ignored, and the found inconsistency.
-### InsertNegateRAStatePass
+### PointerAuthCFIFixup
-This pass runs after optimizations. It performns the _inverse_ of MarkRAState
pa s:
+This pass runs after optimizations. It performns the _inverse_ of
PointerAuthCFIAnalyzer
+pass:
1. it reads the RA state annotations attached to the instructions, and
2. whenever the state changes, it adds a PseudoInstruction that holds an
OpNegateRAState CFI.
# Covering newly generated instructions:
-Some BOLT passes can add new Instructions. In InsertNegateRAStatePass, we have
+Some BOLT passes can add new Instructions. In PointerAuthCFIFixup, we have
to know what RA state these have.
> [!important]
@@ -230,7 +231,7 @@ freely. The only special case is function splitting. W
[llvm-branch-commits] [llvm] [BOLT] Improve InsertNegateRAStatePass::inferUnknownStates (PR #163381)
https://github.com/bgergely0 updated
https://github.com/llvm/llvm-project/pull/163381
From 6219f74fb2d51008bc1ff953822a7e446eea2e09 Mon Sep 17 00:00:00 2001
From: Gergely Balint
Date: Tue, 7 Oct 2025 14:01:47 +
Subject: [PATCH 1/4] [BOLT] Improve
InsertNegateRAStatePass::inferUnknownStates
Previous implementation used a simple heuristic. This can be improved in
several ways:
- If a BasicBlock has instructions both with known RAState and with unknown RAState,
use the known states to work out the unknown ones.
- If a BasicBlock only consists of instructions with unknown RAState,
use the last known RAState from its predecessors, or the first known
from its successors to set the RAStates in the BasicBlock. This includes
error checking: all predecessors/successors should have the same RAState.
- Some BasicBlocks may only contain instructions with unknown RAState,
and have no CFG neighbors. These already have incorrect unwind info.
For these, we copy the last known RAState based on the layout order.
Updated bolt/docs/PacRetDesign.md to reflect changes.
---
bolt/docs/PacRetDesign.md | 23 +-
.../bolt/Passes/InsertNegateRAStatePass.h | 34 ++-
bolt/lib/Passes/InsertNegateRAStatePass.cpp | 221 --
3 files changed, 252 insertions(+), 26 deletions(-)
diff --git a/bolt/docs/PacRetDesign.md b/bolt/docs/PacRetDesign.md
index f3fe5fbd522cb..c7c76cac3a100 100644
--- a/bolt/docs/PacRetDesign.md
+++ b/bolt/docs/PacRetDesign.md
@@ -200,16 +200,29 @@ This pass runs after optimizations. It performns the
_inverse_ of MarkRAState pa
Some BOLT passes can add new Instructions. In InsertNegateRAStatePass, we have
to know what RA state these have.
-The current solution has the `inferUnknownStates` function to cover these,
using
-a fairly simple strategy: unknown states inherit the last known state.
-
-This will be updated to a more robust solution.
-
> [!important]
> As issue #160989 describes, unwind info is incorrect in stubs with multiple
> callers.
> For this same reason, we cannot generate correct pac-specific unwind info:
> the signedness
> of the _incorrect_ return address is meaningless.
+Assignment of RAStates to newly generated instructions is done in
`inferUnknownStates`.
+We have three different cases to cover:
+
+1. If a BasicBlock has some instructions with known RA state, and some
without, we
+ can copy the RAState of known instructions to the unknown ones. As the
control
+ flow only changes between BasicBlocks, instructions in the same BasicBlock
have the
+ same return address.
+
+2. If all instructions in a BasicBlock are unknown, we can look at all CFG
neighbors
+ (that is predecessors/successors). The RAState should be the same as of the
+ neighboring blocks. Conflicting RAStates in neighbors indicate an error.
Such
+ functions should be ignored.
+
+3. If a BasicBlock has no CFG neighbors, we have to copy the RAState of the
previous
+BasicBlock in layout order.
+
+If any BasicBlocks remain with unknown instructions, the function will be
ignored.
+
### Optimizations requiring special attention
Marking states before optimizations ensure that instructions can be moved
around
diff --git a/bolt/include/bolt/Passes/InsertNegateRAStatePass.h
b/bolt/include/bolt/Passes/InsertNegateRAStatePass.h
index 836948bf5e9c0..b4b428207b657 100644
--- a/bolt/include/bolt/Passes/InsertNegateRAStatePass.h
+++ b/bolt/include/bolt/Passes/InsertNegateRAStatePass.h
@@ -1,4 +1,4 @@
-//===- bolt/Passes/InsertNegateRAStatePass.cpp
===//
+//===- bolt/Passes/InsertNegateRAStatePass.h
--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -30,9 +30,39 @@ class InsertNegateRAState : public BinaryFunctionPass {
private:
/// Because states are tracked as MCAnnotations on individual instructions,
/// newly inserted instructions do not have a state associated with them.
- /// New states are "inherited" from the last known state.
void inferUnknownStates(BinaryFunction &BF);
+ /// Simple case: copy RAStates to unknown insts from previous inst.
+ /// Account for signing and authenticating insts.
+ void fillUnknownStateInBB(BinaryContext &BC, BinaryBasicBlock &BB);
+
+ /// Fill unknown RAStates in BBs with no successors/predecessors. These are
+ /// Stubs inserted by LongJmp. As of #160989, we have to copy the RAState
from
+ /// the previous BB in the layout, because CFIs are already incorrect here.
+ void fillUnknownStubs(BinaryFunction &BF);
+
+ /// Fills unknown RAStates of BBs with successors/predecessors. Uses
+ /// getRAStateByCFG to determine the RAState. Does more than one iteration if
+ /// needed. Reports an error, if it cannot find the RAState for all BBs with
+ /// predecessors/successors.
+ void fillUnknownBlocksInCFG(BinaryFunction &BF);
+
+ /// For
[llvm-branch-commits] [llvm] [BOLT][NFC] Rename Pointer Auth DWARF rewriter passes (PR #164622)
https://github.com/bgergely0 updated
https://github.com/llvm/llvm-project/pull/164622
From 4745d5d3bc440547c3b9559b0a083f8638413266 Mon Sep 17 00:00:00 2001
From: Gergely Balint
Date: Wed, 22 Oct 2025 12:44:37 +
Subject: [PATCH 1/3] [BOLT][NFC] Rename Pointer Auth DWARF rewriter passes
Original names were "working titles". After initial patches are merged,
I'd like to rename these passes to names that reflect their intent
better and show their relationship to each other:
InsertNegateRAStatePass renamed to PointerAuthCFIFixup,
MarkRAStates renamed to PointerAuthCFIAnalyzer.
---
bolt/docs/PacRetDesign.md | 23 ++---
...arkRAStates.h => PointerAuthCFIAnalyzer.h} | 14
...ateRAStatePass.h => PointerAuthCFIFixup.h} | 14
bolt/lib/Core/Exceptions.cpp | 8 ++---
bolt/lib/Passes/CMakeLists.txt| 4 +--
...AStates.cpp => PointerAuthCFIAnalyzer.cpp} | 16 +-
...AStatePass.cpp => PointerAuthCFIFixup.cpp} | 32 +--
bolt/lib/Rewrite/BinaryPassManager.cpp| 8 ++---
bolt/test/AArch64/negate-ra-state-incorrect.s | 2 +-
bolt/test/AArch64/negate-ra-state.s | 8 ++---
bolt/test/AArch64/pacret-split-funcs.s| 4 +--
bolt/unittests/Passes/CMakeLists.txt | 2 +-
...ateRAState.cpp => PointerAuthCFIFixup.cpp} | 6 ++--
.../gn/secondary/bolt/lib/Passes/BUILD.gn | 4 +--
14 files changed, 73 insertions(+), 72 deletions(-)
rename bolt/include/bolt/Passes/{MarkRAStates.h => PointerAuthCFIAnalyzer.h}
(63%)
rename bolt/include/bolt/Passes/{InsertNegateRAStatePass.h =>
PointerAuthCFIFixup.h} (87%)
rename bolt/lib/Passes/{MarkRAStates.cpp => PointerAuthCFIAnalyzer.cpp} (91%)
rename bolt/lib/Passes/{InsertNegateRAStatePass.cpp =>
PointerAuthCFIFixup.cpp} (91%)
rename bolt/unittests/Passes/{InsertNegateRAState.cpp =>
PointerAuthCFIFixup.cpp} (97%)
diff --git a/bolt/docs/PacRetDesign.md b/bolt/docs/PacRetDesign.md
index c7c76cac3a100..0de2da50f8fd6 100644
--- a/bolt/docs/PacRetDesign.md
+++ b/bolt/docs/PacRetDesign.md
@@ -104,9 +104,9 @@ negate-ra-state CFIs will become invalid during BasicBlock
reordering.
## Solution design
The implementation introduces two new passes:
-1. `MarkRAStatesPass`: assigns the RA state to each instruction based on the
CFIs
-in the input binary
-2. `InsertNegateRAStatePass`: reads those assigned instruction RA states after
+1. `PointerAuthCFIAnalyzer`: assigns the RA state to each instruction based on
+the CFI in the input binary
+2. `PointerAuthCFIFixup`: reads those assigned instruction RA states after
optimizations, and emits `DW_CFA_AARCH64_negate_ra_state` CFIs at the
correct
places: wherever there is a state change between two consecutive
instructions
in the layout order.
@@ -129,7 +129,7 @@ instruction.
This special case is handled by adding an `initialRAState` bool to each
BinaryFunction.
If the `Offset` the CFI refers to is zero, we don't store an annotation, but
set
the `initialRAState` in `FillCFIInfoFor`. This information is then used in
-`MarkRAStates`.
+`PointerAuthCFIAnalyzer`.
### Binaries without DWARF info
@@ -146,7 +146,7 @@ In summary:
- pointer auth is used, and we have DWARF CFIs: passes run, and rewrite the
negate-ra-state CFI.
-### MarkRAStates pass
+### PointerAuthCFIAnalyzer pass
This pass runs before optimizations reorder anything.
@@ -173,9 +173,9 @@ what we have before the pass, and after it.
| autiasp | negate-ra-state | signed |
| ret | | unsigned |
-# Error handling in MarkRAState Pass:
+# Error handling in PointerAuthCFIAnalyzer pass:
-Whenever the MarkRAStates pass finds inconsistencies in the current
+Whenever the PointerAuthCFIAnalyzer pass finds inconsistencies in the current
BinaryFunction, it marks the function as ignored using `BF.setIgnored()`. BOLT
will not optimize this function but will emit it unchanged in the original
section
(`.bolt.org.text`).
@@ -188,16 +188,17 @@ The inconsistencies are as follows:
Users will be informed about the number of ignored functions in the pass, the
exact functions ignored, and the found inconsistency.
-### InsertNegateRAStatePass
+### PointerAuthCFIFixup
-This pass runs after optimizations. It performns the _inverse_ of MarkRAState
pass:
+This pass runs after optimizations. It performs the _inverse_ of
PointerAuthCFIAnalyzer
+pass:
1. it reads the RA state annotations attached to the instructions, and
2. whenever the state changes, it adds a PseudoInstruction that holds an
OpNegateRAState CFI.
# Covering newly generated instructions:
-Some BOLT passes can add new Instructions. In InsertNegateRAStatePass, we have
+Some BOLT passes can add new Instructions. In PointerAuthCFIFixup, we have
to know what RA state these have.
> [!important]
@@ -230,7 +231,7 @@ freely. The only special case is function splitting. W
[llvm-branch-commits] [llvm] [BOLT][PAC] Warn about synchronous unwind tables (PR #165227)
https://github.com/bgergely0 updated
https://github.com/llvm/llvm-project/pull/165227
From 169733615e6a995d7fd74393c078f15e017b67bc Mon Sep 17 00:00:00 2001
From: Gergely Balint
Date: Mon, 27 Oct 2025 09:29:54 +
Subject: [PATCH 1/2] [BOLT][PAC] Warn about synchronous unwind tables
BOLT currently ignores functions with synchronous PAuth DWARF info.
When more than 10% of functions get ignored for inconsistencies, we
should emit a warning to only use asynchronous unwind tables.
See also: #165215
---
bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp| 9 -
.../AArch64/pacret-synchronous-unwind.cpp | 33 +++
2 files changed, 41 insertions(+), 1 deletion(-)
create mode 100644 bolt/test/runtime/AArch64/pacret-synchronous-unwind.cpp
diff --git a/bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp
b/bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp
index 91030544d2b88..01af88818a21d 100644
--- a/bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp
+++ b/bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp
@@ -133,11 +133,18 @@ Error
PointerAuthCFIAnalyzer::runOnFunctions(BinaryContext &BC) {
ParallelUtilities::runOnEachFunction(
BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun,
SkipPredicate, "PointerAuthCFIAnalyzer");
+
+ float IgnoredPercent = (100.0 * FunctionsIgnored) / Total;
BC.outs() << "BOLT-INFO: PointerAuthCFIAnalyzer ran on " << Total
<< " functions. Ignored " << FunctionsIgnored << " functions "
-<< format("(%.2lf%%)", (100.0 * FunctionsIgnored) / Total)
+<< format("(%.2lf%%)", IgnoredPercent)
<< " because of CFI inconsistencies\n";
+ if (IgnoredPercent >= 10.0)
+BC.outs() << "BOLT-WARNING: PointerAuthCFIAnalyzer only supports "
+ "asynchronous unwind tables. For C compilers, see "
+ "-fasynchronous-unwind-tables.\n";
+
return Error::success();
}
diff --git a/bolt/test/runtime/AArch64/pacret-synchronous-unwind.cpp
b/bolt/test/runtime/AArch64/pacret-synchronous-unwind.cpp
new file mode 100644
index 0..1bfeeaed3715a
--- /dev/null
+++ b/bolt/test/runtime/AArch64/pacret-synchronous-unwind.cpp
@@ -0,0 +1,33 @@
+// Test to demonstrate that functions compiled with synchronous unwind tables
+// are ignored by the PointerAuthCFIAnalyzer.
+// Exception handling is needed to have _any_ unwind tables, otherwise the
+// PointerAuthCFIAnalyzer does not run on these functions, so it does not
ignore
+// any function.
+//
+// REQUIRES: system-linux,bolt-runtime
+//
+// RUN: %clangxx --target=aarch64-unknown-linux-gnu \
+// RUN: -mbranch-protection=pac-ret \
+// RUN: -fno-asynchronous-unwind-tables \
+// RUN: %s -o %t.exe -Wl,-q
+// RUN: llvm-bolt %t.exe -o %t.bolt | FileCheck %s --check-prefix=CHECK
+//
+// CHECK: PointerAuthCFIAnalyzer ran on 3 functions. Ignored
+// CHECK-NOT: 0 functions (0.00%) because of CFI inconsistencies
+// CHECK-SAME: 1 functions (33.33%) because of CFI inconsistencies
+// CHECK-NEXT: BOLT-WARNING: PointerAuthCFIAnalyzer only supports asynchronous
+// CHECK-SAME: unwind tables. For C compilers, see
-fasynchronous-unwind-tables.
+
+#include
+#include
+
+void foo() { throw std::runtime_error("Exception from foo()."); }
+
+int main() {
+ try {
+foo();
+ } catch (const std::exception &e) {
+printf("Exception caught: %s\n", e.what());
+ }
+ return 0;
+}
From c08f8bad6fd9640b407ce5cb446f74ffbda1b0bb Mon Sep 17 00:00:00 2001
From: Gergely Balint
Date: Tue, 28 Oct 2025 09:23:08 +
Subject: [PATCH 2/2] [BOLT] Use opts::Verbosity in PointerAuthCFIAnalyzer
---
bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp| 27 ---
bolt/test/AArch64/negate-ra-state-incorrect.s | 2 +-
2 files changed, 18 insertions(+), 11 deletions(-)
diff --git a/bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp
b/bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp
index 01af88818a21d..5979d5fb01818 100644
--- a/bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp
+++ b/bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp
@@ -28,6 +28,10 @@
using namespace llvm;
+namespace opts {
+extern llvm::cl::opt Verbosity;
+} // namespace opts
+
namespace llvm {
namespace bolt {
@@ -43,9 +47,10 @@ bool PointerAuthCFIAnalyzer::runOnFunction(BinaryFunction
&BF) {
// Not all functions have .cfi_negate_ra_state in them. But if one
does,
// we expect psign/pauth instructions to have the hasNegateRAState
// annotation.
-BC.outs() << "BOLT-INFO: inconsistent RAStates in function "
- << BF.getPrintName()
- << ": ptr sign/auth inst without .cfi_negate_ra_state\n";
+if (opts::Verbosity >= 1)
+ BC.outs() << "BOLT-INFO: inconsistent RAStates in function "
+<< BF.getPrintName()
+<< ": ptr sign/auth inst without .cfi_negate_ra_state\n";
std::lock_guard Lock(IgnoreMutex);
BF.setIgnored();
return false;
@@ -65,9 +70,10 @@ bool PointerAuthCFIAnalyzer::run
[llvm-branch-commits] [llvm] [BOLT][PAC] Warn about synchronous unwind tables (PR #165227)
https://github.com/bgergely0 updated
https://github.com/llvm/llvm-project/pull/165227
From 169733615e6a995d7fd74393c078f15e017b67bc Mon Sep 17 00:00:00 2001
From: Gergely Balint
Date: Mon, 27 Oct 2025 09:29:54 +
Subject: [PATCH 1/2] [BOLT][PAC] Warn about synchronous unwind tables
BOLT currently ignores functions with synchronous PAuth DWARF info.
When more than 10% of functions get ignored for inconsistencies, we
should emit a warning to only use asynchronous unwind tables.
See also: #165215
---
bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp| 9 -
.../AArch64/pacret-synchronous-unwind.cpp | 33 +++
2 files changed, 41 insertions(+), 1 deletion(-)
create mode 100644 bolt/test/runtime/AArch64/pacret-synchronous-unwind.cpp
diff --git a/bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp
b/bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp
index 91030544d2b88..01af88818a21d 100644
--- a/bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp
+++ b/bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp
@@ -133,11 +133,18 @@ Error
PointerAuthCFIAnalyzer::runOnFunctions(BinaryContext &BC) {
ParallelUtilities::runOnEachFunction(
BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun,
SkipPredicate, "PointerAuthCFIAnalyzer");
+
+ float IgnoredPercent = (100.0 * FunctionsIgnored) / Total;
BC.outs() << "BOLT-INFO: PointerAuthCFIAnalyzer ran on " << Total
<< " functions. Ignored " << FunctionsIgnored << " functions "
-<< format("(%.2lf%%)", (100.0 * FunctionsIgnored) / Total)
+<< format("(%.2lf%%)", IgnoredPercent)
<< " because of CFI inconsistencies\n";
+ if (IgnoredPercent >= 10.0)
+BC.outs() << "BOLT-WARNING: PointerAuthCFIAnalyzer only supports "
+ "asynchronous unwind tables. For C compilers, see "
+ "-fasynchronous-unwind-tables.\n";
+
return Error::success();
}
diff --git a/bolt/test/runtime/AArch64/pacret-synchronous-unwind.cpp
b/bolt/test/runtime/AArch64/pacret-synchronous-unwind.cpp
new file mode 100644
index 0..1bfeeaed3715a
--- /dev/null
+++ b/bolt/test/runtime/AArch64/pacret-synchronous-unwind.cpp
@@ -0,0 +1,33 @@
+// Test to demonstrate that functions compiled with synchronous unwind tables
+// are ignored by the PointerAuthCFIAnalyzer.
+// Exception handling is needed to have _any_ unwind tables, otherwise the
+// PointerAuthCFIAnalyzer does not run on these functions, so it does not
ignore
+// any function.
+//
+// REQUIRES: system-linux,bolt-runtime
+//
+// RUN: %clangxx --target=aarch64-unknown-linux-gnu \
+// RUN: -mbranch-protection=pac-ret \
+// RUN: -fno-asynchronous-unwind-tables \
+// RUN: %s -o %t.exe -Wl,-q
+// RUN: llvm-bolt %t.exe -o %t.bolt | FileCheck %s --check-prefix=CHECK
+//
+// CHECK: PointerAuthCFIAnalyzer ran on 3 functions. Ignored
+// CHECK-NOT: 0 functions (0.00%) because of CFI inconsistencies
+// CHECK-SAME: 1 functions (33.33%) because of CFI inconsistencies
+// CHECK-NEXT: BOLT-WARNING: PointerAuthCFIAnalyzer only supports asynchronous
+// CHECK-SAME: unwind tables. For C compilers, see
-fasynchronous-unwind-tables.
+
+#include
+#include
+
+void foo() { throw std::runtime_error("Exception from foo()."); }
+
+int main() {
+ try {
+foo();
+ } catch (const std::exception &e) {
+printf("Exception caught: %s\n", e.what());
+ }
+ return 0;
+}
From c08f8bad6fd9640b407ce5cb446f74ffbda1b0bb Mon Sep 17 00:00:00 2001
From: Gergely Balint
Date: Tue, 28 Oct 2025 09:23:08 +
Subject: [PATCH 2/2] [BOLT] Use opts::Verbosity in PointerAuthCFIAnalyzer
---
bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp| 27 ---
bolt/test/AArch64/negate-ra-state-incorrect.s | 2 +-
2 files changed, 18 insertions(+), 11 deletions(-)
diff --git a/bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp
b/bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp
index 01af88818a21d..5979d5fb01818 100644
--- a/bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp
+++ b/bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp
@@ -28,6 +28,10 @@
using namespace llvm;
+namespace opts {
+extern llvm::cl::opt Verbosity;
+} // namespace opts
+
namespace llvm {
namespace bolt {
@@ -43,9 +47,10 @@ bool PointerAuthCFIAnalyzer::runOnFunction(BinaryFunction
&BF) {
// Not all functions have .cfi_negate_ra_state in them. But if one
does,
// we expect psign/pauth instructions to have the hasNegateRAState
// annotation.
-BC.outs() << "BOLT-INFO: inconsistent RAStates in function "
- << BF.getPrintName()
- << ": ptr sign/auth inst without .cfi_negate_ra_state\n";
+if (opts::Verbosity >= 1)
+ BC.outs() << "BOLT-INFO: inconsistent RAStates in function "
+<< BF.getPrintName()
+<< ": ptr sign/auth inst without .cfi_negate_ra_state\n";
std::lock_guard Lock(IgnoreMutex);
BF.setIgnored();
return false;
@@ -65,9 +70,10 @@ bool PointerAuthCFIAnalyzer::run
[llvm-branch-commits] [llvm] DAG: Handle AssertNoFPClass in computeKnownBits (PR #167289)
@@ -4121,6 +4121,27 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op,
const APInt &DemandedElts,
Known.One.clearLowBits(LogOfAlign);
break;
}
+ case ISD::AssertNoFPClass: {
+Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
RKSimon wrote:
Mainly just through bitcasts to wrapped integer logic etc. But X86 does
successfully use it for some of its FP node types
https://github.com/llvm/llvm-project/pull/167289
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] DAG: Add AssertNoFPClass from call return attributes (PR #167264)
https://github.com/dtcxzyw approved this pull request. LG https://github.com/llvm/llvm-project/pull/167264 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AArch64] Decompose FADDV with known zero elements (PR #167313)
https://github.com/guy-david created
https://github.com/llvm/llvm-project/pull/167313
FADDV is matched into FADDPv4f32 + FADDPv2i32p but this can be relaxed when one
element (usually the 4th) or more are known to be zero.
Before:
```
movi d1, #
mov v0.s[3], v1.s[0]
faddp v0.4s, v0.4s, v0.4s
faddp s0, v0.2s
```
After:
```
mov s1, v0.s[2]
faddp s0, v0.2s
fadd s0, s0, s1
```
When all of the elements are zero, the intrinsic now simply reduces into a
constant instead of emitting two additions.
>From 7a1406efe148f888a784851bd4268d227041d588 Mon Sep 17 00:00:00 2001
From: Guy David
Date: Sun, 9 Nov 2025 17:55:53 +0200
Subject: [PATCH] [AArch64] Decompose faddv with known zero elements
FADDV is matched into FADDPv4f32 + FADDPv2i32p but this can be relaxed
when one element (usually the 4th) or more are known to be zero.
Before:
movi d1, #
mov v0.s[3], v1.s[0]
faddp v0.4s, v0.4s, v0.4s
faddp s0, v0.2s
After:
mov s1, v0.s[2]
faddp s0, v0.2s
fadd s0, s0, s1
---
.../Target/AArch64/AArch64ISelLowering.cpp| 58 +
llvm/test/CodeGen/AArch64/faddv.ll| 82 +++
2 files changed, 140 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/faddv.ll
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 132afc27135e9..b4bf97e27bca4 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -22790,6 +22790,62 @@ static SDValue combineSVEBitSel(unsigned IID, SDNode
*N, SelectionDAG &DAG) {
}
}
+/// Optimize patterns where we insert zeros into vector lanes before faddv.
+static SDValue tryCombineFADDVWithZero(SDNode *N, SelectionDAG &DAG) {
+ assert(getIntrinsicID(N) == Intrinsic::aarch64_neon_faddv &&
+ "Expected NEON faddv intrinsic");
+ SDLoc DL(N);
+ SDValue Vec = N->getOperand(1);
+ EVT VT = Vec.getValueType();
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+ APInt DemandedElts = APInt::getAllOnes(NumElts);
+ APInt KnownZeroElts = DAG.computeVectorKnownZeroElements(Vec, DemandedElts);
+ unsigned NumZeroElts = KnownZeroElts.popcount();
+ // No element is known to be +0.0, fallback to the TableGen pattern.
+ if (NumZeroElts == 0)
+return SDValue();
+ // All elements are +0.0, just return zero.
+ if (NumZeroElts == NumElts)
+return DAG.getConstantFP(0.0, DL, EltVT);
+
+ // At least one element is +0.0, so it is worth to decompose the reduction
+ // into fadd's. FADDV is a pairwise reduction, so we need to respect the
+ // order of the elements in the vector.
+
+ // Check if we can output a signed zero.
+ // This avoids the scenario where all the added values are -0.0 except the
+0.0
+ // element we chose to ignore.
+ SDNodeFlags Flags = N->getFlags();
+ bool IsSignedZeroSafe = Flags.hasNoSignedZeros() ||
+ DAG.allUsesSignedZeroInsensitive(SDValue(N, 0));
+ if (!IsSignedZeroSafe)
+return SDValue();
+
+ // Extract all elements.
+ SmallVector Elts;
+ for (unsigned I = 0; I < NumElts; I++) {
+Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
+ DAG.getConstant(I, DL, MVT::i64)));
+ }
+ // Perform pairwise reduction.
+ while (Elts.size() > 1) {
+SmallVector NewElts;
+for (unsigned I = 0; I < Elts.size(); I += 2) {
+ if (!KnownZeroElts[I] && !KnownZeroElts[I + 1]) {
+NewElts.push_back(
+DAG.getNode(ISD::FADD, DL, EltVT, Elts[I], Elts[I + 1]));
+ } else if (KnownZeroElts[I]) {
+NewElts.push_back(Elts[I + 1]);
+ } else if (KnownZeroElts[I + 1]) {
+NewElts.push_back(Elts[I]);
+ }
+}
+Elts = std::move(NewElts);
+ }
+ return Elts[0];
+}
+
static SDValue performIntrinsicCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
@@ -22813,6 +22869,8 @@ static SDValue performIntrinsicCombine(SDNode *N,
return combineAcrossLanesIntrinsic(AArch64ISD::SMAXV, N, DAG);
case Intrinsic::aarch64_neon_umaxv:
return combineAcrossLanesIntrinsic(AArch64ISD::UMAXV, N, DAG);
+ case Intrinsic::aarch64_neon_faddv:
+return tryCombineFADDVWithZero(N, DAG);
case Intrinsic::aarch64_neon_fmax:
return DAG.getNode(ISD::FMAXIMUM, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
diff --git a/llvm/test/CodeGen/AArch64/faddv.ll
b/llvm/test/CodeGen/AArch64/faddv.ll
new file mode 100644
index 0..e4a3781150cf7
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/faddv.ll
@@ -0,0 +1,82 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
+
+; Test element at index 0 is zero.
+define float @test_v2f32_element_0_zero(<2 x float> %vec) {
+; CHECK-LABEL: test_v
[llvm-branch-commits] [llvm] [AArch64] Decompose FADDV with known zero elements (PR #167313)
llvmbot wrote:
@llvm/pr-subscribers-backend-aarch64
Author: Guy David (guy-david)
Changes
FADDV is matched into FADDPv4f32 + FADDPv2i32p but this can be relaxed when one
element (usually the 4th) or more are known to be zero.
Before:
```
movi d1, #
mov v0.s[3], v1.s[0]
faddp v0.4s, v0.4s, v0.4s
faddp s0, v0.2s
```
After:
```
mov s1, v0.s[2]
faddp s0, v0.2s
fadd s0, s0, s1
```
When all of the elements are zero, the intrinsic now simply reduces into a
constant instead of emitting two additions.
---
Full diff: https://github.com/llvm/llvm-project/pull/167313.diff
2 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+58)
- (added) llvm/test/CodeGen/AArch64/faddv.ll (+82)
``diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 132afc27135e9..b4bf97e27bca4 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -22790,6 +22790,62 @@ static SDValue combineSVEBitSel(unsigned IID, SDNode
*N, SelectionDAG &DAG) {
}
}
+/// Optimize patterns where we insert zeros into vector lanes before faddv.
+static SDValue tryCombineFADDVWithZero(SDNode *N, SelectionDAG &DAG) {
+ assert(getIntrinsicID(N) == Intrinsic::aarch64_neon_faddv &&
+ "Expected NEON faddv intrinsic");
+ SDLoc DL(N);
+ SDValue Vec = N->getOperand(1);
+ EVT VT = Vec.getValueType();
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+ APInt DemandedElts = APInt::getAllOnes(NumElts);
+ APInt KnownZeroElts = DAG.computeVectorKnownZeroElements(Vec, DemandedElts);
+ unsigned NumZeroElts = KnownZeroElts.popcount();
+ // No element is known to be +0.0, fallback to the TableGen pattern.
+ if (NumZeroElts == 0)
+return SDValue();
+ // All elements are +0.0, just return zero.
+ if (NumZeroElts == NumElts)
+return DAG.getConstantFP(0.0, DL, EltVT);
+
+ // At least one element is +0.0, so it is worth to decompose the reduction
+ // into fadd's. FADDV is a pairwise reduction, so we need to respect the
+ // order of the elements in the vector.
+
+ // Check if we can output a signed zero.
+ // This avoids the scenario where all the added values are -0.0 except the
+0.0
+ // element we chose to ignore.
+ SDNodeFlags Flags = N->getFlags();
+ bool IsSignedZeroSafe = Flags.hasNoSignedZeros() ||
+ DAG.allUsesSignedZeroInsensitive(SDValue(N, 0));
+ if (!IsSignedZeroSafe)
+return SDValue();
+
+ // Extract all elements.
+ SmallVector Elts;
+ for (unsigned I = 0; I < NumElts; I++) {
+Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
+ DAG.getConstant(I, DL, MVT::i64)));
+ }
+ // Perform pairwise reduction.
+ while (Elts.size() > 1) {
+SmallVector NewElts;
+for (unsigned I = 0; I < Elts.size(); I += 2) {
+ if (!KnownZeroElts[I] && !KnownZeroElts[I + 1]) {
+NewElts.push_back(
+DAG.getNode(ISD::FADD, DL, EltVT, Elts[I], Elts[I + 1]));
+ } else if (KnownZeroElts[I]) {
+NewElts.push_back(Elts[I + 1]);
+ } else if (KnownZeroElts[I + 1]) {
+NewElts.push_back(Elts[I]);
+ }
+}
+Elts = std::move(NewElts);
+ }
+ return Elts[0];
+}
+
static SDValue performIntrinsicCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
@@ -22813,6 +22869,8 @@ static SDValue performIntrinsicCombine(SDNode *N,
return combineAcrossLanesIntrinsic(AArch64ISD::SMAXV, N, DAG);
case Intrinsic::aarch64_neon_umaxv:
return combineAcrossLanesIntrinsic(AArch64ISD::UMAXV, N, DAG);
+ case Intrinsic::aarch64_neon_faddv:
+return tryCombineFADDVWithZero(N, DAG);
case Intrinsic::aarch64_neon_fmax:
return DAG.getNode(ISD::FMAXIMUM, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
diff --git a/llvm/test/CodeGen/AArch64/faddv.ll
b/llvm/test/CodeGen/AArch64/faddv.ll
new file mode 100644
index 0..e4a3781150cf7
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/faddv.ll
@@ -0,0 +1,82 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
+
+; Test element at index 0 is zero.
+define float @test_v2f32_element_0_zero(<2 x float> %vec) {
+; CHECK-LABEL: test_v2f32_element_0_zero:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT:// kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:mov s0, v0.s[1]
+; CHECK-NEXT:ret
+entry:
+ %with_zero = insertelement <2 x float> %vec, float 0.0, i64 0
+ %sum = call nsz float @llvm.aarch64.neon.faddv.f32.v2f32(<2 x float>
%with_zero)
+ ret float %sum
+}
+
+; Test element at index 3 is zero.
+define float @test_v4f32_element_3_zero(<4 x float> %vec) {
+; CHECK-LABEL: test_v4f32_element_3_z
[llvm-branch-commits] [llvm] DAG: Use modf vector libcalls through RuntimeLibcalls (PR #166986)
https://github.com/MacDue approved this pull request. https://github.com/llvm/llvm-project/pull/166986 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] DAG: Fold copysign with a known signmask to a disjoint or (PR #167266)
https://github.com/RKSimon approved this pull request. https://github.com/llvm/llvm-project/pull/167266 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SPIRV][SPIRVPrepareGlobals] Map AMD's dynamic LDS 0-element globals to arrays with UINT32_MAX elements (PR #166952)
https://github.com/jmmartinez updated
https://github.com/llvm/llvm-project/pull/166952
From df43b6346491e7b78ff27956da4bc77d720124ba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?=
Date: Fri, 7 Nov 2025 12:24:18 +0100
Subject: [PATCH 1/2] [SPIRV][SPIRVPrepareGlobals] Map AMD's dynamic LDS
0-element globals to arrays with UINT32_MAX elements
In HIP, dynamic LDS globals are represented using 0-element global
arrays in the __shared__ language addressspace.
extern __shared__ LDS[];
These are not representable in SPIRV directly.
To represent them, for AMD, we use an array with UINT32_MAX-elements.
These are reverse translated to 0-element arrays later in AMD's SPIRV runtime
pipeline.
---
llvm/lib/Target/SPIRV/SPIRVPrepareGlobals.cpp | 27 +++
llvm/test/CodeGen/SPIRV/hip_dyn_lds.ll| 20 ++
2 files changed, 47 insertions(+)
create mode 100644 llvm/test/CodeGen/SPIRV/hip_dyn_lds.ll
diff --git a/llvm/lib/Target/SPIRV/SPIRVPrepareGlobals.cpp
b/llvm/lib/Target/SPIRV/SPIRVPrepareGlobals.cpp
index c44c53129f1e0..42a9577bb2054 100644
--- a/llvm/lib/Target/SPIRV/SPIRVPrepareGlobals.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVPrepareGlobals.cpp
@@ -13,6 +13,7 @@
#include "SPIRV.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Module.h"
using namespace llvm;
@@ -43,6 +44,29 @@ bool tryExtendLLVMBitcodeMarker(GlobalVariable &Bitcode) {
return true;
}
+bool tryExtendDynamicLDSGlobal(GlobalVariable &GV) {
+ constexpr unsigned WorkgroupAS = 3;
+ const bool IsWorkgroupExternal =
+ GV.hasExternalLinkage() && GV.getAddressSpace() == WorkgroupAS;
+ if (!IsWorkgroupExternal)
+return false;
+
+ const ArrayType *AT = dyn_cast(GV.getValueType());
+ if (!AT || AT->getNumElements() != 0)
+return false;
+
+ constexpr auto Magic = std::numeric_limits::max();
+ ArrayType *NewAT = ArrayType::get(AT->getElementType(), Magic);
+ GlobalVariable *NewGV = new GlobalVariable(
+ *GV.getParent(), NewAT, GV.isConstant(), GV.getLinkage(), nullptr, "",
+ &GV, GV.getThreadLocalMode(), WorkgroupAS, GV.isExternallyInitialized());
+ NewGV->takeName(&GV);
+ GV.replaceAllUsesWith(NewGV);
+ GV.eraseFromParent();
+
+ return true;
+}
+
bool SPIRVPrepareGlobals::runOnModule(Module &M) {
const bool IsAMD = M.getTargetTriple().getVendor() == Triple::AMD;
if (!IsAMD)
@@ -52,6 +76,9 @@ bool SPIRVPrepareGlobals::runOnModule(Module &M) {
if (GlobalVariable *Bitcode = M.getNamedGlobal("llvm.embedded.module"))
Changed |= tryExtendLLVMBitcodeMarker(*Bitcode);
+ for (GlobalVariable &GV : make_early_inc_range(M.globals()))
+Changed |= tryExtendDynamicLDSGlobal(GV);
+
return Changed;
}
char SPIRVPrepareGlobals::ID = 0;
diff --git a/llvm/test/CodeGen/SPIRV/hip_dyn_lds.ll
b/llvm/test/CodeGen/SPIRV/hip_dyn_lds.ll
new file mode 100644
index 0..f0acfdfdede9d
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/hip_dyn_lds.ll
@@ -0,0 +1,20 @@
+; RUN: llc -verify-machineinstrs -mtriple=spirv64-amd-amdhsa %s -o - |
FileCheck %s
+; RUN: %if spirv-tools %{ llc -mtriple=spirv64-amd-amdhsa %s -o -
-filetype=obj | spirv-val %}
+
+; CHECK: OpName %[[#LDS:]] "lds"
+; CHECK: OpDecorate %[[#LDS]] LinkageAttributes "lds" Import
+; CHECK: %[[#UINT:]] = OpTypeInt 32 0
+; CHECK: %[[#UINT_MAX:]] = OpConstant %[[#UINT]] 4294967295
+; CHECK: %[[#LDS_ARR_TY:]] = OpTypeArray %[[#UINT]] %[[#UINT_MAX]]
+; CHECK: %[[#LDS_ARR_PTR_WG:]] = OpTypePointer Workgroup %[[#LDS_ARR_TY]]
+; CHECK: %[[#LDS]] = OpVariable %[[#LDS_ARR_PTR_WG]] Workgroup
+
+@lds = external addrspace(3) global [0 x i32]
+
+define spir_kernel void @foo(ptr addrspace(4) %in, ptr addrspace(4) %out) {
+entry:
+ %val = load i32, ptr addrspace(4) %in
+ %add = add i32 %val, 1
+ store i32 %add, ptr addrspace(4) %out
+ ret void
+}
From 3d8cf0383226dccd06dd26ce9b56fb6535586ffc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?=
Date: Mon, 10 Nov 2025 11:08:22 +0100
Subject: [PATCH 2/2] [Review] Rename Magic->UInt32Max
---
llvm/lib/Target/SPIRV/SPIRVPrepareGlobals.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/SPIRV/SPIRVPrepareGlobals.cpp
b/llvm/lib/Target/SPIRV/SPIRVPrepareGlobals.cpp
index 42a9577bb2054..2b4349e5d9e39 100644
--- a/llvm/lib/Target/SPIRV/SPIRVPrepareGlobals.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVPrepareGlobals.cpp
@@ -55,8 +55,8 @@ bool tryExtendDynamicLDSGlobal(GlobalVariable &GV) {
if (!AT || AT->getNumElements() != 0)
return false;
- constexpr auto Magic = std::numeric_limits::max();
- ArrayType *NewAT = ArrayType::get(AT->getElementType(), Magic);
+ constexpr auto UInt32Max = std::numeric_limits::max();
+ ArrayType *NewAT = ArrayType::get(AT->getElementType(), UInt32Max);
GlobalVariable *NewGV = new GlobalVariable(
*GV.getParent(), NewAT, GV.isConstant(), GV.getLinkage(), nullptr, "",
&GV, GV.getThreadLocalMode(), WorkgroupAS, GV.isExternallyInit
[llvm-branch-commits] [llvm] AMDGPU: Add baseline tests for copysign with known signmask input (PR #167265)
https://github.com/RKSimon approved this pull request. https://github.com/llvm/llvm-project/pull/167265 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] DAG: Handle AssertNoFPClass in computeKnownBits (PR #167289)
https://github.com/RKSimon approved this pull request. https://github.com/llvm/llvm-project/pull/167289 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT][BTI] Add MCPlusBuilder::isBTILandingPad (PR #167306)
https://github.com/bgergely0 created
https://github.com/llvm/llvm-project/pull/167306
- takes both implicit and explicit BTIs into account
- fix related comment in AArch64BranchTargets.cpp
From 57061b65e6c36e8e6f18a1a0f0db2f0266730b0e Mon Sep 17 00:00:00 2001
From: Gergely Balint
Date: Thu, 28 Aug 2025 12:32:37 +
Subject: [PATCH] [BOLT][BTI] Add MCPlusBuilder::isBTILandingPad
- takes both implicit and explicit BTIs into account
- fix related comment in AArch64BranchTargets.cpp
---
bolt/include/bolt/Core/MCPlusBuilder.h | 14 ++
.../Target/AArch64/AArch64MCPlusBuilder.cpp| 18 ++
bolt/unittests/Core/MCPlusBuilder.cpp | 17 +
.../Target/AArch64/AArch64BranchTargets.cpp| 6 --
4 files changed, 53 insertions(+), 2 deletions(-)
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h
b/bolt/include/bolt/Core/MCPlusBuilder.h
index 9cbff02619bd2..6760586876d94 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -1865,6 +1865,20 @@ class MCPlusBuilder {
llvm_unreachable("not implemented");
}
+ /// Check if an Instruction is a BTI landing pad with the required
properties.
+ /// Takes both explicit and implicit BTIs into account.
+ virtual bool isBTILandingPad(MCInst &Inst, bool CouldCall,
+ bool CouldJump) const {
+llvm_unreachable("not implemented");
+return false;
+ }
+
+ /// Check if an Instruction is an implicit BTI c landing pad.
+ virtual bool isImplicitBTIC(MCInst &Inst) const {
+llvm_unreachable("not implemented");
+return false;
+ }
+
/// Create a BTI landing pad instruction.
virtual void createBTI(MCInst &Inst, bool CouldCall, bool CouldJump) const {
llvm_unreachable("not implemented");
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index b6dfa16355ad9..3988847396182 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -2705,6 +2705,24 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
return Insts;
}
+ bool isBTILandingPad(MCInst &Inst, bool CouldCall,
+ bool CouldJump) const override {
+unsigned HintNum = getBTIHintNum(CouldCall, CouldJump);
+bool IsExplicitBTI =
+Inst.getOpcode() == AArch64::HINT && Inst.getNumOperands() == 1 &&
+Inst.getOperand(0).isImm() && Inst.getOperand(0).getImm() == HintNum;
+
+bool IsImplicitBTI = HintNum == 34 && isImplicitBTIC(Inst);
+return IsExplicitBTI || IsImplicitBTI;
+ }
+
+ bool isImplicitBTIC(MCInst &Inst) const override {
+// PACI[AB]SP are always implicitly BTI C, independently of
+// SCTLR_EL1.BT[01].
+return Inst.getOpcode() == AArch64::PACIASP ||
+ Inst.getOpcode() == AArch64::PACIBSP;
+ }
+
void createBTI(MCInst &Inst, bool CouldCall, bool CouldJump) const override {
Inst.setOpcode(AArch64::HINT);
unsigned HintNum = getBTIHintNum(CouldCall, CouldJump);
diff --git a/bolt/unittests/Core/MCPlusBuilder.cpp
b/bolt/unittests/Core/MCPlusBuilder.cpp
index 33389bca8b21e..439d72a343ce8 100644
--- a/bolt/unittests/Core/MCPlusBuilder.cpp
+++ b/bolt/unittests/Core/MCPlusBuilder.cpp
@@ -155,22 +155,39 @@ TEST_P(MCPlusBuilderTester, AArch64_BTI) {
auto II = BB->begin();
ASSERT_EQ(II->getOpcode(), AArch64::HINT);
ASSERT_EQ(II->getOperand(0).getImm(), 38);
+ ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, true, true));
MCInst BTIj;
BC->MIB->createBTI(BTIj, false, true);
II = BB->addInstruction(BTIj);
ASSERT_EQ(II->getOpcode(), AArch64::HINT);
ASSERT_EQ(II->getOperand(0).getImm(), 36);
+ ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, false, true));
MCInst BTIc;
BC->MIB->createBTI(BTIc, true, false);
II = BB->addInstruction(BTIc);
ASSERT_EQ(II->getOpcode(), AArch64::HINT);
ASSERT_EQ(II->getOperand(0).getImm(), 34);
+ ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, true, false));
MCInst BTIinvalid;
ASSERT_DEATH(BC->MIB->createBTI(BTIinvalid, false, false),
"No target kinds!");
+
+ MCInst Paciasp = MCInstBuilder(AArch64::PACIASP);
+ II = BB->addInstruction(Paciasp);
+ ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, true, false));
+ ASSERT_FALSE(BC->MIB->isBTILandingPad(*II, true, true));
+ ASSERT_FALSE(BC->MIB->isBTILandingPad(*II, false, true));
+ ASSERT_TRUE(BC->MIB->isImplicitBTIC(*II));
+
+ MCInst Pacibsp = MCInstBuilder(AArch64::PACIBSP);
+ II = BB->addInstruction(Pacibsp);
+ ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, true, false));
+ ASSERT_FALSE(BC->MIB->isBTILandingPad(*II, true, true));
+ ASSERT_FALSE(BC->MIB->isBTILandingPad(*II, false, true));
+ ASSERT_TRUE(BC->MIB->isImplicitBTIC(*II));
}
TEST_P(MCPlusBuilderTester, AArch64_CmpJNE) {
diff --git a/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp
b/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp
index b6f3e56c3a18f..ea7de
[llvm-branch-commits] [llvm] [BOLT][BTI] Add MCPlusBuilder::isBTILandingPad (PR #167306)
bgergely0 wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack href="https://app.graphite.com/github/pr/llvm/llvm-project/167306?utm_source=stack-comment-downstack-mergeability-warning"; > >on Graphite. > https://graphite.dev/docs/merge-pull-requests";>Learn more * **#167306** https://app.graphite.com/github/pr/llvm/llvm-project/167306?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> 👈 https://app.graphite.com/github/pr/llvm/llvm-project/167306?utm_source=stack-comment-view-in-graphite"; target="_blank">(View in Graphite) * **#167305** https://app.graphite.com/github/pr/llvm/llvm-project/167305?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * `main` This stack of pull requests is managed by https://graphite.dev?utm-source=stack-comment";>Graphite. Learn more about https://stacking.dev/?utm_source=stack-comment";>stacking. https://github.com/llvm/llvm-project/pull/167306 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT][BTI] Add MCPlusBuilder::updateBTIVariant (PR #167308)
https://github.com/bgergely0 created
https://github.com/llvm/llvm-project/pull/167308
Checks if an instruction is BTI, and updates the immediate value to the
newly requested variant.
From ccf25bb359e3e8dc06ec14d140c415b29a7a35b7 Mon Sep 17 00:00:00 2001
From: Gergely Balint
Date: Mon, 1 Sep 2025 08:52:28 +
Subject: [PATCH] [BOLT][BTI] Add MCPlusBuilder::updateBTIVariant
Checks if an instruction is BTI, and updates the immediate value to the
newly requested variant.
---
bolt/include/bolt/Core/MCPlusBuilder.h | 6 ++
bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp | 8
bolt/unittests/Core/MCPlusBuilder.cpp| 6 ++
3 files changed, 20 insertions(+)
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h
b/bolt/include/bolt/Core/MCPlusBuilder.h
index 6760586876d94..660c1c64b06cf 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -1884,6 +1884,12 @@ class MCPlusBuilder {
llvm_unreachable("not implemented");
}
+ /// Update operand of BTI instruction.
+ virtual void updateBTIVariant(MCInst &Inst, bool CouldCall,
+bool CouldJump) const {
+llvm_unreachable("not implemented");
+ }
+
/// Store \p Target absolute address to \p RegName
virtual InstructionListType materializeAddress(const MCSymbol *Target,
MCContext *Ctx,
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 3988847396182..d27f2b05eb01a 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -2729,6 +2729,14 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
Inst.addOperand(MCOperand::createImm(HintNum));
}
+ void updateBTIVariant(MCInst &Inst, bool CouldCall,
+bool CouldJump) const override {
+assert(Inst.getOpcode() == AArch64::HINT && "Not a BTI instruction.");
+unsigned HintNum = getBTIHintNum(CouldCall, CouldJump);
+Inst.clear();
+Inst.addOperand(MCOperand::createImm(HintNum));
+ }
+
InstructionListType materializeAddress(const MCSymbol *Target, MCContext
*Ctx,
MCPhysReg RegName,
int64_t Addend = 0) const override {
diff --git a/bolt/unittests/Core/MCPlusBuilder.cpp
b/bolt/unittests/Core/MCPlusBuilder.cpp
index 439d72a343ce8..02ecb87b4a5e3 100644
--- a/bolt/unittests/Core/MCPlusBuilder.cpp
+++ b/bolt/unittests/Core/MCPlusBuilder.cpp
@@ -156,6 +156,8 @@ TEST_P(MCPlusBuilderTester, AArch64_BTI) {
ASSERT_EQ(II->getOpcode(), AArch64::HINT);
ASSERT_EQ(II->getOperand(0).getImm(), 38);
ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, true, true));
+ BC->MIB->updateBTIVariant(*II, true, false);
+ ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, true, false));
MCInst BTIj;
BC->MIB->createBTI(BTIj, false, true);
@@ -163,6 +165,8 @@ TEST_P(MCPlusBuilderTester, AArch64_BTI) {
ASSERT_EQ(II->getOpcode(), AArch64::HINT);
ASSERT_EQ(II->getOperand(0).getImm(), 36);
ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, false, true));
+ BC->MIB->updateBTIVariant(*II, true, true);
+ ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, true, true));
MCInst BTIc;
BC->MIB->createBTI(BTIc, true, false);
@@ -170,6 +174,8 @@ TEST_P(MCPlusBuilderTester, AArch64_BTI) {
ASSERT_EQ(II->getOpcode(), AArch64::HINT);
ASSERT_EQ(II->getOperand(0).getImm(), 34);
ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, true, false));
+ BC->MIB->updateBTIVariant(*II, false, true);
+ ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, false, true));
MCInst BTIinvalid;
ASSERT_DEATH(BC->MIB->createBTI(BTIinvalid, false, false),
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT][BTI] Add MCPlusBuilder::updateBTIVariant (PR #167308)
bgergely0 wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack href="https://app.graphite.com/github/pr/llvm/llvm-project/167308?utm_source=stack-comment-downstack-mergeability-warning"; > >on Graphite. > https://graphite.dev/docs/merge-pull-requests";>Learn more * **#167308** https://app.graphite.com/github/pr/llvm/llvm-project/167308?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> 👈 https://app.graphite.com/github/pr/llvm/llvm-project/167308?utm_source=stack-comment-view-in-graphite"; target="_blank">(View in Graphite) * **#167306** https://app.graphite.com/github/pr/llvm/llvm-project/167306?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#167305** https://app.graphite.com/github/pr/llvm/llvm-project/167305?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * `main` This stack of pull requests is managed by https://graphite.dev?utm-source=stack-comment";>Graphite. Learn more about https://stacking.dev/?utm_source=stack-comment";>stacking. https://github.com/llvm/llvm-project/pull/167308 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LoopVectorize][NFC] Refactor widening decision logic (PR #140722)
https://github.com/skachkov-sc edited https://github.com/llvm/llvm-project/pull/140722 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LoopVectorize][NFC] Refactor widening decision logic (PR #140722)
skachkov-sc wrote: @david-arm addressed, but I've left memoryInstructionCanBeWidened as a separate function for now (it contains some early exits so I think the code will be harder to read after its substitution) https://github.com/llvm/llvm-project/pull/140722 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LoopVectorize][NFC] Refactor widening decision logic (PR #140722)
https://github.com/skachkov-sc updated
https://github.com/llvm/llvm-project/pull/140722
>From b08f1f89c1e8b8dd2acb0662fa1da021f27d9ab9 Mon Sep 17 00:00:00 2001
From: Sergey Kachkov
Date: Mon, 10 Nov 2025 16:35:23 +0300
Subject: [PATCH 1/2] [IVDescriptors] Add unit tests for MonotonicDescriptor
---
llvm/unittests/Analysis/IVDescriptorsTest.cpp | 131 ++
1 file changed, 131 insertions(+)
diff --git a/llvm/unittests/Analysis/IVDescriptorsTest.cpp
b/llvm/unittests/Analysis/IVDescriptorsTest.cpp
index 453800abf9cab..5e31a2bde7e7d 100644
--- a/llvm/unittests/Analysis/IVDescriptorsTest.cpp
+++ b/llvm/unittests/Analysis/IVDescriptorsTest.cpp
@@ -10,6 +10,7 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/AsmParser/Parser.h"
#include "llvm/IR/Dominators.h"
@@ -259,3 +260,133 @@ for.end:
EXPECT_EQ(Kind, RecurKind::FMax);
});
}
+
+TEST(IVDescriptorsTest, MonotonicIntVar) {
+ // Parse the module.
+ LLVMContext Context;
+
+ std::unique_ptr M =
+ parseIR(Context,
+ R"(define void @foo(i32 %start, i1 %cond, i64 %n) {
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %for.inc ]
+ %monotonic = phi i32 [ %start, %entry ], [ %monotonic.next, %for.inc ]
+ br i1 %cond, label %if.then, label %for.inc
+
+if.then:
+ %inc = add nsw i32 %monotonic, 1
+ br label %for.inc
+
+for.inc:
+ %monotonic.next = phi i32 [ %inc, %if.then ], [ %monotonic, %for.body ]
+ %i.next = add nuw nsw i64 %i, 1
+ %exitcond.not = icmp eq i64 %i.next, %n
+ br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:
+ ret void
+})");
+
+ runWithLoopInfoAndSE(
+ *M, "foo", [&](Function &F, LoopInfo &LI, ScalarEvolution &SE) {
+Function::iterator FI = F.begin();
+// First basic block is entry - skip it.
+BasicBlock *Header = &*(++FI);
+assert(Header->getName() == "for.body");
+Loop *L = LI.getLoopFor(Header);
+EXPECT_NE(L, nullptr);
+BasicBlock::iterator BBI = Header->begin();
+assert((&*BBI)->getName() == "i");
+PHINode *Phi = dyn_cast(&*(++BBI));
+assert(Phi->getName() == "monotonic");
+BasicBlock *IfThen = &*(++FI);
+assert(IfThen->getName() == "if.then");
+Instruction *StepInst = &*(IfThen->begin());
+assert(StepInst->getName() == "inc");
+BasicBlock *IfEnd = &*(++FI);
+assert(IfEnd->getName() == "for.inc");
+auto *ChainPhi = dyn_cast(&*(IfEnd->begin()));
+assert(ChainPhi->getName() == "monotonic.next");
+MonotonicDescriptor Desc;
+bool IsMonotonicPhi =
+MonotonicDescriptor::isMonotonicPHI(Phi, L, Desc, SE);
+EXPECT_TRUE(IsMonotonicPhi);
+auto &Chain = Desc.getChain();
+EXPECT_TRUE(Chain.size() == 1 && Chain.contains(ChainPhi));
+EXPECT_EQ(Desc.getStepInst(), StepInst);
+EXPECT_EQ(Desc.getPredicateEdge(),
+ MonotonicDescriptor::Edge(IfThen, IfEnd));
+auto *StartSCEV = SE.getSCEV(F.getArg(0));
+auto *StepSCEV = SE.getConstant(StartSCEV->getType(), 1);
+EXPECT_EQ(Desc.getExpr(),
+ SE.getAddRecExpr(StartSCEV, StepSCEV, L, SCEV::FlagNW));
+ });
+}
+
+TEST(IVDescriptorsTest, MonotonicPtrVar) {
+ // Parse the module.
+ LLVMContext Context;
+
+ std::unique_ptr M =
+ parseIR(Context,
+ R"(define void @foo(ptr %start, i1 %cond, i64 %n) {
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %for.inc ]
+ %monotonic = phi ptr [ %start, %entry ], [ %monotonic.next, %for.inc ]
+ br i1 %cond, label %if.then, label %for.inc
+
+if.then:
+ %inc = getelementptr inbounds i8, ptr %monotonic, i64 4
+ br label %for.inc
+
+for.inc:
+ %monotonic.next = phi ptr [ %inc, %if.then ], [ %monotonic, %for.body ]
+ %i.next = add nuw nsw i64 %i, 1
+ %exitcond.not = icmp eq i64 %i.next, %n
+ br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:
+ ret void
+})");
+
+ runWithLoopInfoAndSE(
+ *M, "foo", [&](Function &F, LoopInfo &LI, ScalarEvolution &SE) {
+Function::iterator FI = F.begin();
+// First basic block is entry - skip it.
+BasicBlock *Header = &*(++FI);
+assert(Header->getName() == "for.body");
+Loop *L = LI.getLoopFor(Header);
+EXPECT_NE(L, nullptr);
+BasicBlock::iterator BBI = Header->begin();
+assert((&*BBI)->getName() == "i");
+PHINode *Phi = dyn_cast(&*(++BBI));
+assert(Phi->getName() == "monotonic");
+BasicBlock *IfThen = &*(++FI);
+assert(IfThen->getName() == "if.then");
+Instruction *StepInst = &*(IfThen->begin());
+assert(StepInst->getName() == "inc");
+BasicBlock *IfEnd = &*(++FI);
+assert(If
[llvm-branch-commits] [llvm] DAG: Handle AssertNoFPClass in computeKnownBits (PR #167289)
llvmbot wrote:
@llvm/pr-subscribers-llvm-selectiondag
Author: Matt Arsenault (arsenm)
Changes
It's possible to determine the sign bit if the value is known
one of the positive/negative classes and not-nan.
---
Full diff: https://github.com/llvm/llvm-project/pull/167289.diff
2 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (+21)
- (modified) llvm/test/CodeGen/AMDGPU/compute-known-bits-nofpclass.ll (-2)
``diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 80bbfea7fb83c..27dcd8a546d91 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4121,6 +4121,27 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op,
const APInt &DemandedElts,
Known.One.clearLowBits(LogOfAlign);
break;
}
+ case ISD::AssertNoFPClass: {
+Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+
+FPClassTest NoFPClass =
+static_cast(Op.getConstantOperandVal(1));
+const FPClassTest NegativeTestMask = fcNan | fcNegative;
+if ((NoFPClass & NegativeTestMask) == NegativeTestMask) {
+ // Cannot be negative.
+ Known.Zero.setSignBit();
+ Known.One.clearSignBit();
+}
+
+const FPClassTest PositiveTestMask = fcNan | fcPositive;
+if ((NoFPClass & PositiveTestMask) == PositiveTestMask) {
+ // Cannot be positive.
+ Known.Zero.clearSignBit();
+ Known.One.setSignBit();
+}
+
+break;
+ }
case ISD::FGETSIGN:
// All bits are zero except the low bit.
Known.Zero.setBitsFrom(1);
diff --git a/llvm/test/CodeGen/AMDGPU/compute-known-bits-nofpclass.ll
b/llvm/test/CodeGen/AMDGPU/compute-known-bits-nofpclass.ll
index d440d58246333..244c3f7c2a96a 100644
--- a/llvm/test/CodeGen/AMDGPU/compute-known-bits-nofpclass.ll
+++ b/llvm/test/CodeGen/AMDGPU/compute-known-bits-nofpclass.ll
@@ -5,7 +5,6 @@ define i32 @known_positive(float nofpclass(nan ninf nzero nsub
nnorm) %signbit.z
; CHECK-LABEL: known_positive:
; CHECK: ; %bb.0:
; CHECK-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:v_and_b32_e32 v0, 0x7fff, v0
; CHECK-NEXT:s_setpc_b64 s[30:31]
%cast = bitcast float %signbit.zero to i32
%and = and i32 %cast, 2147483647
@@ -27,7 +26,6 @@ define i32 @known_negative(float nofpclass(nan pinf pzero
psub pnorm) %signbit.o
; CHECK-LABEL: known_negative:
; CHECK: ; %bb.0:
; CHECK-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:v_or_b32_e32 v0, 0x8000, v0
; CHECK-NEXT:s_setpc_b64 s[30:31]
%cast = bitcast float %signbit.one to i32
%or = or i32 %cast, -2147483648
``
https://github.com/llvm/llvm-project/pull/167289
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add baseline tests for copysign with known signmask input (PR #167265)
arsenm wrote: Mismerged into wrong branch https://github.com/llvm/llvm-project/pull/167265 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] DAG: Add AssertNoFPClass from call return attributes (PR #167264)
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/167264
>From 0696fe61de175a41ac8a67c265a97e1611b9c5ab Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Sun, 9 Nov 2025 18:45:32 -0800
Subject: [PATCH 1/2] DAG: Add AssertNoFPClass from call return attributes
This defends against regressions in future patches. This excludes
the target intrinsic case for now; I'm worried introducing an intermediate
AssertNoFPClass is likely to break combines.
---
.../SelectionDAG/SelectionDAGBuilder.cpp| 17 +
.../CodeGen/SelectionDAG/SelectionDAGBuilder.h | 4
llvm/test/CodeGen/AMDGPU/nofpclass-call.ll | 16
3 files changed, 25 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 88b0809b767b5..6a9022dff41ad 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4638,6 +4638,12 @@ static std::optional getRange(const
Instruction &I) {
return std::nullopt;
}
+static FPClassTest getNoFPClass(const Instruction &I) {
+ if (const auto *CB = dyn_cast(&I))
+return CB->getRetNoFPClass();
+ return fcNone;
+}
+
void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
if (I.isAtomic())
return visitAtomicLoad(I);
@@ -9132,6 +9138,7 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB,
SDValue Callee,
if (Result.first.getNode()) {
Result.first = lowerRangeToAssertZExt(DAG, CB, Result.first);
+Result.first = lowerNoFPClassToAssertNoFPClass(DAG, CB, Result.first);
setValue(&CB, Result.first);
}
@@ -10718,6 +10725,16 @@ SDValue
SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG,
return DAG.getMergeValues(Ops, SL);
}
+SDValue SelectionDAGBuilder::lowerNoFPClassToAssertNoFPClass(
+SelectionDAG &DAG, const Instruction &I, SDValue Op) {
+ FPClassTest Classes = getNoFPClass(I);
+ if (Classes == fcNone)
+return Op;
+
+ return DAG.getNode(ISD::AssertNoFPClass, SDLoc(Op), Op.getValueType(), Op,
+ DAG.getTargetConstant(Classes, SDLoc(), MVT::i32));
+}
+
/// Populate a CallLowerinInfo (into \p CLI) based on the properties of
/// the call being lowered.
///
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index ed63bee58c957..13e2daa783147 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -429,6 +429,10 @@ class SelectionDAGBuilder {
SDValue lowerRangeToAssertZExt(SelectionDAG &DAG, const Instruction &I,
SDValue Op);
+ // Lower nofpclass attributes to AssertNoFPClass
+ SDValue lowerNoFPClassToAssertNoFPClass(SelectionDAG &DAG,
+ const Instruction &I, SDValue Op);
+
void populateCallLoweringInfo(TargetLowering::CallLoweringInfo &CLI,
const CallBase *Call, unsigned ArgIdx,
unsigned NumArgs, SDValue Callee,
diff --git a/llvm/test/CodeGen/AMDGPU/nofpclass-call.ll
b/llvm/test/CodeGen/AMDGPU/nofpclass-call.ll
index 1861f02ec8b1c..5f303cc2a1eef 100644
--- a/llvm/test/CodeGen/AMDGPU/nofpclass-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/nofpclass-call.ll
@@ -35,9 +35,7 @@ define float @call_nofpclass_funcs_f32(ptr addrspace(1) %ptr)
{
; CHECK-NEXT:v_mov_b32_e32 v3, v0
; CHECK-NEXT:v_mov_b32_e32 v0, v2
; CHECK-NEXT:s_swappc_b64 s[30:31], s[16:17]
-; CHECK-NEXT:v_max_f32_e32 v1, v3, v3
-; CHECK-NEXT:v_max_f32_e32 v0, v0, v0
-; CHECK-NEXT:v_min_f32_e32 v0, v1, v0
+; CHECK-NEXT:v_min_f32_e32 v0, v3, v0
; CHECK-NEXT:v_readlane_b32 s31, v4, 1
; CHECK-NEXT:v_readlane_b32 s30, v4, 0
; CHECK-NEXT:s_mov_b32 s32, s33
@@ -87,12 +85,8 @@ define <2 x float> @call_nofpclass_funcs_v2f32(ptr
addrspace(1) %ptr) {
; CHECK-NEXT:v_mov_b32_e32 v0, v3
; CHECK-NEXT:v_mov_b32_e32 v1, v2
; CHECK-NEXT:s_swappc_b64 s[30:31], s[16:17]
-; CHECK-NEXT:v_max_f32_e32 v2, v4, v4
-; CHECK-NEXT:v_max_f32_e32 v0, v0, v0
-; CHECK-NEXT:v_min_f32_e32 v0, v2, v0
-; CHECK-NEXT:v_max_f32_e32 v2, v5, v5
-; CHECK-NEXT:v_max_f32_e32 v1, v1, v1
-; CHECK-NEXT:v_min_f32_e32 v1, v2, v1
+; CHECK-NEXT:v_min_f32_e32 v0, v4, v0
+; CHECK-NEXT:v_min_f32_e32 v1, v5, v1
; CHECK-NEXT:v_readlane_b32 s31, v6, 1
; CHECK-NEXT:v_readlane_b32 s30, v6, 0
; CHECK-NEXT:s_mov_b32 s32, s33
@@ -142,12 +136,10 @@ define double @call_nofpclass_funcs_f64(ptr addrspace(1)
%ptr) {
; CHECK-NEXT:v_mov_b32_e32 v0, v5
; CHECK-NEXT:v_mov_b32_e32 v1, v4
; CHECK-NEXT:s_swappc_b64 s[30:31], s[16:17]
-; CHECK-NEXT:v_max_f64 v[2:3], v[2:3], v[2:3]
-; CHECK-NEXT:v_max_f64 v[0:1], v[0:1], v[0:1]
+; CHECK-NEXT:v_min_f64 v[0:1], v[2:3], v[0:1]
; CHECK-NE
[llvm-branch-commits] [llvm] [SPIRV][SPIRVPrepareGlobals] Map AMD's dynamic LDS 0-element globals to arrays with UINT32_MAX elements (PR #166952)
https://github.com/jmmartinez updated
https://github.com/llvm/llvm-project/pull/166952
From 8cbc3fd6403aef2cff7dbe585aa8d6762a011ef4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?=
Date: Fri, 7 Nov 2025 12:24:18 +0100
Subject: [PATCH 1/4] [SPIRV][SPIRVPrepareGlobals] Map AMD's dynamic LDS
0-element globals to arrays with UINT32_MAX elements
In HIP, dynamic LDS globals are represented using 0-element global
arrays in the __shared__ language address space.
extern __shared__ int LDS[];
These are not representable in SPIRV directly.
To represent them, for AMD, we use an array with UINT32_MAX-elements.
These are reverse translated to 0-element arrays later in AMD's SPIRV runtime
pipeline.
---
llvm/lib/Target/SPIRV/SPIRVPrepareGlobals.cpp | 27 +++
llvm/test/CodeGen/SPIRV/hip_dyn_lds.ll| 20 ++
2 files changed, 47 insertions(+)
create mode 100644 llvm/test/CodeGen/SPIRV/hip_dyn_lds.ll
diff --git a/llvm/lib/Target/SPIRV/SPIRVPrepareGlobals.cpp
b/llvm/lib/Target/SPIRV/SPIRVPrepareGlobals.cpp
index c44c53129f1e0..42a9577bb2054 100644
--- a/llvm/lib/Target/SPIRV/SPIRVPrepareGlobals.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVPrepareGlobals.cpp
@@ -13,6 +13,7 @@
#include "SPIRV.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Module.h"
using namespace llvm;
@@ -43,6 +44,29 @@ bool tryExtendLLVMBitcodeMarker(GlobalVariable &Bitcode) {
return true;
}
+bool tryExtendDynamicLDSGlobal(GlobalVariable &GV) {
+ constexpr unsigned WorkgroupAS = 3;
+ const bool IsWorkgroupExternal =
+ GV.hasExternalLinkage() && GV.getAddressSpace() == WorkgroupAS;
+ if (!IsWorkgroupExternal)
+return false;
+
+ const ArrayType *AT = dyn_cast(GV.getValueType());
+ if (!AT || AT->getNumElements() != 0)
+return false;
+
+ constexpr auto Magic = std::numeric_limits::max();
+ ArrayType *NewAT = ArrayType::get(AT->getElementType(), Magic);
+ GlobalVariable *NewGV = new GlobalVariable(
+ *GV.getParent(), NewAT, GV.isConstant(), GV.getLinkage(), nullptr, "",
+ &GV, GV.getThreadLocalMode(), WorkgroupAS, GV.isExternallyInitialized());
+ NewGV->takeName(&GV);
+ GV.replaceAllUsesWith(NewGV);
+ GV.eraseFromParent();
+
+ return true;
+}
+
bool SPIRVPrepareGlobals::runOnModule(Module &M) {
const bool IsAMD = M.getTargetTriple().getVendor() == Triple::AMD;
if (!IsAMD)
@@ -52,6 +76,9 @@ bool SPIRVPrepareGlobals::runOnModule(Module &M) {
if (GlobalVariable *Bitcode = M.getNamedGlobal("llvm.embedded.module"))
Changed |= tryExtendLLVMBitcodeMarker(*Bitcode);
+ for (GlobalVariable &GV : make_early_inc_range(M.globals()))
+Changed |= tryExtendDynamicLDSGlobal(GV);
+
return Changed;
}
char SPIRVPrepareGlobals::ID = 0;
diff --git a/llvm/test/CodeGen/SPIRV/hip_dyn_lds.ll
b/llvm/test/CodeGen/SPIRV/hip_dyn_lds.ll
new file mode 100644
index 0..f0acfdfdede9d
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/hip_dyn_lds.ll
@@ -0,0 +1,20 @@
+; RUN: llc -verify-machineinstrs -mtriple=spirv64-amd-amdhsa %s -o - |
FileCheck %s
+; RUN: %if spirv-tools %{ llc -mtriple=spirv64-amd-amdhsa %s -o -
-filetype=obj | spirv-val %}
+
+; CHECK: OpName %[[#LDS:]] "lds"
+; CHECK: OpDecorate %[[#LDS]] LinkageAttributes "lds" Import
+; CHECK: %[[#UINT:]] = OpTypeInt 32 0
+; CHECK: %[[#UINT_MAX:]] = OpConstant %[[#UINT]] 4294967295
+; CHECK: %[[#LDS_ARR_TY:]] = OpTypeArray %[[#UINT]] %[[#UINT_MAX]]
+; CHECK: %[[#LDS_ARR_PTR_WG:]] = OpTypePointer Workgroup %[[#LDS_ARR_TY]]
+; CHECK: %[[#LDS]] = OpVariable %[[#LDS_ARR_PTR_WG]] Workgroup
+
+@lds = external addrspace(3) global [0 x i32]
+
+define spir_kernel void @foo(ptr addrspace(4) %in, ptr addrspace(4) %out) {
+entry:
+ %val = load i32, ptr addrspace(4) %in
+ %add = add i32 %val, 1
+ store i32 %add, ptr addrspace(4) %out
+ ret void
+}
From 0376c3e6457061bea6ec16cb9df6789b93cac69f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?=
Date: Mon, 10 Nov 2025 11:08:22 +0100
Subject: [PATCH 2/4] [Review] Rename Magic->UInt32Max
---
llvm/lib/Target/SPIRV/SPIRVPrepareGlobals.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/SPIRV/SPIRVPrepareGlobals.cpp
b/llvm/lib/Target/SPIRV/SPIRVPrepareGlobals.cpp
index 42a9577bb2054..2b4349e5d9e39 100644
--- a/llvm/lib/Target/SPIRV/SPIRVPrepareGlobals.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVPrepareGlobals.cpp
@@ -55,8 +55,8 @@ bool tryExtendDynamicLDSGlobal(GlobalVariable &GV) {
if (!AT || AT->getNumElements() != 0)
return false;
- constexpr auto Magic = std::numeric_limits<uint32_t>::max();
- ArrayType *NewAT = ArrayType::get(AT->getElementType(), Magic);
+ constexpr auto UInt32Max = std::numeric_limits<uint32_t>::max();
+ ArrayType *NewAT = ArrayType::get(AT->getElementType(), UInt32Max);
GlobalVariable *NewGV = new GlobalVariable(
*GV.getParent(), NewAT, GV.isConstant(), GV.getLinkage(), nullptr, "",
&GV, GV.getThreadLocalMode(), WorkgroupAS, GV.isExternallyInit
[llvm-branch-commits] [llvm] AMDGPU: Add baseline tests for copysign with known signmask input (PR #167265)
https://github.com/arsenm closed https://github.com/llvm/llvm-project/pull/167265 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] DAG: Fold copysign with a known signmask to a disjoint or (PR #167266)
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/167266 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SPIRV][SPIRVPrepareGlobals] Map AMD's dynamic LDS 0-element globals to arrays with UINT32_MAX elements (PR #166952)
@@ -52,6 +76,9 @@ bool SPIRVPrepareGlobals::runOnModule(Module &M) {
if (GlobalVariable *Bitcode = M.getNamedGlobal("llvm.embedded.module"))
Changed |= tryExtendLLVMBitcodeMarker(*Bitcode);
+ for (GlobalVariable &GV : make_early_inc_range(M.globals()))
+Changed |= tryExtendDynamicLDSGlobal(GV);
jmmartinez wrote:
I've added a comment on top of the function:
```cpp
// In HIP, dynamic LDS variables are represented using 0-element global arrays
// in the __shared__ language address-space.
//
// extern __shared__ int LDS[];
//
// These are not representable in SPIRV directly.
// To represent them, for AMD, we use an array with UINT32_MAX-elements.
// These are reverse translated to 0-element arrays.
bool tryExtendDynamicLDSGlobal(GlobalVariable &GV) {
```
https://github.com/llvm/llvm-project/pull/166952
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] DAG: Fold copysign with a known signmask to a disjoint or (PR #167266)
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/167266
>From d6b054dc67ba829009cb588faa5c868ea13121b9 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Thu, 1 Aug 2024 15:11:59 +0400
Subject: [PATCH 1/3] DAG: Fold copysign with a known signmask to a disjoint or
If the sign bit is a computed sign mask (i.e., we know it's
either +0 or -0), turn this into a disjoint or. This pattern
appears in the pow implementations.
We also need to know the sign bit of the magnitude is 0 for
the or to be disjoint. Unfortunately the DAG's FP tracking is
weak and we did not have a way to check if the sign bit is known
0, so add something for that. Ideally we would get a complete
computeKnownFPClass implementation.
This is intended to help avoid the regression which caused
d3e7c4ce7a3d7 to be reverted.
---
llvm/include/llvm/CodeGen/SelectionDAG.h | 4 +++
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 20 +
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 28 +++
.../AMDGPU/copysign-simplify-demanded-bits.ll | 23 ++-
.../AMDGPU/copysign-to-disjoint-or-combine.ll | 22 ---
5 files changed, 67 insertions(+), 30 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h
b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 0dd4f23c6d85f..5b331e915 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -2072,6 +2072,10 @@ class SelectionDAG {
/// We use this predicate to simplify operations downstream.
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth = 0) const;
+ /// Return true if the sign bit of Op is known to be zero, for a
+ /// floating-point value.
+ LLVM_ABI bool SignBitIsZeroFP(SDValue Op, unsigned Depth = 0) const;
+
/// Return true if 'Op & Mask' is known to be zero. We
/// use this predicate to simplify operations downstream. Op and Mask are
/// known to be the same type.
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index f144f17d5a8f2..4f2eb1e64dbe0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18863,6 +18863,26 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
+ if (VT != N1.getValueType())
+return SDValue();
+
+ // If this is equivalent to a disjoint or, replace it with one. This can
+ // happen if the sign operand is a sign mask (i.e., x << sign_bit_position).
+ if (DAG.SignBitIsZeroFP(N0) &&
+ DAG.computeKnownBits(N1).Zero.isMaxSignedValue()) {
+// TODO: Just directly match the shift pattern. computeKnownBits is heavy
+// for such a narrowly targeted case.
+EVT IntVT = VT.changeTypeToInteger();
+// TODO: It appears to be profitable in some situations to unconditionally
+// emit a fabs(n0) to perform this combine.
+SDValue CastSrc0 = DAG.getNode(ISD::BITCAST, DL, IntVT, N0);
+SDValue CastSrc1 = DAG.getNode(ISD::BITCAST, DL, IntVT, N1);
+
+SDValue SignOr = DAG.getNode(ISD::OR, DL, IntVT, CastSrc0, CastSrc1,
+ SDNodeFlags::Disjoint);
+return DAG.getNode(ISD::BITCAST, DL, VT, SignOr);
+ }
+
return SDValue();
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 80bbfea7fb83c..d7b3b03deb757 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2920,6 +2920,34 @@ bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned
Depth) const {
return MaskedValueIsZero(Op, APInt::getSignMask(BitWidth), Depth);
}
+bool SelectionDAG::SignBitIsZeroFP(SDValue Op, unsigned Depth) const {
+ if (Depth >= MaxRecursionDepth)
+return false; // Limit search depth.
+
+ unsigned Opc = Op.getOpcode();
+ switch (Opc) {
+ case ISD::FABS:
+return true;
+ case ISD::AssertNoFPClass: {
+FPClassTest NoFPClass =
+static_cast<FPClassTest>(Op.getConstantOperandVal(1));
+
+const FPClassTest TestMask = fcNan | fcNegative;
+return (NoFPClass & TestMask) == TestMask;
+ }
+ case ISD::ARITH_FENCE:
+return SignBitIsZeroFP(Op, Depth + 1);
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FEXP10:
+return Op->getFlags().hasNoNaNs();
+ default:
+return false;
+ }
+
+ llvm_unreachable("covered opcode switch");
+}
+
/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use
/// this predicate to simplify operations downstream. Mask is known to be zero
/// for bits that V cannot have.
diff --git a/llvm/test/CodeGen/AMDGPU/copysign-simplify-demanded-bits.ll
b/llvm/test/CodeGen/AMDGPU/copysign-simplify-demanded-bits.ll
index 0be2b5c70c93b..ef676ddc8070e 100644
--- a/llvm/test/CodeGen/AMDGPU/copysign-simplify-demanded-bits.ll
+++ b/llvm/test/CodeGen/AMDGPU/copysign-simplify-demanded-bits.ll
@@ -345,15 +345,13
[llvm-branch-commits] [llvm] DAG: Fold copysign with a known signmask to a disjoint or (PR #167266)
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/167266
>From d6b054dc67ba829009cb588faa5c868ea13121b9 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Thu, 1 Aug 2024 15:11:59 +0400
Subject: [PATCH 1/3] DAG: Fold copysign with a known signmask to a disjoint or
If the sign bit is a computed sign mask (i.e., we know it's
either +0 or -0), turn this into a disjoint or. This pattern
appears in the pow implementations.
We also need to know the sign bit of the magnitude is 0 for
the or to be disjoint. Unfortunately the DAG's FP tracking is
weak and we did not have a way to check if the sign bit is known
0, so add something for that. Ideally we would get a complete
computeKnownFPClass implementation.
This is intended to help avoid the regression which caused
d3e7c4ce7a3d7 to be reverted.
---
llvm/include/llvm/CodeGen/SelectionDAG.h | 4 +++
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 20 +
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 28 +++
.../AMDGPU/copysign-simplify-demanded-bits.ll | 23 ++-
.../AMDGPU/copysign-to-disjoint-or-combine.ll | 22 ---
5 files changed, 67 insertions(+), 30 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h
b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 0dd4f23c6d85f..5b331e915 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -2072,6 +2072,10 @@ class SelectionDAG {
/// We use this predicate to simplify operations downstream.
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth = 0) const;
+ /// Return true if the sign bit of Op is known to be zero, for a
+ /// floating-point value.
+ LLVM_ABI bool SignBitIsZeroFP(SDValue Op, unsigned Depth = 0) const;
+
/// Return true if 'Op & Mask' is known to be zero. We
/// use this predicate to simplify operations downstream. Op and Mask are
/// known to be the same type.
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index f144f17d5a8f2..4f2eb1e64dbe0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18863,6 +18863,26 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
+ if (VT != N1.getValueType())
+return SDValue();
+
+ // If this is equivalent to a disjoint or, replace it with one. This can
+ // happen if the sign operand is a sign mask (i.e., x << sign_bit_position).
+ if (DAG.SignBitIsZeroFP(N0) &&
+ DAG.computeKnownBits(N1).Zero.isMaxSignedValue()) {
+// TODO: Just directly match the shift pattern. computeKnownBits is heavy
+// for such a narrowly targeted case.
+EVT IntVT = VT.changeTypeToInteger();
+// TODO: It appears to be profitable in some situations to unconditionally
+// emit a fabs(n0) to perform this combine.
+SDValue CastSrc0 = DAG.getNode(ISD::BITCAST, DL, IntVT, N0);
+SDValue CastSrc1 = DAG.getNode(ISD::BITCAST, DL, IntVT, N1);
+
+SDValue SignOr = DAG.getNode(ISD::OR, DL, IntVT, CastSrc0, CastSrc1,
+ SDNodeFlags::Disjoint);
+return DAG.getNode(ISD::BITCAST, DL, VT, SignOr);
+ }
+
return SDValue();
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 80bbfea7fb83c..d7b3b03deb757 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2920,6 +2920,34 @@ bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned
Depth) const {
return MaskedValueIsZero(Op, APInt::getSignMask(BitWidth), Depth);
}
+bool SelectionDAG::SignBitIsZeroFP(SDValue Op, unsigned Depth) const {
+ if (Depth >= MaxRecursionDepth)
+return false; // Limit search depth.
+
+ unsigned Opc = Op.getOpcode();
+ switch (Opc) {
+ case ISD::FABS:
+return true;
+ case ISD::AssertNoFPClass: {
+FPClassTest NoFPClass =
+static_cast<FPClassTest>(Op.getConstantOperandVal(1));
+
+const FPClassTest TestMask = fcNan | fcNegative;
+return (NoFPClass & TestMask) == TestMask;
+ }
+ case ISD::ARITH_FENCE:
+return SignBitIsZeroFP(Op, Depth + 1);
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FEXP10:
+return Op->getFlags().hasNoNaNs();
+ default:
+return false;
+ }
+
+ llvm_unreachable("covered opcode switch");
+}
+
/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use
/// this predicate to simplify operations downstream. Mask is known to be zero
/// for bits that V cannot have.
diff --git a/llvm/test/CodeGen/AMDGPU/copysign-simplify-demanded-bits.ll
b/llvm/test/CodeGen/AMDGPU/copysign-simplify-demanded-bits.ll
index 0be2b5c70c93b..ef676ddc8070e 100644
--- a/llvm/test/CodeGen/AMDGPU/copysign-simplify-demanded-bits.ll
+++ b/llvm/test/CodeGen/AMDGPU/copysign-simplify-demanded-bits.ll
@@ -345,15 +345,13
[llvm-branch-commits] [llvm] [SPIRV][SPIRVPrepareGlobals] Map AMD's dynamic LDS 0-element globals to arrays with UINT32_MAX elements (PR #166952)
@@ -52,6 +76,9 @@ bool SPIRVPrepareGlobals::runOnModule(Module &M) {
if (GlobalVariable *Bitcode = M.getNamedGlobal("llvm.embedded.module"))
Changed |= tryExtendLLVMBitcodeMarker(*Bitcode);
+ for (GlobalVariable &GV : make_early_inc_range(M.globals()))
+Changed |= tryExtendDynamicLDSGlobal(GV);
jmmartinez wrote:
You're right, I'll add a comment.
https://github.com/llvm/llvm-project/pull/166952
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SPIRV][SPIRVPrepareGlobals] Map AMD's dynamic LDS 0-element globals to arrays with UINT32_MAX elements (PR #166952)
@@ -43,6 +44,29 @@ bool tryExtendLLVMBitcodeMarker(GlobalVariable &Bitcode) {
return true;
}
+bool tryExtendDynamicLDSGlobal(GlobalVariable &GV) {
+ constexpr unsigned WorkgroupAS = 3;
+ const bool IsWorkgroupExternal =
+ GV.hasExternalLinkage() && GV.getAddressSpace() == WorkgroupAS;
+ if (!IsWorkgroupExternal)
+return false;
+
+ const ArrayType *AT = dyn_cast<ArrayType>(GV.getValueType());
+ if (!AT || AT->getNumElements() != 0)
+return false;
jmmartinez wrote:
Sorry I'm not sure I understood the question.
This condition is matching globals that have an array type with 0 elements. If
the global does not have an array type or if it is an array type with a size
different from 0 this function returns false.
Maybe there is a mix from using `getValueType` vs `getType`. The first gives
the type of the initializer of the global (the value stored in the global), the
second returns the type of the global when used as a value in the llvm-ir (a
pointer type).
https://github.com/llvm/llvm-project/pull/166952
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SPIRV][SPIRVPrepareGlobals] Map AMD's dynamic LDS 0-element globals to arrays with UINT32_MAX elements (PR #166952)
jmmartinez wrote: > I'm wondering if you need to change all 0-sized array or not. If so, we might > want to centralize the conversion of 0-sized arrays. We could try to move the > code that changes them to 1 element arrays here as well. If you point me to where this is happening I can try to do that too as a follow-up patch. https://github.com/llvm/llvm-project/pull/166952 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SPIRV][SPIRVPrepareGlobals] Map AMD's dynamic LDS 0-element globals to arrays with UINT32_MAX elements (PR #166952)
@@ -43,6 +44,29 @@ bool tryExtendLLVMBitcodeMarker(GlobalVariable &Bitcode) {
return true;
}
+bool tryExtendDynamicLDSGlobal(GlobalVariable &GV) {
+ constexpr unsigned WorkgroupAS = 3;
jmmartinez wrote:
Way better. Thanks !
https://github.com/llvm/llvm-project/pull/166952
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT][BTI] Add MCPlusBuilder::updateBTIVariant (PR #167308)
https://github.com/bgergely0 updated
https://github.com/llvm/llvm-project/pull/167308
From 8b3ff18ecba598ab6560c05924d1ce78d9a9 Mon Sep 17 00:00:00 2001
From: Gergely Balint
Date: Mon, 1 Sep 2025 08:52:28 +
Subject: [PATCH] [BOLT][BTI] Add MCPlusBuilder::updateBTIVariant
Checks if an instruction is BTI, and updates the immediate value to the
newly requested variant.
---
bolt/include/bolt/Core/MCPlusBuilder.h | 6 ++
bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp | 8
bolt/unittests/Core/MCPlusBuilder.cpp| 6 ++
3 files changed, 20 insertions(+)
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h
b/bolt/include/bolt/Core/MCPlusBuilder.h
index 6760586876d94..660c1c64b06cf 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -1884,6 +1884,12 @@ class MCPlusBuilder {
llvm_unreachable("not implemented");
}
+ /// Update operand of BTI instruction.
+ virtual void updateBTIVariant(MCInst &Inst, bool CouldCall,
+bool CouldJump) const {
+llvm_unreachable("not implemented");
+ }
+
/// Store \p Target absolute address to \p RegName
virtual InstructionListType materializeAddress(const MCSymbol *Target,
MCContext *Ctx,
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 0cf3db8a6fc2a..cb0a9cc0c12db 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -2730,6 +2730,14 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
Inst.addOperand(MCOperand::createImm(HintNum));
}
+ void updateBTIVariant(MCInst &Inst, bool CouldCall,
+bool CouldJump) const override {
+assert(Inst.getOpcode() == AArch64::HINT && "Not a BTI instruction.");
+unsigned HintNum = getBTIHintNum(CouldCall, CouldJump);
+Inst.clear();
+Inst.addOperand(MCOperand::createImm(HintNum));
+ }
+
InstructionListType materializeAddress(const MCSymbol *Target, MCContext
*Ctx,
MCPhysReg RegName,
int64_t Addend = 0) const override {
diff --git a/bolt/unittests/Core/MCPlusBuilder.cpp
b/bolt/unittests/Core/MCPlusBuilder.cpp
index 439d72a343ce8..02ecb87b4a5e3 100644
--- a/bolt/unittests/Core/MCPlusBuilder.cpp
+++ b/bolt/unittests/Core/MCPlusBuilder.cpp
@@ -156,6 +156,8 @@ TEST_P(MCPlusBuilderTester, AArch64_BTI) {
ASSERT_EQ(II->getOpcode(), AArch64::HINT);
ASSERT_EQ(II->getOperand(0).getImm(), 38);
ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, true, true));
+ BC->MIB->updateBTIVariant(*II, true, false);
+ ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, true, false));
MCInst BTIj;
BC->MIB->createBTI(BTIj, false, true);
@@ -163,6 +165,8 @@ TEST_P(MCPlusBuilderTester, AArch64_BTI) {
ASSERT_EQ(II->getOpcode(), AArch64::HINT);
ASSERT_EQ(II->getOperand(0).getImm(), 36);
ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, false, true));
+ BC->MIB->updateBTIVariant(*II, true, true);
+ ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, true, true));
MCInst BTIc;
BC->MIB->createBTI(BTIc, true, false);
@@ -170,6 +174,8 @@ TEST_P(MCPlusBuilderTester, AArch64_BTI) {
ASSERT_EQ(II->getOpcode(), AArch64::HINT);
ASSERT_EQ(II->getOperand(0).getImm(), 34);
ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, true, false));
+ BC->MIB->updateBTIVariant(*II, false, true);
+ ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, false, true));
MCInst BTIinvalid;
ASSERT_DEATH(BC->MIB->createBTI(BTIinvalid, false, false),
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT][BTI] Add MCPlusBuilder::updateBTIVariant (PR #167308)
https://github.com/bgergely0 updated
https://github.com/llvm/llvm-project/pull/167308
From 8b3ff18ecba598ab6560c05924d1ce78d9a9 Mon Sep 17 00:00:00 2001
From: Gergely Balint
Date: Mon, 1 Sep 2025 08:52:28 +
Subject: [PATCH] [BOLT][BTI] Add MCPlusBuilder::updateBTIVariant
Checks if an instruction is BTI, and updates the immediate value to the
newly requested variant.
---
bolt/include/bolt/Core/MCPlusBuilder.h | 6 ++
bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp | 8
bolt/unittests/Core/MCPlusBuilder.cpp| 6 ++
3 files changed, 20 insertions(+)
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h
b/bolt/include/bolt/Core/MCPlusBuilder.h
index 6760586876d94..660c1c64b06cf 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -1884,6 +1884,12 @@ class MCPlusBuilder {
llvm_unreachable("not implemented");
}
+ /// Update operand of BTI instruction.
+ virtual void updateBTIVariant(MCInst &Inst, bool CouldCall,
+bool CouldJump) const {
+llvm_unreachable("not implemented");
+ }
+
/// Store \p Target absolute address to \p RegName
virtual InstructionListType materializeAddress(const MCSymbol *Target,
MCContext *Ctx,
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 0cf3db8a6fc2a..cb0a9cc0c12db 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -2730,6 +2730,14 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
Inst.addOperand(MCOperand::createImm(HintNum));
}
+ void updateBTIVariant(MCInst &Inst, bool CouldCall,
+bool CouldJump) const override {
+assert(Inst.getOpcode() == AArch64::HINT && "Not a BTI instruction.");
+unsigned HintNum = getBTIHintNum(CouldCall, CouldJump);
+Inst.clear();
+Inst.addOperand(MCOperand::createImm(HintNum));
+ }
+
InstructionListType materializeAddress(const MCSymbol *Target, MCContext
*Ctx,
MCPhysReg RegName,
int64_t Addend = 0) const override {
diff --git a/bolt/unittests/Core/MCPlusBuilder.cpp
b/bolt/unittests/Core/MCPlusBuilder.cpp
index 439d72a343ce8..02ecb87b4a5e3 100644
--- a/bolt/unittests/Core/MCPlusBuilder.cpp
+++ b/bolt/unittests/Core/MCPlusBuilder.cpp
@@ -156,6 +156,8 @@ TEST_P(MCPlusBuilderTester, AArch64_BTI) {
ASSERT_EQ(II->getOpcode(), AArch64::HINT);
ASSERT_EQ(II->getOperand(0).getImm(), 38);
ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, true, true));
+ BC->MIB->updateBTIVariant(*II, true, false);
+ ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, true, false));
MCInst BTIj;
BC->MIB->createBTI(BTIj, false, true);
@@ -163,6 +165,8 @@ TEST_P(MCPlusBuilderTester, AArch64_BTI) {
ASSERT_EQ(II->getOpcode(), AArch64::HINT);
ASSERT_EQ(II->getOperand(0).getImm(), 36);
ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, false, true));
+ BC->MIB->updateBTIVariant(*II, true, true);
+ ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, true, true));
MCInst BTIc;
BC->MIB->createBTI(BTIc, true, false);
@@ -170,6 +174,8 @@ TEST_P(MCPlusBuilderTester, AArch64_BTI) {
ASSERT_EQ(II->getOpcode(), AArch64::HINT);
ASSERT_EQ(II->getOperand(0).getImm(), 34);
ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, true, false));
+ BC->MIB->updateBTIVariant(*II, false, true);
+ ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, false, true));
MCInst BTIinvalid;
ASSERT_DEATH(BC->MIB->createBTI(BTIinvalid, false, false),
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT][BTI] Add MCPlusBuilder::isBTILandingPad (PR #167306)
https://github.com/bgergely0 updated
https://github.com/llvm/llvm-project/pull/167306
From f1cc15ef3330cfd6f4151d461e7e7d963df4e56d Mon Sep 17 00:00:00 2001
From: Gergely Balint
Date: Thu, 28 Aug 2025 12:32:37 +
Subject: [PATCH] [BOLT][BTI] Add MCPlusBuilder::isBTILandingPad
- takes both implicit and explicit BTIs into account
- fix related comment in AArch64BranchTargets.cpp
---
bolt/include/bolt/Core/MCPlusBuilder.h | 14 ++
.../Target/AArch64/AArch64MCPlusBuilder.cpp| 18 ++
bolt/unittests/Core/MCPlusBuilder.cpp | 17 +
.../Target/AArch64/AArch64BranchTargets.cpp| 6 --
4 files changed, 53 insertions(+), 2 deletions(-)
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h
b/bolt/include/bolt/Core/MCPlusBuilder.h
index 9cbff02619bd2..6760586876d94 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -1865,6 +1865,20 @@ class MCPlusBuilder {
llvm_unreachable("not implemented");
}
+ /// Check if an Instruction is a BTI landing pad with the required
properties.
+ /// Takes both explicit and implicit BTIs into account.
+ virtual bool isBTILandingPad(MCInst &Inst, bool CouldCall,
+ bool CouldJump) const {
+llvm_unreachable("not implemented");
+return false;
+ }
+
+ /// Check if an Instruction is an implicit BTI c landing pad.
+ virtual bool isImplicitBTIC(MCInst &Inst) const {
+llvm_unreachable("not implemented");
+return false;
+ }
+
/// Create a BTI landing pad instruction.
virtual void createBTI(MCInst &Inst, bool CouldCall, bool CouldJump) const {
llvm_unreachable("not implemented");
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 8cfde5701ee7f..0cf3db8a6fc2a 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -2706,6 +2706,24 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
return Insts;
}
+ bool isBTILandingPad(MCInst &Inst, bool CouldCall,
+ bool CouldJump) const override {
+unsigned HintNum = getBTIHintNum(CouldCall, CouldJump);
+bool IsExplicitBTI =
+Inst.getOpcode() == AArch64::HINT && Inst.getNumOperands() == 1 &&
+Inst.getOperand(0).isImm() && Inst.getOperand(0).getImm() == HintNum;
+
+bool IsImplicitBTI = HintNum == 34 && isImplicitBTIC(Inst);
+return IsExplicitBTI || IsImplicitBTI;
+ }
+
+ bool isImplicitBTIC(MCInst &Inst) const override {
+// PACI[AB]SP are always implicitly BTI C, independently of
+// SCTLR_EL1.BT[01].
+return Inst.getOpcode() == AArch64::PACIASP ||
+ Inst.getOpcode() == AArch64::PACIBSP;
+ }
+
void createBTI(MCInst &Inst, bool CouldCall, bool CouldJump) const override {
Inst.setOpcode(AArch64::HINT);
unsigned HintNum = getBTIHintNum(CouldCall, CouldJump);
diff --git a/bolt/unittests/Core/MCPlusBuilder.cpp
b/bolt/unittests/Core/MCPlusBuilder.cpp
index 33389bca8b21e..439d72a343ce8 100644
--- a/bolt/unittests/Core/MCPlusBuilder.cpp
+++ b/bolt/unittests/Core/MCPlusBuilder.cpp
@@ -155,22 +155,39 @@ TEST_P(MCPlusBuilderTester, AArch64_BTI) {
auto II = BB->begin();
ASSERT_EQ(II->getOpcode(), AArch64::HINT);
ASSERT_EQ(II->getOperand(0).getImm(), 38);
+ ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, true, true));
MCInst BTIj;
BC->MIB->createBTI(BTIj, false, true);
II = BB->addInstruction(BTIj);
ASSERT_EQ(II->getOpcode(), AArch64::HINT);
ASSERT_EQ(II->getOperand(0).getImm(), 36);
+ ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, false, true));
MCInst BTIc;
BC->MIB->createBTI(BTIc, true, false);
II = BB->addInstruction(BTIc);
ASSERT_EQ(II->getOpcode(), AArch64::HINT);
ASSERT_EQ(II->getOperand(0).getImm(), 34);
+ ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, true, false));
MCInst BTIinvalid;
ASSERT_DEATH(BC->MIB->createBTI(BTIinvalid, false, false),
"No target kinds!");
+
+ MCInst Paciasp = MCInstBuilder(AArch64::PACIASP);
+ II = BB->addInstruction(Paciasp);
+ ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, true, false));
+ ASSERT_FALSE(BC->MIB->isBTILandingPad(*II, true, true));
+ ASSERT_FALSE(BC->MIB->isBTILandingPad(*II, false, true));
+ ASSERT_TRUE(BC->MIB->isImplicitBTIC(*II));
+
+ MCInst Pacibsp = MCInstBuilder(AArch64::PACIBSP);
+ II = BB->addInstruction(Pacibsp);
+ ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, true, false));
+ ASSERT_FALSE(BC->MIB->isBTILandingPad(*II, true, true));
+ ASSERT_FALSE(BC->MIB->isBTILandingPad(*II, false, true));
+ ASSERT_TRUE(BC->MIB->isImplicitBTIC(*II));
}
TEST_P(MCPlusBuilderTester, AArch64_CmpJNE) {
diff --git a/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp
b/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp
index b6f3e56c3a18f..ea7de840faf92 100644
--- a/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp
+++ b/llvm/lib/Target/AArch64/AAr
[llvm-branch-commits] [llvm] [BOLT][BTI] Add MCPlusBuilder::addBTItoBBStart (PR #167329)
https://github.com/bgergely0 created
https://github.com/llvm/llvm-project/pull/167329
This function contains most of the logic for BTI:
- it takes the BasicBlock and the instruction used to jump to it.
- then it checks if the first non-pseudo instruction is a sufficient
landing pad for the used call.
- if not, it generates the correct BTI instruction.
Also introduce the isBTIVariantCoveringCall helper to simplify the logic.
From 4fd752107da2e66670dc19c03abd798b35a903b1 Mon Sep 17 00:00:00 2001
From: Gergely Balint
Date: Wed, 17 Sep 2025 12:24:04 +
Subject: [PATCH] [BOLT][BTI] Add MCPlusBuilder::addBTItoBBStart
This function contains most of the logic for BTI:
- it takes the BasicBlock and the instruction used to jump to it.
- then it checks if the first non-pseudo instruction is a sufficient
landing pad for the used call.
- if not, it generates the correct BTI instruction.
Also introduce the isBTIVariantCoveringCall helper to simplify the logic.
---
bolt/include/bolt/Core/MCPlusBuilder.h| 13 +++
.../Target/AArch64/AArch64MCPlusBuilder.cpp | 75 +
bolt/unittests/Core/MCPlusBuilder.cpp | 105 ++
3 files changed, 193 insertions(+)
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h
b/bolt/include/bolt/Core/MCPlusBuilder.h
index 660c1c64b06cf..4eaf444c320bf 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -1890,6 +1890,19 @@ class MCPlusBuilder {
llvm_unreachable("not implemented");
}
+ /// Checks if the indirect call / jump is accepted by the landing pad at the
+ /// start of the target BasicBlock.
+ virtual bool isBTIVariantCoveringCall(MCInst &Call, MCInst &Pad) const {
+llvm_unreachable("not implemented");
+return false;
+ }
+
+ /// Adds a BTI landing pad to the start of the BB, that matches the indirect
+ /// call/jump inst.
+ virtual void addBTItoBBStart(BinaryBasicBlock &BB, MCInst &Call) const {
+llvm_unreachable("not implemented");
+ }
+
/// Store \p Target absolute address to \p RegName
virtual InstructionListType materializeAddress(const MCSymbol *Target,
MCContext *Ctx,
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index cb0a9cc0c12db..a5c88e113f726 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -2738,6 +2738,81 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
Inst.addOperand(MCOperand::createImm(HintNum));
}
+ bool isBTIVariantCoveringCall(MCInst &Call, MCInst &Pad) const override {
+assert((isIndirectCall(Call) || isIndirectBranch(Call)) &&
+ "Not an indirect call or branch.");
+
+// A BLR can be accepted by a BTI c.
+if (isIndirectCall(Call))
+ return isBTILandingPad(Pad, true, false) ||
+ isBTILandingPad(Pad, true, true);
+
+// A BR can be accepted by a BTI j or BTI c (and BTI jc) IF the operand is
+// x16 or x17. If the operand is not x16 or x17, it can be accepted by a
BTI
+// j or BTI jc (and not BTI c).
+if (isIndirectBranch(Call)) {
+ assert(Call.getNumOperands() == 1 &&
+ "Indirect branch needs to have 1 operand.");
+ assert(Call.getOperand(0).isReg() &&
+ "Indirect branch does not have a register operand.");
+ MCPhysReg Reg = Call.getOperand(0).getReg();
+ if (Reg == AArch64::X16 || Reg == AArch64::X17)
+return isBTILandingPad(Pad, true, false) ||
+ isBTILandingPad(Pad, false, true) ||
+ isBTILandingPad(Pad, true, true);
+ return isBTILandingPad(Pad, false, true) ||
+ isBTILandingPad(Pad, true, true);
+}
+return false;
+ }
+
+ void addBTItoBBStart(BinaryBasicBlock &BB, MCInst &Call) const override {
+auto II = BB.getFirstNonPseudo();
+if (II != BB.end()) {
+ if (isBTIVariantCoveringCall(Call, *II))
+return;
+ // A BLR can be accepted by a BTI c.
+ if (isIndirectCall(Call)) {
+// if we have a BTI j at the start, extend it to a BTI jc,
+// otherwise insert a new BTI c.
+if (isBTILandingPad(*II, false, true)) {
+ updateBTIVariant(*II, true, true);
+} else {
+ MCInst BTIInst;
+ createBTI(BTIInst, true, false);
+ BB.insertInstruction(II, BTIInst);
+}
+ }
+
+ // A BR can be accepted by a BTI j or BTI c (and BTI jc) IF the operand
is
+ // x16 or x17. If the operand is not x16 or x17, it can be accepted by a
+ // BTI j or BTI jc (and not BTI c).
+ if (isIndirectBranch(Call)) {
+assert(Call.getNumOperands() == 1 &&
+ "Indirect branch needs to have 1 operand.");
+assert(Call.getOperand(0).isReg() &&
+ "Indirect branch does not have a register operand.");
+MCPhysReg Reg = Call.getOperand(0).getReg();
+
[llvm-branch-commits] [llvm] [BOLT][BTI] Add MCPlusBuilder::addBTItoBBStart (PR #167329)
bgergely0 wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack href="https://app.graphite.com/github/pr/llvm/llvm-project/167329?utm_source=stack-comment-downstack-mergeability-warning"; > >on Graphite. > https://graphite.dev/docs/merge-pull-requests";>Learn more * **#167329** https://app.graphite.com/github/pr/llvm/llvm-project/167329?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> 👈 https://app.graphite.com/github/pr/llvm/llvm-project/167329?utm_source=stack-comment-view-in-graphite"; target="_blank">(View in Graphite) * **#167308** https://app.graphite.com/github/pr/llvm/llvm-project/167308?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#167306** https://app.graphite.com/github/pr/llvm/llvm-project/167306?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#167305** https://app.graphite.com/github/pr/llvm/llvm-project/167305?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * `main` This stack of pull requests is managed by https://graphite.dev?utm-source=stack-comment";>Graphite. Learn more about https://stacking.dev/?utm_source=stack-comment";>stacking. https://github.com/llvm/llvm-project/pull/167329 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT][BTI] Add MCPlusBuilder::isBTILandingPad (PR #167306)
https://github.com/bgergely0 updated
https://github.com/llvm/llvm-project/pull/167306
From f1cc15ef3330cfd6f4151d461e7e7d963df4e56d Mon Sep 17 00:00:00 2001
From: Gergely Balint
Date: Thu, 28 Aug 2025 12:32:37 +
Subject: [PATCH] [BOLT][BTI] Add MCPlusBuilder::isBTILandingPad
- takes both implicit and explicit BTIs into account
- fix related comment in AArch64BranchTargets.cpp
---
bolt/include/bolt/Core/MCPlusBuilder.h | 14 ++
.../Target/AArch64/AArch64MCPlusBuilder.cpp| 18 ++
bolt/unittests/Core/MCPlusBuilder.cpp | 17 +
.../Target/AArch64/AArch64BranchTargets.cpp| 6 --
4 files changed, 53 insertions(+), 2 deletions(-)
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h
b/bolt/include/bolt/Core/MCPlusBuilder.h
index 9cbff02619bd2..6760586876d94 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -1865,6 +1865,20 @@ class MCPlusBuilder {
llvm_unreachable("not implemented");
}
+ /// Check if an Instruction is a BTI landing pad with the required
properties.
+ /// Takes both explicit and implicit BTIs into account.
+ virtual bool isBTILandingPad(MCInst &Inst, bool CouldCall,
+ bool CouldJump) const {
+llvm_unreachable("not implemented");
+return false;
+ }
+
+ /// Check if an Instruction is an implicit BTI c landing pad.
+ virtual bool isImplicitBTIC(MCInst &Inst) const {
+llvm_unreachable("not implemented");
+return false;
+ }
+
/// Create a BTI landing pad instruction.
virtual void createBTI(MCInst &Inst, bool CouldCall, bool CouldJump) const {
llvm_unreachable("not implemented");
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 8cfde5701ee7f..0cf3db8a6fc2a 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -2706,6 +2706,24 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
return Insts;
}
+ bool isBTILandingPad(MCInst &Inst, bool CouldCall,
+ bool CouldJump) const override {
+unsigned HintNum = getBTIHintNum(CouldCall, CouldJump);
+bool IsExplicitBTI =
+Inst.getOpcode() == AArch64::HINT && Inst.getNumOperands() == 1 &&
+Inst.getOperand(0).isImm() && Inst.getOperand(0).getImm() == HintNum;
+
+bool IsImplicitBTI = HintNum == 34 && isImplicitBTIC(Inst);
+return IsExplicitBTI || IsImplicitBTI;
+ }
+
+ bool isImplicitBTIC(MCInst &Inst) const override {
+// PACI[AB]SP are always implicitly BTI C, independently of
+// SCTLR_EL1.BT[01].
+return Inst.getOpcode() == AArch64::PACIASP ||
+ Inst.getOpcode() == AArch64::PACIBSP;
+ }
+
void createBTI(MCInst &Inst, bool CouldCall, bool CouldJump) const override {
Inst.setOpcode(AArch64::HINT);
unsigned HintNum = getBTIHintNum(CouldCall, CouldJump);
diff --git a/bolt/unittests/Core/MCPlusBuilder.cpp
b/bolt/unittests/Core/MCPlusBuilder.cpp
index 33389bca8b21e..439d72a343ce8 100644
--- a/bolt/unittests/Core/MCPlusBuilder.cpp
+++ b/bolt/unittests/Core/MCPlusBuilder.cpp
@@ -155,22 +155,39 @@ TEST_P(MCPlusBuilderTester, AArch64_BTI) {
auto II = BB->begin();
ASSERT_EQ(II->getOpcode(), AArch64::HINT);
ASSERT_EQ(II->getOperand(0).getImm(), 38);
+ ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, true, true));
MCInst BTIj;
BC->MIB->createBTI(BTIj, false, true);
II = BB->addInstruction(BTIj);
ASSERT_EQ(II->getOpcode(), AArch64::HINT);
ASSERT_EQ(II->getOperand(0).getImm(), 36);
+ ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, false, true));
MCInst BTIc;
BC->MIB->createBTI(BTIc, true, false);
II = BB->addInstruction(BTIc);
ASSERT_EQ(II->getOpcode(), AArch64::HINT);
ASSERT_EQ(II->getOperand(0).getImm(), 34);
+ ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, true, false));
MCInst BTIinvalid;
ASSERT_DEATH(BC->MIB->createBTI(BTIinvalid, false, false),
"No target kinds!");
+
+ MCInst Paciasp = MCInstBuilder(AArch64::PACIASP);
+ II = BB->addInstruction(Paciasp);
+ ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, true, false));
+ ASSERT_FALSE(BC->MIB->isBTILandingPad(*II, true, true));
+ ASSERT_FALSE(BC->MIB->isBTILandingPad(*II, false, true));
+ ASSERT_TRUE(BC->MIB->isImplicitBTIC(*II));
+
+ MCInst Pacibsp = MCInstBuilder(AArch64::PACIBSP);
+ II = BB->addInstruction(Pacibsp);
+ ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, true, false));
+ ASSERT_FALSE(BC->MIB->isBTILandingPad(*II, true, true));
+ ASSERT_FALSE(BC->MIB->isBTILandingPad(*II, false, true));
+ ASSERT_TRUE(BC->MIB->isImplicitBTIC(*II));
}
TEST_P(MCPlusBuilderTester, AArch64_CmpJNE) {
diff --git a/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp
b/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp
index b6f3e56c3a18f..ea7de840faf92 100644
--- a/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp
+++ b/llvm/lib/Target/AArch64/AAr
[llvm-branch-commits] [llvm] [VPlan] Implement compressed widening of memory instructions (PR #166956)
https://github.com/skachkov-sc updated
https://github.com/llvm/llvm-project/pull/166956
>From 92342e03b192d37370c9160b13ce1048501eb079 Mon Sep 17 00:00:00 2001
From: Sergey Kachkov
Date: Fri, 7 Nov 2025 18:09:56 +0300
Subject: [PATCH] [VPlan] Implement compressed widening of memory instructions
---
.../llvm/Analysis/TargetTransformInfo.h | 1 +
.../Transforms/Vectorize/LoopVectorize.cpp| 24 ++
llvm/lib/Transforms/Vectorize/VPlan.h | 32 ---
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 23 +
.../Transforms/Vectorize/VPlanTransforms.cpp | 11 ---
5 files changed, 61 insertions(+), 30 deletions(-)
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h
b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 0f17312b03827..e8769f5860c77 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1442,6 +1442,7 @@ class TargetTransformInfo {
Normal,///< The cast is used with a normal load/store.
Masked,///< The cast is used with a masked load/store.
GatherScatter, ///< The cast is used with a gather/scatter.
+Compressed,///< The cast is used with an expand load/compress store.
Interleave,///< The cast is used with an interleaved load/store.
Reversed, ///< The cast is used with a reversed load/store.
};
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index e069b2e8103e0..6565c8c036ca0 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1027,6 +1027,7 @@ class LoopVectorizationCostModel {
CM_Widen_Reverse, // For consecutive accesses with stride -1.
CM_Interleave,
CM_GatherScatter,
+CM_Compressed,
CM_Scalarize,
CM_VectorCall,
CM_IntrinsicCall
@@ -3109,9 +3110,9 @@ void
LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
if (IsUniformMemOpUse(I))
return true;
-return (WideningDecision == CM_Widen ||
-WideningDecision == CM_Widen_Reverse ||
-WideningDecision == CM_Interleave);
+return (
+WideningDecision == CM_Widen || WideningDecision == CM_Widen_Reverse ||
+WideningDecision == CM_Interleave || WideningDecision ==
CM_Compressed);
};
// Returns true if Ptr is the pointer operand of a memory access instruction
@@ -5192,11 +5193,16 @@ InstructionCost
LoopVectorizationCostModel::getConsecutiveMemOpCost(
Instruction *I, ElementCount VF, InstWidening Decision) {
Type *ValTy = getLoadStoreType(I);
auto *VectorTy = cast(toVectorTy(ValTy, VF));
+ const Align Alignment = getLoadStoreAlignment(I);
unsigned AS = getLoadStoreAddressSpace(I);
+ if (Decision == CM_Compressed)
+return TTI.getExpandCompressMemoryOpCost(I->getOpcode(), VectorTy,
+ /*VariableMask*/ true, Alignment,
+ CostKind, I);
+
assert((Decision == CM_Widen || Decision == CM_Widen_Reverse) &&
"Expected widen decision.");
- const Align Alignment = getLoadStoreAlignment(I);
InstructionCost Cost = 0;
if (Legal->isMaskRequired(I)) {
Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS,
@@ -6300,6 +6306,8 @@
LoopVectorizationCostModel::getInstructionCost(Instruction *I,
switch (getWideningDecision(I, VF)) {
case LoopVectorizationCostModel::CM_GatherScatter:
return TTI::CastContextHint::GatherScatter;
+ case LoopVectorizationCostModel::CM_Compressed:
+return TTI::CastContextHint::Compressed;
case LoopVectorizationCostModel::CM_Interleave:
return TTI::CastContextHint::Interleave;
case LoopVectorizationCostModel::CM_Scalarize:
@@ -7515,8 +7523,9 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I,
ArrayRef Operands,
LoopVectorizationCostModel::InstWidening Decision =
CM.getWideningDecision(I, Range.Start);
bool Reverse = Decision == LoopVectorizationCostModel::CM_Widen_Reverse;
+ bool Compressed = Decision == LoopVectorizationCostModel::CM_Compressed;
bool Consecutive =
- Reverse || Decision == LoopVectorizationCostModel::CM_Widen;
+ Reverse || Compressed || Decision ==
LoopVectorizationCostModel::CM_Widen;
VPValue *Ptr = isa(I) ? Operands[0] : Operands[1];
if (Consecutive) {
@@ -7546,11 +7555,12 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I,
ArrayRef Operands,
}
if (LoadInst *Load = dyn_cast(I))
return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse,
- VPIRMetadata(*Load, LVer), I->getDebugLoc());
+ Compressed, VPIRMetadata(*Load, LVer),
+ I->getDebugLoc());
StoreInst *Store = cast(I);
return new VPWidenStoreRecipe(*Store, Ptr, Operands[0]
[llvm-branch-commits] [llvm] [BOLT][PAC] Warn about synchronous unwind tables (PR #165227)
https://github.com/bgergely0 updated
https://github.com/llvm/llvm-project/pull/165227
From 7e17eea4a5da638d1bfb375904720d287585535c Mon Sep 17 00:00:00 2001
From: Gergely Balint
Date: Mon, 27 Oct 2025 09:29:54 +
Subject: [PATCH 1/2] [BOLT][PAC] Warn about synchronous unwind tables
BOLT currently ignores functions with synchronous PAuth DWARF info.
When more than 10% of functions get ignored for inconsistencies, we
should emit a warning to only use asynchronous unwind tables.
See also: #165215
---
bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp| 9 -
.../AArch64/pacret-synchronous-unwind.cpp | 33 +++
2 files changed, 41 insertions(+), 1 deletion(-)
create mode 100644 bolt/test/runtime/AArch64/pacret-synchronous-unwind.cpp
diff --git a/bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp
b/bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp
index 91030544d2b88..01af88818a21d 100644
--- a/bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp
+++ b/bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp
@@ -133,11 +133,18 @@ Error
PointerAuthCFIAnalyzer::runOnFunctions(BinaryContext &BC) {
ParallelUtilities::runOnEachFunction(
BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun,
SkipPredicate, "PointerAuthCFIAnalyzer");
+
+ float IgnoredPercent = (100.0 * FunctionsIgnored) / Total;
BC.outs() << "BOLT-INFO: PointerAuthCFIAnalyzer ran on " << Total
<< " functions. Ignored " << FunctionsIgnored << " functions "
-<< format("(%.2lf%%)", (100.0 * FunctionsIgnored) / Total)
+<< format("(%.2lf%%)", IgnoredPercent)
<< " because of CFI inconsistencies\n";
+ if (IgnoredPercent >= 10.0)
+BC.outs() << "BOLT-WARNING: PointerAuthCFIAnalyzer only supports "
+ "asynchronous unwind tables. For C compilers, see "
+ "-fasynchronous-unwind-tables.\n";
+
return Error::success();
}
diff --git a/bolt/test/runtime/AArch64/pacret-synchronous-unwind.cpp
b/bolt/test/runtime/AArch64/pacret-synchronous-unwind.cpp
new file mode 100644
index 0..1bfeeaed3715a
--- /dev/null
+++ b/bolt/test/runtime/AArch64/pacret-synchronous-unwind.cpp
@@ -0,0 +1,33 @@
+// Test to demonstrate that functions compiled with synchronous unwind tables
+// are ignored by the PointerAuthCFIAnalyzer.
+// Exception handling is needed to have _any_ unwind tables, otherwise the
+// PointerAuthCFIAnalyzer does not run on these functions, so it does not
ignore
+// any function.
+//
+// REQUIRES: system-linux,bolt-runtime
+//
+// RUN: %clangxx --target=aarch64-unknown-linux-gnu \
+// RUN: -mbranch-protection=pac-ret \
+// RUN: -fno-asynchronous-unwind-tables \
+// RUN: %s -o %t.exe -Wl,-q
+// RUN: llvm-bolt %t.exe -o %t.bolt | FileCheck %s --check-prefix=CHECK
+//
+// CHECK: PointerAuthCFIAnalyzer ran on 3 functions. Ignored
+// CHECK-NOT: 0 functions (0.00%) because of CFI inconsistencies
+// CHECK-SAME: 1 functions (33.33%) because of CFI inconsistencies
+// CHECK-NEXT: BOLT-WARNING: PointerAuthCFIAnalyzer only supports asynchronous
+// CHECK-SAME: unwind tables. For C compilers, see
-fasynchronous-unwind-tables.
+
+#include
+#include
+
+void foo() { throw std::runtime_error("Exception from foo()."); }
+
+int main() {
+ try {
+foo();
+ } catch (const std::exception &e) {
+printf("Exception caught: %s\n", e.what());
+ }
+ return 0;
+}
From d7dc55487324624608d8f2edcdfa6c3c70aa28f5 Mon Sep 17 00:00:00 2001
From: Gergely Balint
Date: Tue, 28 Oct 2025 09:23:08 +
Subject: [PATCH 2/2] [BOLT] Use opts::Verbosity in PointerAuthCFIAnalyzer
---
bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp| 27 ---
bolt/test/AArch64/negate-ra-state-incorrect.s | 2 +-
2 files changed, 18 insertions(+), 11 deletions(-)
diff --git a/bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp
b/bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp
index 01af88818a21d..5979d5fb01818 100644
--- a/bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp
+++ b/bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp
@@ -28,6 +28,10 @@
using namespace llvm;
+namespace opts {
+extern llvm::cl::opt Verbosity;
+} // namespace opts
+
namespace llvm {
namespace bolt {
@@ -43,9 +47,10 @@ bool PointerAuthCFIAnalyzer::runOnFunction(BinaryFunction
&BF) {
// Not all functions have .cfi_negate_ra_state in them. But if one
does,
// we expect psign/pauth instructions to have the hasNegateRAState
// annotation.
-BC.outs() << "BOLT-INFO: inconsistent RAStates in function "
- << BF.getPrintName()
- << ": ptr sign/auth inst without .cfi_negate_ra_state\n";
+if (opts::Verbosity >= 1)
+ BC.outs() << "BOLT-INFO: inconsistent RAStates in function "
+<< BF.getPrintName()
+<< ": ptr sign/auth inst without .cfi_negate_ra_state\n";
std::lock_guard Lock(IgnoreMutex);
BF.setIgnored();
return false;
@@ -65,9 +70,10 @@ bool PointerAuthCFIAnalyzer::run
[llvm-branch-commits] [llvm] [BOLT][NFC] Rename Pointer Auth DWARF rewriter passes (PR #164622)
https://github.com/bgergely0 updated
https://github.com/llvm/llvm-project/pull/164622
From 85a832fbb903f2f986efbdd65ccb48e5ee190d13 Mon Sep 17 00:00:00 2001
From: Gergely Balint
Date: Wed, 22 Oct 2025 12:44:37 +
Subject: [PATCH 1/3] [BOLT][NFC] Rename Pointer Auth DWARF rewriter passes
Original names were "working titles". After initial patches are merged,
I'd like to rename these passes to names that reflect their intent
better and show their relationship to each other:
InsertNegateRAStatePass renamed to PointerAuthCFIFixup,
MarkRAStates renamed to PointerAuthCFIAnalyzer.
---
bolt/docs/PacRetDesign.md | 23 ++---
...arkRAStates.h => PointerAuthCFIAnalyzer.h} | 14
...ateRAStatePass.h => PointerAuthCFIFixup.h} | 14
bolt/lib/Core/Exceptions.cpp | 8 ++---
bolt/lib/Passes/CMakeLists.txt| 4 +--
...AStates.cpp => PointerAuthCFIAnalyzer.cpp} | 16 +-
...AStatePass.cpp => PointerAuthCFIFixup.cpp} | 32 +--
bolt/lib/Rewrite/BinaryPassManager.cpp| 8 ++---
bolt/test/AArch64/negate-ra-state-incorrect.s | 2 +-
bolt/test/AArch64/negate-ra-state.s | 8 ++---
bolt/test/AArch64/pacret-split-funcs.s| 4 +--
bolt/unittests/Passes/CMakeLists.txt | 2 +-
...ateRAState.cpp => PointerAuthCFIFixup.cpp} | 6 ++--
.../gn/secondary/bolt/lib/Passes/BUILD.gn | 4 +--
14 files changed, 73 insertions(+), 72 deletions(-)
rename bolt/include/bolt/Passes/{MarkRAStates.h => PointerAuthCFIAnalyzer.h}
(63%)
rename bolt/include/bolt/Passes/{InsertNegateRAStatePass.h =>
PointerAuthCFIFixup.h} (87%)
rename bolt/lib/Passes/{MarkRAStates.cpp => PointerAuthCFIAnalyzer.cpp} (91%)
rename bolt/lib/Passes/{InsertNegateRAStatePass.cpp =>
PointerAuthCFIFixup.cpp} (91%)
rename bolt/unittests/Passes/{InsertNegateRAState.cpp =>
PointerAuthCFIFixup.cpp} (97%)
diff --git a/bolt/docs/PacRetDesign.md b/bolt/docs/PacRetDesign.md
index c7c76cac3a100..0de2da50f8fd6 100644
--- a/bolt/docs/PacRetDesign.md
+++ b/bolt/docs/PacRetDesign.md
@@ -104,9 +104,9 @@ negate-ra-state CFIs will become invalid during BasicBlock
reordering.
## Solution design
The implementation introduces two new passes:
-1. `MarkRAStatesPass`: assigns the RA state to each instruction based on the
CFIs
-in the input binary
-2. `InsertNegateRAStatePass`: reads those assigned instruction RA states after
+1. `PointerAuthCFIAnalyzer`: assigns the RA state to each instruction based on
+the CFI in the input binary
+2. `PointerAuthCFIFixup`: reads those assigned instruction RA states after
optimizations, and emits `DW_CFA_AARCH64_negate_ra_state` CFIs at the
correct
places: wherever there is a state change between two consecutive
instructions
in the layout order.
@@ -129,7 +129,7 @@ instruction.
This special case is handled by adding an `initialRAState` bool to each
BinaryFunction.
If the `Offset` the CFI refers to is zero, we don't store an annotation, but
set
the `initialRAState` in `FillCFIInfoFor`. This information is then used in
-`MarkRAStates`.
+`PointerAuthCFIAnalyzer`.
### Binaries without DWARF info
@@ -146,7 +146,7 @@ In summary:
- pointer auth is used, and we have DWARF CFIs: passes run, and rewrite the
negate-ra-state CFI.
-### MarkRAStates pass
+### PointerAuthCFIAnalyzer pass
This pass runs before optimizations reorder anything.
@@ -173,9 +173,9 @@ what we have before the pass, and after it.
| autiasp | negate-ra-state | signed |
| ret | | unsigned |
-# Error handling in MarkRAState Pass:
+# Error handling in PointerAuthCFIAnalyzer pass:
-Whenever the MarkRAStates pass finds inconsistencies in the current
+Whenever the PointerAuthCFIAnalyzer pass finds inconsistencies in the current
BinaryFunction, it marks the function as ignored using `BF.setIgnored()`. BOLT
will not optimize this function but will emit it unchanged in the original
section
(`.bolt.org.text`).
@@ -188,16 +188,17 @@ The inconsistencies are as follows:
Users will be informed about the number of ignored functions in the pass, the
exact functions ignored, and the found inconsistency.
-### InsertNegateRAStatePass
+### PointerAuthCFIFixup
-This pass runs after optimizations. It performs the _inverse_ of MarkRAState
pass:
+This pass runs after optimizations. It performs the _inverse_ of
PointerAuthCFIAnalyzer
+pass:
1. it reads the RA state annotations attached to the instructions, and
2. whenever the state changes, it adds a PseudoInstruction that holds an
OpNegateRAState CFI.
# Covering newly generated instructions:
-Some BOLT passes can add new Instructions. In InsertNegateRAStatePass, we have
+Some BOLT passes can add new Instructions. In PointerAuthCFIFixup, we have
to know what RA state these have.
> [!important]
@@ -230,7 +231,7 @@ freely. The only special case is function splitting. W
[llvm-branch-commits] [llvm] [BOLT][PAC] Warn about synchronous unwind tables (PR #165227)
https://github.com/bgergely0 updated
https://github.com/llvm/llvm-project/pull/165227
From 7e17eea4a5da638d1bfb375904720d287585535c Mon Sep 17 00:00:00 2001
From: Gergely Balint
Date: Mon, 27 Oct 2025 09:29:54 +
Subject: [PATCH 1/2] [BOLT][PAC] Warn about synchronous unwind tables
BOLT currently ignores functions with synchronous PAuth DWARF info.
When more than 10% of functions get ignored for inconsistencies, we
should emit a warning to only use asynchronous unwind tables.
See also: #165215
---
bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp| 9 -
.../AArch64/pacret-synchronous-unwind.cpp | 33 +++
2 files changed, 41 insertions(+), 1 deletion(-)
create mode 100644 bolt/test/runtime/AArch64/pacret-synchronous-unwind.cpp
diff --git a/bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp
b/bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp
index 91030544d2b88..01af88818a21d 100644
--- a/bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp
+++ b/bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp
@@ -133,11 +133,18 @@ Error
PointerAuthCFIAnalyzer::runOnFunctions(BinaryContext &BC) {
ParallelUtilities::runOnEachFunction(
BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun,
SkipPredicate, "PointerAuthCFIAnalyzer");
+
+ float IgnoredPercent = (100.0 * FunctionsIgnored) / Total;
BC.outs() << "BOLT-INFO: PointerAuthCFIAnalyzer ran on " << Total
<< " functions. Ignored " << FunctionsIgnored << " functions "
-<< format("(%.2lf%%)", (100.0 * FunctionsIgnored) / Total)
+<< format("(%.2lf%%)", IgnoredPercent)
<< " because of CFI inconsistencies\n";
+ if (IgnoredPercent >= 10.0)
+BC.outs() << "BOLT-WARNING: PointerAuthCFIAnalyzer only supports "
+ "asynchronous unwind tables. For C compilers, see "
+ "-fasynchronous-unwind-tables.\n";
+
return Error::success();
}
diff --git a/bolt/test/runtime/AArch64/pacret-synchronous-unwind.cpp
b/bolt/test/runtime/AArch64/pacret-synchronous-unwind.cpp
new file mode 100644
index 0..1bfeeaed3715a
--- /dev/null
+++ b/bolt/test/runtime/AArch64/pacret-synchronous-unwind.cpp
@@ -0,0 +1,33 @@
+// Test to demonstrate that functions compiled with synchronous unwind tables
+// are ignored by the PointerAuthCFIAnalyzer.
+// Exception handling is needed to have _any_ unwind tables, otherwise the
+// PointerAuthCFIAnalyzer does not run on these functions, so it does not
ignore
+// any function.
+//
+// REQUIRES: system-linux,bolt-runtime
+//
+// RUN: %clangxx --target=aarch64-unknown-linux-gnu \
+// RUN: -mbranch-protection=pac-ret \
+// RUN: -fno-asynchronous-unwind-tables \
+// RUN: %s -o %t.exe -Wl,-q
+// RUN: llvm-bolt %t.exe -o %t.bolt | FileCheck %s --check-prefix=CHECK
+//
+// CHECK: PointerAuthCFIAnalyzer ran on 3 functions. Ignored
+// CHECK-NOT: 0 functions (0.00%) because of CFI inconsistencies
+// CHECK-SAME: 1 functions (33.33%) because of CFI inconsistencies
+// CHECK-NEXT: BOLT-WARNING: PointerAuthCFIAnalyzer only supports asynchronous
+// CHECK-SAME: unwind tables. For C compilers, see
-fasynchronous-unwind-tables.
+
+#include
+#include
+
+void foo() { throw std::runtime_error("Exception from foo()."); }
+
+int main() {
+ try {
+foo();
+ } catch (const std::exception &e) {
+printf("Exception caught: %s\n", e.what());
+ }
+ return 0;
+}
From d7dc55487324624608d8f2edcdfa6c3c70aa28f5 Mon Sep 17 00:00:00 2001
From: Gergely Balint
Date: Tue, 28 Oct 2025 09:23:08 +
Subject: [PATCH 2/2] [BOLT] Use opts::Verbosity in PointerAuthCFIAnalyzer
---
bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp| 27 ---
bolt/test/AArch64/negate-ra-state-incorrect.s | 2 +-
2 files changed, 18 insertions(+), 11 deletions(-)
diff --git a/bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp
b/bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp
index 01af88818a21d..5979d5fb01818 100644
--- a/bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp
+++ b/bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp
@@ -28,6 +28,10 @@
using namespace llvm;
+namespace opts {
+extern llvm::cl::opt Verbosity;
+} // namespace opts
+
namespace llvm {
namespace bolt {
@@ -43,9 +47,10 @@ bool PointerAuthCFIAnalyzer::runOnFunction(BinaryFunction
&BF) {
// Not all functions have .cfi_negate_ra_state in them. But if one
does,
// we expect psign/pauth instructions to have the hasNegateRAState
// annotation.
-BC.outs() << "BOLT-INFO: inconsistent RAStates in function "
- << BF.getPrintName()
- << ": ptr sign/auth inst without .cfi_negate_ra_state\n";
+if (opts::Verbosity >= 1)
+ BC.outs() << "BOLT-INFO: inconsistent RAStates in function "
+<< BF.getPrintName()
+<< ": ptr sign/auth inst without .cfi_negate_ra_state\n";
std::lock_guard Lock(IgnoreMutex);
BF.setIgnored();
return false;
@@ -65,9 +70,10 @@ bool PointerAuthCFIAnalyzer::run
[llvm-branch-commits] [llvm] [BOLT][NFC] Rename Pointer Auth DWARF rewriter passes (PR #164622)
https://github.com/bgergely0 updated
https://github.com/llvm/llvm-project/pull/164622
From 85a832fbb903f2f986efbdd65ccb48e5ee190d13 Mon Sep 17 00:00:00 2001
From: Gergely Balint
Date: Wed, 22 Oct 2025 12:44:37 +
Subject: [PATCH 1/3] [BOLT][NFC] Rename Pointer Auth DWARF rewriter passes
Original names were "working titles". After initial patches are merged,
I'd like to rename these passes to names that reflect their intent
better and show their relationship to each other:
InsertNegateRAStatePass renamed to PointerAuthCFIFixup,
MarkRAStates renamed to PointerAuthCFIAnalyzer.
---
bolt/docs/PacRetDesign.md | 23 ++---
...arkRAStates.h => PointerAuthCFIAnalyzer.h} | 14
...ateRAStatePass.h => PointerAuthCFIFixup.h} | 14
bolt/lib/Core/Exceptions.cpp | 8 ++---
bolt/lib/Passes/CMakeLists.txt| 4 +--
...AStates.cpp => PointerAuthCFIAnalyzer.cpp} | 16 +-
...AStatePass.cpp => PointerAuthCFIFixup.cpp} | 32 +--
bolt/lib/Rewrite/BinaryPassManager.cpp| 8 ++---
bolt/test/AArch64/negate-ra-state-incorrect.s | 2 +-
bolt/test/AArch64/negate-ra-state.s | 8 ++---
bolt/test/AArch64/pacret-split-funcs.s| 4 +--
bolt/unittests/Passes/CMakeLists.txt | 2 +-
...ateRAState.cpp => PointerAuthCFIFixup.cpp} | 6 ++--
.../gn/secondary/bolt/lib/Passes/BUILD.gn | 4 +--
14 files changed, 73 insertions(+), 72 deletions(-)
rename bolt/include/bolt/Passes/{MarkRAStates.h => PointerAuthCFIAnalyzer.h}
(63%)
rename bolt/include/bolt/Passes/{InsertNegateRAStatePass.h =>
PointerAuthCFIFixup.h} (87%)
rename bolt/lib/Passes/{MarkRAStates.cpp => PointerAuthCFIAnalyzer.cpp} (91%)
rename bolt/lib/Passes/{InsertNegateRAStatePass.cpp =>
PointerAuthCFIFixup.cpp} (91%)
rename bolt/unittests/Passes/{InsertNegateRAState.cpp =>
PointerAuthCFIFixup.cpp} (97%)
diff --git a/bolt/docs/PacRetDesign.md b/bolt/docs/PacRetDesign.md
index c7c76cac3a100..0de2da50f8fd6 100644
--- a/bolt/docs/PacRetDesign.md
+++ b/bolt/docs/PacRetDesign.md
@@ -104,9 +104,9 @@ negate-ra-state CFIs will become invalid during BasicBlock
reordering.
## Solution design
The implementation introduces two new passes:
-1. `MarkRAStatesPass`: assigns the RA state to each instruction based on the
CFIs
-in the input binary
-2. `InsertNegateRAStatePass`: reads those assigned instruction RA states after
+1. `PointerAuthCFIAnalyzer`: assigns the RA state to each instruction based on
+the CFI in the input binary
+2. `PointerAuthCFIFixup`: reads those assigned instruction RA states after
optimizations, and emits `DW_CFA_AARCH64_negate_ra_state` CFIs at the
correct
places: wherever there is a state change between two consecutive
instructions
in the layout order.
@@ -129,7 +129,7 @@ instruction.
This special case is handled by adding an `initialRAState` bool to each
BinaryFunction.
If the `Offset` the CFI refers to is zero, we don't store an annotation, but
set
the `initialRAState` in `FillCFIInfoFor`. This information is then used in
-`MarkRAStates`.
+`PointerAuthCFIAnalyzer`.
### Binaries without DWARF info
@@ -146,7 +146,7 @@ In summary:
- pointer auth is used, and we have DWARF CFIs: passes run, and rewrite the
negate-ra-state CFI.
-### MarkRAStates pass
+### PointerAuthCFIAnalyzer pass
This pass runs before optimizations reorder anything.
@@ -173,9 +173,9 @@ what we have before the pass, and after it.
| autiasp | negate-ra-state | signed |
| ret | | unsigned |
-# Error handling in MarkRAState Pass:
+# Error handling in PointerAuthCFIAnalyzer pass:
-Whenever the MarkRAStates pass finds inconsistencies in the current
+Whenever the PointerAuthCFIAnalyzer pass finds inconsistencies in the current
BinaryFunction, it marks the function as ignored using `BF.setIgnored()`. BOLT
will not optimize this function but will emit it unchanged in the original
section
(`.bolt.org.text`).
@@ -188,16 +188,17 @@ The inconsistencies are as follows:
Users will be informed about the number of ignored functions in the pass, the
exact functions ignored, and the found inconsistency.
-### InsertNegateRAStatePass
+### PointerAuthCFIFixup
-This pass runs after optimizations. It performs the _inverse_ of MarkRAState
pass:
+This pass runs after optimizations. It performs the _inverse_ of
PointerAuthCFIAnalyzer
+pass:
PointerAuthCFIAnalyzer
+pass:
1. it reads the RA state annotations attached to the instructions, and
2. whenever the state changes, it adds a PseudoInstruction that holds an
OpNegateRAState CFI.
# Covering newly generated instructions:
-Some BOLT passes can add new Instructions. In InsertNegateRAStatePass, we have
+Some BOLT passes can add new Instructions. In PointerAuthCFIFixup, we have
to know what RA state these have.
> [!important]
@@ -230,7 +231,7 @@ freely. The only special case is function splitting. W
[llvm-branch-commits] [llvm] [VPlan] Implement compressed widening of memory instructions (PR #166956)
https://github.com/skachkov-sc edited https://github.com/llvm/llvm-project/pull/166956 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] X86: Enable terminal rule (PR #165957)
https://github.com/qcolombet approved this pull request. https://github.com/llvm/llvm-project/pull/165957 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SPIRV][SPIRVPrepareGlobals] Map AMD's dynamic LDS 0-element globals to arrays with UINT32_MAX elements (PR #166952)
Juan Manuel Martinez =?utf-8?q?Caamaño?=,
Juan Manuel Martinez =?utf-8?q?Caamaño?=,
Juan Manuel Martinez =?utf-8?q?Caamaño?Message-ID:
In-Reply-To:
@@ -43,6 +44,29 @@ bool tryExtendLLVMBitcodeMarker(GlobalVariable &Bitcode) {
return true;
}
+bool tryExtendDynamicLDSGlobal(GlobalVariable &GV) {
+ constexpr unsigned WorkgroupAS = 3;
+ const bool IsWorkgroupExternal =
+ GV.hasExternalLinkage() && GV.getAddressSpace() == WorkgroupAS;
+ if (!IsWorkgroupExternal)
+return false;
+
+ const ArrayType *AT = dyn_cast(GV.getValueType());
+ if (!AT || AT->getNumElements() != 0)
+return false;
s-perron wrote:
Sorry, I did not write that properly.
Could you have, say, a global whose type is a struct containing a 0-sized
array? What do you want to do in that case?
```
@lds = external addrspace(3) global {i32, [0 x i32]}
```
https://github.com/llvm/llvm-project/pull/166952
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] Hexagon: Enable terminal rule (PR #165960)
https://github.com/qcolombet approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/165960 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] RuntimeLibcalls: Add definitions for vector math functions (PR #167026)
@@ -182,10 +182,63 @@ foreach FPTy = ["F32", "F64", "F80", "F128", "PPCF128"]
in {
def MODF_#FPTy : RuntimeLibcall;
}
-foreach VecTy = ["V4F32", "V2F64", "NXV4F32", "NXV2F64"] in {
- def MODF_#VecTy : RuntimeLibcall;
- def SINCOS_#VecTy : RuntimeLibcall;
- def SINCOSPI_#VecTy : RuntimeLibcall;
+defvar F32VectorSuffixes = ["V2F32", "V4F32", "V8F32", "V16F32", "NXV4F32"];
+defvar F64VectorSuffixes = ["V2F64", "V4F64", "V8F64", "NXV2F64"];
arsenm wrote:
I think it helps see the scope of exactly what tablegen needs to be able to
express. At this point I think the bigger problem is expressing the logical
groups of functions per library is more pressing than the type signatures
https://github.com/llvm/llvm-project/pull/167026
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] RISCV: Enable terminal rule (PR #165961)
https://github.com/qcolombet approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/165961 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] RISCV: Enable terminal rule (PR #165961)
https://github.com/qcolombet edited https://github.com/llvm/llvm-project/pull/165961 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SPIRV][SPIRVPrepareGlobals] Map AMD's dynamic LDS 0-element globals to arrays with UINT32_MAX elements (PR #166952)
Juan Manuel Martinez =?utf-8?q?Caamaño?Message-ID:
In-Reply-To:
@@ -52,6 +76,9 @@ bool SPIRVPrepareGlobals::runOnModule(Module &M) {
if (GlobalVariable *Bitcode = M.getNamedGlobal("llvm.embedded.module"))
Changed |= tryExtendLLVMBitcodeMarker(*Bitcode);
+ for (GlobalVariable &GV : make_early_inc_range(M.globals()))
+Changed |= tryExtendDynamicLDSGlobal(GV);
s-perron wrote:
You can add a comment explaining why you want to change the type. I'm not
familiar with `HIP`, and if I were looking at this without looking at the commit
message I would think it is odd.
https://github.com/llvm/llvm-project/pull/166952
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SPIRV][SPIRVPrepareGlobals] Map AMD's dynamic LDS 0-element globals to arrays with UINT32_MAX elements (PR #166952)
Juan Manuel Martinez =?utf-8?q?Caamaño?Message-ID:
In-Reply-To:
@@ -43,6 +44,29 @@ bool tryExtendLLVMBitcodeMarker(GlobalVariable &Bitcode) {
return true;
}
+bool tryExtendDynamicLDSGlobal(GlobalVariable &GV) {
+ constexpr unsigned WorkgroupAS = 3;
+ const bool IsWorkgroupExternal =
+ GV.hasExternalLinkage() && GV.getAddressSpace() == WorkgroupAS;
+ if (!IsWorkgroupExternal)
+return false;
+
+ const ArrayType *AT = dyn_cast(GV.getValueType());
+ if (!AT || AT->getNumElements() != 0)
+return false;
s-perron wrote:
What do you want to do with 0-sized arrays that are not the type of the global
value? Is it even possible to do that? Comments explaining why you limit this to
just the type of the GV would be useful.
https://github.com/llvm/llvm-project/pull/166952
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SPIRV][SPIRVPrepareGlobals] Map AMD's dynamic LDS 0-element globals to arrays with UINT32_MAX elements (PR #166952)
Juan Manuel Martinez Caamaño Message-ID:
In-Reply-To:
@@ -43,6 +44,29 @@ bool tryExtendLLVMBitcodeMarker(GlobalVariable &Bitcode) {
return true;
}
+bool tryExtendDynamicLDSGlobal(GlobalVariable &GV) {
+ constexpr unsigned WorkgroupAS = 3;
s-perron wrote:
It is very unlikely that this will change, but could this be changed to use
named constants:
```suggestion
constexpr unsigned WorkgroupAS =
storageClassToAddressSpace(SPIRV::StorageClass::Workgroup);
```
It should all get folded at compile time since it is all constexpr.
https://github.com/llvm/llvm-project/pull/166952
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SPIRV][SPIRVPrepareGlobals] Map AMD's dynamic LDS 0-element globals to arrays with UINT32_MAX elements (PR #166952)
Juan Manuel Martinez Caamaño Message-ID: In-Reply-To: https://github.com/s-perron edited https://github.com/llvm/llvm-project/pull/166952 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SPIRV][SPIRVPrepareGlobals] Map AMD's dynamic LDS 0-element globals to arrays with UINT32_MAX elements (PR #166952)
Juan Manuel Martinez Caamaño Message-ID: In-Reply-To: https://github.com/s-perron commented: I'm wondering if you need to change all 0-sized arrays or not. If so, we might want to centralize the conversion of 0-sized arrays. We could try to move the code that changes them to 1-element arrays here as well. I can look into changing HLSL's use of zero-sized arrays if needed. https://github.com/llvm/llvm-project/pull/166952 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BPF] Add CLI option to enable misaligned memory access (PR #167013)
yonghong-song wrote: misaligned memory access is bad for performance and may have issues for verification (or make verification more complex). Do you have concrete C code to illustrate this? Can the C code easily converted to aligned memory access? cc @4ast https://github.com/llvm/llvm-project/pull/167013 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [AArch64][llvm] Add instructions for FEAT_MOPS_GO (PR #164913)
https://github.com/jthackray updated
https://github.com/llvm/llvm-project/pull/164913
>From 1080ae61785b5d11c29d4074ca783bd1050a8540 Mon Sep 17 00:00:00 2001
From: Jonathan Thackray
Date: Tue, 2 Sep 2025 16:26:53 +0100
Subject: [PATCH 1/3] [AArch64][llvm] Add instructions for FEAT_MOPS_GO
Add the following `FEAT_MOPS_GO` instructions:
* `SETGOP`, `SETGOM`, `SETGOE`
* `SETGOPN`, `SETGOMN`, `SETGOEN`
* `SETGOPT`, `SETGOMT`, `SETGOET`
* `SETGOPTN`, `SETGOMTN`, `SETGOETN`
as documented here:
https://developer.arm.com/documentation/109697/2025_09/Future-Architecture-Technologies
---
clang/test/Driver/aarch64-vfat.c | 4 +
.../print-supported-extensions-aarch64.c | 1 +
llvm/lib/Target/AArch64/AArch64Features.td| 3 +
.../lib/Target/AArch64/AArch64InstrFormats.td | 39 +---
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 11 +++
.../AArch64/AsmParser/AArch64AsmParser.cpp| 15 +++-
.../test/MC/AArch64/arm-mops-go-diagnostics.s | 56
llvm/test/MC/AArch64/arm-mops-go.s| 89 +++
.../TargetParser/TargetParserTest.cpp | 3 +
9 files changed, 206 insertions(+), 15 deletions(-)
create mode 100644 llvm/test/MC/AArch64/arm-mops-go-diagnostics.s
create mode 100644 llvm/test/MC/AArch64/arm-mops-go.s
diff --git a/clang/test/Driver/aarch64-vfat.c b/clang/test/Driver/aarch64-vfat.c
index fa268641a86e0..63096336ceb76 100644
--- a/clang/test/Driver/aarch64-vfat.c
+++ b/clang/test/Driver/aarch64-vfat.c
@@ -13,3 +13,7 @@
// RUN: %clang -target aarch64 -march=armv9.7a+btie -### -c %s 2>&1 |
FileCheck -check-prefix=VFAT-BTIE %s
// RUN: %clang -target aarch64 -march=armv9.7-a+btie -### -c %s 2>&1 |
FileCheck -check-prefix=VFAT-BTIE %s
// VFAT-BTIE: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic"
"-target-feature" "+v9.7a"{{.*}} "-target-feature" "+btie"
+
+// RUN: %clang -target aarch64 -march=armv9.7a+mops-go -### -c %s 2>&1 |
FileCheck -check-prefix=VFAT-MOPS-GO %s
+// RUN: %clang -target aarch64 -march=armv9.7-a+mops-go -### -c %s 2>&1 |
FileCheck -check-prefix=VFAT-MOPS-GO %s
+// VFAT-MOPS-GO: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu"
"generic" "-target-feature" "+v9.7a"{{.*}} "-target-feature" "+mops-go"
diff --git a/clang/test/Driver/print-supported-extensions-aarch64.c
b/clang/test/Driver/print-supported-extensions-aarch64.c
index d0c86c7065281..93373f41ad2cf 100644
--- a/clang/test/Driver/print-supported-extensions-aarch64.c
+++ b/clang/test/Driver/print-supported-extensions-aarch64.c
@@ -50,6 +50,7 @@
// CHECK-NEXT: lsuiFEAT_LSUI
Enable Armv9.6-A unprivileged load/store instructions
// CHECK-NEXT: lut FEAT_LUT
Enable Lookup Table instructions
// CHECK-NEXT: mopsFEAT_MOPS
Enable Armv8.8-A memcpy and memset acceleration instructions
+// CHECK-NEXT: mops-go FEAT_MOPS_GO
Enable memset acceleration granule only
// CHECK-NEXT: mpamv2 FEAT_MPAMv2
Enable Armv9.7-A MPAMv2 Lookaside Buffer Invalidate instructions
// CHECK-NEXT: memtag FEAT_MTE, FEAT_MTE2
Enable Memory Tagging Extension
// CHECK-NEXT: mtetc FEAT_MTETC
Enable Virtual Memory Tagging Extension
diff --git a/llvm/lib/Target/AArch64/AArch64Features.td
b/llvm/lib/Target/AArch64/AArch64Features.td
index c4f6e000dff66..51e602ad7e0f2 100644
--- a/llvm/lib/Target/AArch64/AArch64Features.td
+++ b/llvm/lib/Target/AArch64/AArch64Features.td
@@ -638,6 +638,9 @@ def FeatureS1POE2: ExtensionWithMArch<"poe2", "POE2",
"FEAT_S1POE2",
def FeatureTEV: ExtensionWithMArch<"tev", "TEV", "FEAT_TEV",
"Enable TIndex Exception-like Vector instructions">;
+def FeatureMOPS_GO: ExtensionWithMArch<"mops-go", "MOPS_GO", "FEAT_MOPS_GO",
+ "Enable memset acceleration granule only">;
+
//===--===//
// Other Features
//===--===//
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index ee51518ff17a2..2daf2f4152479 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -12604,7 +12604,7 @@ class MOPSMemoryMove opcode, bits<2> op1,
bits<2> op2, string asm>
: MOPSMemoryCopyMoveBase<1, opcode, op1, op2, asm>;
class MOPSMemorySetBase opcode, bit op1, bit op2,
-string asm>
+bit op3, string asm>
: I<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb),
(ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm),
asm, "\t[$Rd]!, $Rn!,
[llvm-branch-commits] [clang] [llvm] [AArch64][llvm] Add instructions for FEAT_MOPS_GO (PR #164913)
https://github.com/jthackray updated
https://github.com/llvm/llvm-project/pull/164913
>From 1080ae61785b5d11c29d4074ca783bd1050a8540 Mon Sep 17 00:00:00 2001
From: Jonathan Thackray
Date: Tue, 2 Sep 2025 16:26:53 +0100
Subject: [PATCH 1/3] [AArch64][llvm] Add instructions for FEAT_MOPS_GO
Add the following `FEAT_MOPS_GO` instructions:
* `SETGOP`, `SETGOM`, `SETGOE`
* `SETGOPN`, `SETGOMN`, `SETGOEN`
* `SETGOPT`, `SETGOMT`, `SETGOET`
* `SETGOPTN`, `SETGOMTN`, `SETGOETN`
as documented here:
https://developer.arm.com/documentation/109697/2025_09/Future-Architecture-Technologies
---
clang/test/Driver/aarch64-vfat.c | 4 +
.../print-supported-extensions-aarch64.c | 1 +
llvm/lib/Target/AArch64/AArch64Features.td| 3 +
.../lib/Target/AArch64/AArch64InstrFormats.td | 39 +---
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 11 +++
.../AArch64/AsmParser/AArch64AsmParser.cpp| 15 +++-
.../test/MC/AArch64/arm-mops-go-diagnostics.s | 56
llvm/test/MC/AArch64/arm-mops-go.s| 89 +++
.../TargetParser/TargetParserTest.cpp | 3 +
9 files changed, 206 insertions(+), 15 deletions(-)
create mode 100644 llvm/test/MC/AArch64/arm-mops-go-diagnostics.s
create mode 100644 llvm/test/MC/AArch64/arm-mops-go.s
diff --git a/clang/test/Driver/aarch64-vfat.c b/clang/test/Driver/aarch64-vfat.c
index fa268641a86e0..63096336ceb76 100644
--- a/clang/test/Driver/aarch64-vfat.c
+++ b/clang/test/Driver/aarch64-vfat.c
@@ -13,3 +13,7 @@
// RUN: %clang -target aarch64 -march=armv9.7a+btie -### -c %s 2>&1 |
FileCheck -check-prefix=VFAT-BTIE %s
// RUN: %clang -target aarch64 -march=armv9.7-a+btie -### -c %s 2>&1 |
FileCheck -check-prefix=VFAT-BTIE %s
// VFAT-BTIE: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic"
"-target-feature" "+v9.7a"{{.*}} "-target-feature" "+btie"
+
+// RUN: %clang -target aarch64 -march=armv9.7a+mops-go -### -c %s 2>&1 |
FileCheck -check-prefix=VFAT-MOPS-GO %s
+// RUN: %clang -target aarch64 -march=armv9.7-a+mops-go -### -c %s 2>&1 |
FileCheck -check-prefix=VFAT-MOPS-GO %s
+// VFAT-MOPS-GO: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu"
"generic" "-target-feature" "+v9.7a"{{.*}} "-target-feature" "+mops-go"
diff --git a/clang/test/Driver/print-supported-extensions-aarch64.c
b/clang/test/Driver/print-supported-extensions-aarch64.c
index d0c86c7065281..93373f41ad2cf 100644
--- a/clang/test/Driver/print-supported-extensions-aarch64.c
+++ b/clang/test/Driver/print-supported-extensions-aarch64.c
@@ -50,6 +50,7 @@
// CHECK-NEXT: lsuiFEAT_LSUI
Enable Armv9.6-A unprivileged load/store instructions
// CHECK-NEXT: lut FEAT_LUT
Enable Lookup Table instructions
// CHECK-NEXT: mopsFEAT_MOPS
Enable Armv8.8-A memcpy and memset acceleration instructions
+// CHECK-NEXT: mops-go FEAT_MOPS_GO
Enable memset acceleration granule only
// CHECK-NEXT: mpamv2 FEAT_MPAMv2
Enable Armv9.7-A MPAMv2 Lookaside Buffer Invalidate instructions
// CHECK-NEXT: memtag FEAT_MTE, FEAT_MTE2
Enable Memory Tagging Extension
// CHECK-NEXT: mtetc FEAT_MTETC
Enable Virtual Memory Tagging Extension
diff --git a/llvm/lib/Target/AArch64/AArch64Features.td
b/llvm/lib/Target/AArch64/AArch64Features.td
index c4f6e000dff66..51e602ad7e0f2 100644
--- a/llvm/lib/Target/AArch64/AArch64Features.td
+++ b/llvm/lib/Target/AArch64/AArch64Features.td
@@ -638,6 +638,9 @@ def FeatureS1POE2: ExtensionWithMArch<"poe2", "POE2",
"FEAT_S1POE2",
def FeatureTEV: ExtensionWithMArch<"tev", "TEV", "FEAT_TEV",
"Enable TIndex Exception-like Vector instructions">;
+def FeatureMOPS_GO: ExtensionWithMArch<"mops-go", "MOPS_GO", "FEAT_MOPS_GO",
+ "Enable memset acceleration granule only">;
+
//===--===//
// Other Features
//===--===//
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index ee51518ff17a2..2daf2f4152479 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -12604,7 +12604,7 @@ class MOPSMemoryMove opcode, bits<2> op1,
bits<2> op2, string asm>
: MOPSMemoryCopyMoveBase<1, opcode, op1, op2, asm>;
class MOPSMemorySetBase opcode, bit op1, bit op2,
-string asm>
+bit op3, string asm>
: I<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb),
(ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm),
asm, "\t[$Rd]!, $Rn!,
[llvm-branch-commits] [clang] [llvm] [AArch64][llvm] Add instructions for FEAT_MOPS_GO (PR #164913)
github-actions[bot] wrote:
:warning: C/C++ code formatter, clang-format found issues in your code.
:warning:
You can test this locally with the following command:
``bash
git-clang-format --diff origin/main HEAD --extensions c,cpp --
clang/test/Driver/aarch64-vfat.c
clang/test/Driver/print-supported-extensions-aarch64.c
llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
llvm/unittests/TargetParser/TargetParserTest.cpp --diff_from_common_commit
``
:warning:
The reproduction instructions above might return results for more than one PR
in a stack if you are using a stacked PR workflow. You can limit the results by
changing `origin/main` to the base branch/commit you want to compare against.
:warning:
View the diff from clang-format here.
``diff
diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index 6ff99b7b1..4eb762a00 100644
--- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -1533,8 +1533,8 @@ static DecodeStatus DecodeSETMemOpInstruction(MCInst
&Inst, uint32_t insn,
}
static DecodeStatus DecodeSETMemGoOpInstruction(MCInst &Inst, uint32_t insn,
- uint64_t Addr,
- const MCDisassembler *Decoder) {
+uint64_t Addr,
+const MCDisassembler *Decoder)
{
unsigned Rd = fieldFromInstruction(insn, 0, 5);
unsigned Rn = fieldFromInstruction(insn, 5, 5);
``
https://github.com/llvm/llvm-project/pull/164913
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] RuntimeLibcalls: Add more function entries from TargetLibraryInfo (PR #167082)
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/167082
>From a25d7fec17a54d1ad29d1e2c7f1e28b22fcfe411 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Fri, 7 Nov 2025 18:51:03 -0800
Subject: [PATCH] RuntimeLibcalls: Add more function entries from
TargetLibraryInfo
Script scraped dump of most functions in TargetLibraryInfo.def,
with existing entries and a few special cases removed. This only
adds the definitions, and doesn't add them to any system yet.
Adding them in the correct places is the hard part, since it's
all written as opt-out with manually written exemptions in
TargetLibraryInfo.
---
llvm/include/llvm/IR/RuntimeLibcalls.td | 645
1 file changed, 645 insertions(+)
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.td
b/llvm/include/llvm/IR/RuntimeLibcalls.td
index 929959a4735b0..5f52014247060 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.td
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.td
@@ -154,6 +154,8 @@ foreach FPTy = ["F32", "F64", "F80", "F128", "PPCF128"] in {
def SINCOS_#FPTy : RuntimeLibcall;
def REMQUO_#FPTy : RuntimeLibcall;
def FDIM_#FPTy : RuntimeLibcall;
+
+ def CABS_#FPTy : RuntimeLibcall;
}
foreach FPTy = [ "F32", "F64" ] in {
@@ -571,6 +573,302 @@ def OBJC_RETAIN_AUTORELEASE : RuntimeLibcall;
def OBJC_SYNC_ENTER : RuntimeLibcall;
def OBJC_SYNC_EXIT : RuntimeLibcall;
+def ABORT : RuntimeLibcall;
+def ABS : RuntimeLibcall;
+def ACCESS : RuntimeLibcall;
+def ALIGNED_ALLOC : RuntimeLibcall;
+def ATEXIT : RuntimeLibcall;
+def ATOF : RuntimeLibcall;
+def ATOI : RuntimeLibcall;
+def ATOL : RuntimeLibcall;
+def ATOLL : RuntimeLibcall;
+def BCMP : RuntimeLibcall;
+def BCOPY : RuntimeLibcall;
+def CHMOD : RuntimeLibcall;
+def CHOWN : RuntimeLibcall;
+def CLEARERR : RuntimeLibcall;
+def CLOSEDIR : RuntimeLibcall;
+def CTERMID : RuntimeLibcall;
+def CXA_ATEXIT : RuntimeLibcall;
+def CXA_GUARD_ABORT : RuntimeLibcall;
+def CXA_GUARD_ACQUIRE : RuntimeLibcall;
+def CXA_GUARD_RELEASE : RuntimeLibcall;
+def CXA_THROW : RuntimeLibcall;
+def DUNDER_ISOC99_SCANF : RuntimeLibcall;
+def DUNDER_ISOC99_SSCANF : RuntimeLibcall;
+def DUNDER_STRDUP : RuntimeLibcall;
+def DUNDER_STRNDUP : RuntimeLibcall;
+def DUNDER_STRTOK_R : RuntimeLibcall;
+def ENUM_VARIANT : RuntimeLibcall;
+def EXECL : RuntimeLibcall;
+def EXECLE : RuntimeLibcall;
+def EXECLP : RuntimeLibcall;
+def EXECV : RuntimeLibcall;
+def EXECVE : RuntimeLibcall;
+def EXECVP : RuntimeLibcall;
+def EXECVPE : RuntimeLibcall;
+def EXIT : RuntimeLibcall;
+def FCLOSE : RuntimeLibcall;
+def FDOPEN : RuntimeLibcall;
+def FEOF : RuntimeLibcall;
+def FERROR : RuntimeLibcall;
+def FFLUSH : RuntimeLibcall;
+def FFS : RuntimeLibcall;
+def FFSL : RuntimeLibcall;
+def FFSLL : RuntimeLibcall;
+def FGETC : RuntimeLibcall;
+def FGETC_UNLOCKED : RuntimeLibcall;
+def FGETPOS : RuntimeLibcall;
+def FGETS : RuntimeLibcall;
+def FGETS_UNLOCKED : RuntimeLibcall;
+def FILENO : RuntimeLibcall;
+def FIPRINTF : RuntimeLibcall;
+def FLOCKFILE : RuntimeLibcall;
+def FLS : RuntimeLibcall;
+def FLSL : RuntimeLibcall;
+def FLSLL : RuntimeLibcall;
+def FOPEN : RuntimeLibcall;
+def FOPEN64 : RuntimeLibcall;
+def FORK : RuntimeLibcall;
+def FPRINTF : RuntimeLibcall;
+def FPUTC : RuntimeLibcall;
+def FPUTC_UNLOCKED : RuntimeLibcall;
+def FPUTS : RuntimeLibcall;
+def FPUTS_UNLOCKED : RuntimeLibcall;
+def FREAD : RuntimeLibcall;
+def FREAD_UNLOCKED : RuntimeLibcall;
+def FSCANF : RuntimeLibcall;
+def FSEEK : RuntimeLibcall;
+def FSEEKO : RuntimeLibcall;
+def FSEEKO64 : RuntimeLibcall;
+def FSETPOS : RuntimeLibcall;
+def FSTAT : RuntimeLibcall;
+def FSTAT64 : RuntimeLibcall;
+def FSTATVFS : RuntimeLibcall;
+def FSTATVFS64 : RuntimeLibcall;
+def FTELL : RuntimeLibcall;
+def FTELLO : RuntimeLibcall;
+def FTELLO64 : RuntimeLibcall;
+def FTRYLOCKFILE : RuntimeLibcall;
+def FUNLOCKFILE : RuntimeLibcall;
+def FWRITE : RuntimeLibcall;
+def FWRITE_UNLOCKED : RuntimeLibcall;
+def GETC : RuntimeLibcall;
+def GETCHAR : RuntimeLibcall;
+def GETCHAR_UNLOCKED : RuntimeLibcall;
+def GETC_UNLOCKED : RuntimeLibcall;
+def GETENV : RuntimeLibcall;
+def GETITIMER : RuntimeLibcall;
+def GETLOGIN_R : RuntimeLibcall;
+def GETPWNAM : RuntimeLibcall;
+def GETS : RuntimeLibcall;
+def GETTIMEOFDAY : RuntimeLibcall;
+def HTONL : RuntimeLibcall;
+def HTONS : RuntimeLibcall;
+def IPRINTF : RuntimeLibcall;
+def ISASCII : RuntimeLibcall;
+def ISDIGIT : RuntimeLibcall;
+def LABS : RuntimeLibcall;
+def LCHOWN : RuntimeLibcall;
+def LLABS : RuntimeLibcall;
+def LSTAT : RuntimeLibcall;
+def LSTAT64 : RuntimeLibcall;
+def MEMALIGN : RuntimeLibcall;
+def MEMCCPY : RuntimeLibcall;
+def MEMCCPY_CHK : RuntimeLibcall;
+def MEMCHR : RuntimeLibcall;
+def MEMPCPY : RuntimeLibcall;
+def MEMPCPY_CHK : RuntimeLibcall;
+def MEMRCHR : RuntimeLibcall;
+def MEMSET_PATTERN16 : RuntimeLibcall;
+def MEMSET_PATTERN4 : RuntimeLibcall;
+def MEMSET_PATTERN8 : RuntimeLibcall;
+def MKDIR : RuntimeLibcall;
+def MKTIME : RuntimeLibcall;
+def MSVC_DELETE_ARRAY_PTR32
[llvm-branch-commits] [llvm] RuntimeLibcalls: Add __memcpy_chk, __memmove_chk, __memset_chk (PR #167053)
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/167053
>From e5510192d0f6f17d6a69b882c3e008629509ccbc Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Fri, 7 Nov 2025 15:27:24 -0800
Subject: [PATCH] RuntimeLibcalls: Add __memcpy_chk, __memmove_chk,
__memset_chk
These were in TargetLibraryInfo, but missing from RuntimeLibcalls.
This only adds the cases that already have the non-chk variants
already. Copies the enabled-by-default logic from TargetLibraryInfo,
which is probably overly permissive. Only isPS opts-out.
---
llvm/include/llvm/IR/RuntimeLibcalls.td | 17 +++--
.../Util/DeclareRuntimeLibcalls/basic.ll| 4
.../Util/DeclareRuntimeLibcalls/ps.ll | 6 ++
3 files changed, 25 insertions(+), 2 deletions(-)
create mode 100644 llvm/test/Transforms/Util/DeclareRuntimeLibcalls/ps.ll
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.td
b/llvm/include/llvm/IR/RuntimeLibcalls.td
index 3fb55ad40e71b..f6ad23a4f9c49 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.td
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.td
@@ -35,6 +35,9 @@ def isNotOSLinuxAndNotOSOpenBSD : RuntimeLibcallPredicate<
def isNotOSAIXAndNotOSOpenBSD : RuntimeLibcallPredicate<
[{!TT.isOSAIX() && !TT.isOSOpenBSD()}]>;
+def isNotPS : RuntimeLibcallPredicate<
+ [{!TT.isPS()}]>;
+
// OpenBSD uses __guard_local. AIX uses __ssp_canary_word, MSVC/Windows
// Itanium uses __security_cookie
def hasStackChkFail : RuntimeLibcallPredicate<
@@ -374,8 +377,11 @@ foreach FPTy = ["F32", "F64", "F128", "PPCF128"] in {
// Memory
def MEMCMP : RuntimeLibcall;
def MEMCPY : RuntimeLibcall;
+def MEMCPY_CHK : RuntimeLibcall;
def MEMMOVE : RuntimeLibcall;
+def MEMMOVE_CHK : RuntimeLibcall;
def MEMSET : RuntimeLibcall;
+def MEMSET_CHK : RuntimeLibcall;
def CALLOC : RuntimeLibcall;
def BZERO : RuntimeLibcall;
def STRLEN : RuntimeLibcall;
@@ -1091,6 +1097,10 @@ def memcpy : RuntimeLibcallImpl;
def memmove : RuntimeLibcallImpl;
def memset : RuntimeLibcallImpl;
+def __memcpy_chk : RuntimeLibcallImpl;
+def __memmove_chk : RuntimeLibcallImpl;
+def __memset_chk : RuntimeLibcallImpl;
+
// DSEPass can emit calloc if it finds a pair of malloc/memset
def calloc : RuntimeLibcallImpl;
@@ -2624,8 +2634,10 @@ defvar X86_F128_Libcalls = LibcallImpls<(add
LibmF128Libcalls, LibmF128FiniteLib
defvar SinCosF32F64Libcalls = LibcallImpls<(add sincosf, sincos),
hasSinCos_f32_f64>;
+defvar MemChkLibcalls = [__memcpy_chk, __memset_chk, __memmove_chk];
+
defvar X86CommonLibcalls =
- (add (sub WinDefaultLibcallImpls, WindowsDivRemMulLibcallOverrides),
+ (add (sub WinDefaultLibcallImpls, WindowsDivRemMulLibcallOverrides,
MemChkLibcalls),
DarwinSinCosStret, DarwinExp10,
X86_F128_Libcalls,
LibmHasSinCosF80, // FIXME: Depends on long double
@@ -2641,7 +2653,8 @@ defvar X86CommonLibcalls =
// FIXME: MSVCRT doesn't have powi. The f128 case is added as a
// hack for one test relying on it.
__powitf2_f128,
- DefaultStackProtector
+ DefaultStackProtector,
+ LibcallImpls<(add MemChkLibcalls), isNotPS>
);
defvar Windows32DivRemMulCalls =
diff --git a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/basic.ll
b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/basic.ll
index be8cae261c7bf..db0cc24c287bc 100644
--- a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/basic.ll
+++ b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/basic.ll
@@ -12,6 +12,10 @@ define float @sinf(float %x) {
; CHECK: declare void @_Unwind_Resume(...)
+; CHECK: declare void @__memcpy_chk(...)
+; CHECK: declare void @__memmove_chk(...)
+; CHECK: declare void @__memset_chk(...)
+
; CHECK: declare void @__umodti3(...)
; CHECK: declare void @acosf(...)
diff --git a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/ps.ll
b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/ps.ll
new file mode 100644
index 0..bcdcc63400f72
--- /dev/null
+++ b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/ps.ll
@@ -0,0 +1,6 @@
+; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=x86_64-scei-ps4 < %s |
FileCheck %s
+; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=x86_64-scei-ps5 < %s |
FileCheck %s
+
+; CHECK-NOT: __memcpy_chk
+; CHECK-NOT: __memset_chk
+; CHECK-NOT: __memmove_chk
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] DAG: Move expandMultipleResultFPLibCall to TargetLowering (NFC) (PR #166988)
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/166988
>From 2d95f3357853b05f23ed426209b3ec38291ae0ce Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Fri, 7 Nov 2025 10:22:01 -0800
Subject: [PATCH] DAG: Move expandMultipleResultFPLibCall to TargetLowering
(NFC)
This kind of helper is higher level and not general enough to go directly
in SelectionDAG. Most similar utilities are in TargetLowering.
---
llvm/include/llvm/CodeGen/SelectionDAG.h | 10 --
llvm/include/llvm/CodeGen/TargetLowering.h| 10 ++
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 4 +-
.../SelectionDAG/LegalizeFloatTypes.cpp | 2 +-
.../SelectionDAG/LegalizeVectorOps.cpp| 4 +-
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 161 --
.../CodeGen/SelectionDAG/TargetLowering.cpp | 161 ++
7 files changed, 176 insertions(+), 176 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h
b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 5b331e915..b024e8a68bd6e 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1718,16 +1718,6 @@ class SelectionDAG {
/// the target's desired shift amount type.
LLVM_ABI SDValue getShiftAmountOperand(EVT LHSTy, SDValue Op);
- /// Expands a node with multiple results to an FP or vector libcall. The
- /// libcall is expected to take all the operands of the \p Node followed by
- /// output pointers for each of the results. \p CallRetResNo can be
optionally
- /// set to indicate that one of the results comes from the libcall's return
- /// value.
- LLVM_ABI bool
- expandMultipleResultFPLibCall(RTLIB::Libcall LC, SDNode *Node,
-SmallVectorImpl &Results,
-std::optional CallRetResNo = {});
-
/// Expand the specified \c ISD::VAARG node as the Legalize pass would.
LLVM_ABI SDValue expandVAArg(SDNode *Node);
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h
b/llvm/include/llvm/CodeGen/TargetLowering.h
index 98565f423df3e..b0e98dc7062f0 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -5758,6 +5758,16 @@ class LLVM_ABI TargetLowering : public
TargetLoweringBase {
/// consisting of zext/sext, extract_subvector, mul and add operations.
SDValue expandPartialReduceMLA(SDNode *Node, SelectionDAG &DAG) const;
+ /// Expands a node with multiple results to an FP or vector libcall. The
+ /// libcall is expected to take all the operands of the \p Node followed by
+ /// output pointers for each of the results. \p CallRetResNo can be
optionally
+ /// set to indicate that one of the results comes from the libcall's return
+ /// value.
+ bool expandMultipleResultFPLibCall(
+ SelectionDAG &DAG, RTLIB::Libcall LC, SDNode *Node,
+ SmallVectorImpl &Results,
+ std::optional CallRetResNo = {}) const;
+
/// Legalize a SETCC or VP_SETCC with given LHS and RHS and condition code CC
/// on the current target. A VP_SETCC will additionally be given a Mask
/// and/or EVL not equal to SDValue().
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 3ed84af6a8717..99d14a60c6ed1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -4842,7 +4842,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode
*Node) {
RTLIB::Libcall LC = Node->getOpcode() == ISD::FSINCOS
? RTLIB::getSINCOS(VT)
: RTLIB::getSINCOSPI(VT);
-bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results);
+bool Expanded = TLI.expandMultipleResultFPLibCall(DAG, LC, Node, Results);
if (!Expanded) {
DAG.getContext()->emitError(Twine("no libcall available for ") +
Node->getOperationName(&DAG));
@@ -4940,7 +4940,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode
*Node) {
EVT VT = Node->getValueType(0);
RTLIB::Libcall LC = Node->getOpcode() == ISD::FMODF ? RTLIB::getMODF(VT)
: RTLIB::getFREXP(VT);
-bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results,
+bool Expanded = TLI.expandMultipleResultFPLibCall(DAG, LC, Node, Results,
/*CallRetResNo=*/0);
if (!Expanded)
llvm_unreachable("Expected scalar FFREXP/FMODF to expand to libcall!");
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 58983cb57d7f6..383a025a4d916 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -1726,7 +1726,7 @@ void
DAGTypeLegalizer::ExpandFloatRes_UnaryWithTwoFPResults(
SDNode *N, RTLIB::Libcall LC, s
[llvm-branch-commits] [llvm] RuntimeLibcalls: Add mustprogress to common function attributes (PR #167080)
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/167080
>From 238ad7def6d2051e6e80a17f0018143a5b91552b Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Fri, 7 Nov 2025 17:20:09 -0800
Subject: [PATCH] RuntimeLibcalls: Add mustprogress to common function
attributes
---
llvm/lib/IR/RuntimeLibcalls.cpp | 4 ++--
llvm/test/Transforms/Util/DeclareRuntimeLibcalls/armpl.ll | 2 +-
.../Util/DeclareRuntimeLibcalls/merge_attributes.ll | 2 +-
.../Transforms/Util/DeclareRuntimeLibcalls/sincos_stret.ll| 4 ++--
llvm/test/Transforms/Util/DeclareRuntimeLibcalls/sleef.ll | 2 +-
5 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp
index c8d9e7801ae6b..66a1396bc2027 100644
--- a/llvm/lib/IR/RuntimeLibcalls.cpp
+++ b/llvm/lib/IR/RuntimeLibcalls.cpp
@@ -134,8 +134,8 @@ RuntimeLibcallsInfo::getFunctionTy(LLVMContext &Ctx, const
Triple &TT,
const DataLayout &DL,
RTLIB::LibcallImpl LibcallImpl) const {
static constexpr Attribute::AttrKind CommonFnAttrs[] = {
- Attribute::NoCallback, Attribute::NoFree, Attribute::NoSync,
- Attribute::NoUnwind, Attribute::WillReturn};
+ Attribute::MustProgress, Attribute::NoCallback, Attribute::NoFree,
+ Attribute::NoSync, Attribute::NoUnwind, Attribute::WillReturn};
static constexpr Attribute::AttrKind CommonPtrArgAttrs[] = {
Attribute::NoAlias, Attribute::WriteOnly, Attribute::NonNull};
diff --git a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/armpl.ll
b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/armpl.ll
index e79e89c95c14a..c40cdf803474e 100644
--- a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/armpl.ll
+++ b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/armpl.ll
@@ -26,4 +26,4 @@
; CHECK: declare aarch64_vector_pcs void @armpl_vsincosq_f64(<2 x double>, ptr
noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16)
[[ATTRS]]
-; CHECK: attributes [[ATTRS]] = { nocallback nofree nosync nounwind willreturn
memory(argmem: write) }
+; CHECK: attributes [[ATTRS]] = { mustprogress nocallback nofree nosync
nounwind willreturn memory(argmem: write) }
diff --git
a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/merge_attributes.ll
b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/merge_attributes.ll
index ffbf11d4106dc..af876a7988d4e 100644
--- a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/merge_attributes.ll
+++ b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/merge_attributes.ll
@@ -8,4 +8,4 @@ define noundef nofpclass(nan) float @sqrtf(float %x) "foo" {
; FIXME: Individual fields of nofpclass not merged
; CHECK: define noundef nofpclass(ninf nsub nnorm) float @sqrtf(float %x)
[[SQRT_ATTR:#[0-9]+]] {
-; CHECK: attributes [[SQRT_ATTR]] = { nocallback nofree nosync nounwind
willreturn memory(errnomem: write) "foo" }
+; CHECK: attributes [[SQRT_ATTR]] = { mustprogress nocallback nofree nosync
nounwind willreturn memory(errnomem: write) "foo" }
diff --git a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/sincos_stret.ll
b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/sincos_stret.ll
index 57cb016bcb7f3..c7da97d410a9f 100644
--- a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/sincos_stret.ll
+++ b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/sincos_stret.ll
@@ -16,8 +16,8 @@
; SRET: declare void @__sincos_stret(ptr sret({ double, double }) align 4,
double) [[SINCOS_ATTRS:#[0-9]+]]
; SRET: declare void @__sincosf_stret(ptr sret({ float, float }) align 4,
float) [[SINCOS_ATTRS:#[0-9]+]]
-; CHECK: attributes [[SINCOS_ATTRS]] = { nocallback nofree nosync nounwind
willreturn memory(errnomem: write) }
-; SRET: attributes [[SINCOS_ATTRS]] = { nocallback nofree nosync nounwind
willreturn memory(argmem: write, errnomem: write) }
+; CHECK: attributes [[SINCOS_ATTRS]] = { mustprogress nocallback nofree nosync
nounwind willreturn memory(errnomem: write) }
+; SRET: attributes [[SINCOS_ATTRS]] = { mustprogress nocallback nofree nosync
nounwind willreturn memory(argmem: write, errnomem: write) }
; NONE-NOT: __sincos_stret
; NONE-NOT: __sincosf_stret
diff --git a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/sleef.ll
b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/sleef.ll
index ef248087f..e0d8489f7b94e 100644
--- a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/sleef.ll
+++ b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/sleef.ll
@@ -25,4 +25,4 @@
; CHECK: declare void @_ZGVsNxvl8l8_sincospi(, ptr
noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16)
[[ATTRS]]
-; CHECK: attributes [[ATTRS]] = { nocallback nofree nosync nounwind willreturn
memory(argmem: write) }
+; CHECK: attributes [[ATTRS]] = { mustprogress nocallback nofree nosync
nounwind willreturn memory(argmem: write) }
_
[llvm-branch-commits] [llvm] RuntimeLibcalls: Add macos unlocked IO functions to systems (PR #167084)
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/167084
>From 6656c9200380ab01ae091d73de8864f45d6e8e99 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Fri, 7 Nov 2025 19:42:23 -0800
Subject: [PATCH] RuntimeLibcalls: Add macos unlocked IO functions to systems
This is another of the easier to understand conditions from
TargetLibraryInfo
---
llvm/include/llvm/IR/RuntimeLibcalls.td | 8 +++-
.../Transforms/Util/DeclareRuntimeLibcalls/darwin.ll | 11 +--
2 files changed, 16 insertions(+), 3 deletions(-)
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.td
b/llvm/include/llvm/IR/RuntimeLibcalls.td
index d67aeb3757ea9..dd06a3442cebb 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.td
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.td
@@ -24,6 +24,7 @@ def isNotOSWindows :
RuntimeLibcallPredicate<"!TT.isOSWindows()">;
def isNotOSLinux : RuntimeLibcallPredicate<[{!TT.isOSLinux()}]>;
def isNotOSMSVCRT : RuntimeLibcallPredicate<"!TT.isOSMSVCRT()">;
def isPS : RuntimeLibcallPredicate<"TT.isPS()">;
+def isMacOSX : RuntimeLibcallPredicate<[{TT.isMacOSX()}]>;
def isNotOSWindowsOrIsCygwinMinGW
: RuntimeLibcallPredicate<"!TT.isOSWindows() || TT.isOSCygMing()">;
def isWindowsMSVCEnvironment : RuntimeLibcallPredicate<
@@ -1982,6 +1983,10 @@ defvar DarwinMemsetPattern = LibcallImpls<(add
memset_pattern4,
memset_pattern16),
darwinHasMemsetPattern>;
+defvar MacOSUnlockedIO = LibcallImpls<(add
+ getc_unlocked, getchar_unlocked, putc_unlocked, putchar_unlocked),
+ isMacOSX>;
+
defvar SecurityCheckCookieIfWinMSVC =
LibcallImpls<(add __security_check_cookie, __security_cookie),
isWindowsMSVCOrItaniumEnvironment>;
@@ -2140,6 +2145,7 @@ def AArch64SystemLibrary : SystemRuntimeLibrary<
LibcallImpls<(add Int128RTLibcalls), isAArch64_ILP64>,
LibcallImpls<(add bzero), isOSDarwin>,
DarwinExp10, DarwinSinCosStret, DarwinMemsetPattern,
+ MacOSUnlockedIO,
LibmHasSinCosF32, LibmHasSinCosF64, LibmHasSinCosF128,
DefaultLibmExp10,
DefaultStackProtector,
@@ -3294,7 +3300,7 @@ defvar MemChkLibcalls = [__memcpy_chk, __memset_chk,
__memmove_chk];
defvar X86CommonLibcalls =
(add (sub WinDefaultLibcallImpls, WindowsDivRemMulLibcallOverrides,
MemChkLibcalls),
- DarwinSinCosStret, DarwinExp10, DarwinMemsetPattern,
+ DarwinSinCosStret, DarwinExp10, DarwinMemsetPattern, MacOSUnlockedIO,
X86_F128_Libcalls,
LibmHasSinCosF80, // FIXME: Depends on long double
SinCosF32F64Libcalls,
diff --git a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/darwin.ll
b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/darwin.ll
index 6c63f5902f638..f2226e8149b2c 100644
--- a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/darwin.ll
+++ b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/darwin.ll
@@ -1,11 +1,12 @@
; REQUIRES: aarch64-registered-target, arm-registered-target,
x86-registered-target
-; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=i386-apple-macosx10.5
< %s | FileCheck -check-prefix=HAS-MEMSET-PATTERN %s
-; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=i386-apple-macosx10.4
< %s | FileCheck -check-prefix=NO-MEMSET-PATTERN %s
+; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=i386-apple-macosx10.5
< %s | FileCheck -check-prefixes=HAS-MEMSET-PATTERN,MACOS %s
+; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=i386-apple-macosx10.4
< %s | FileCheck -check-prefixes=NO-MEMSET-PATTERN,MACOS %s
; RUN: opt -S -passes=declare-runtime-libcalls
-mtriple=x86_64-apple-macosx10.5 < %s | FileCheck
-check-prefix=HAS-MEMSET-PATTERN %s
; RUN: opt -S -passes=declare-runtime-libcalls
-mtriple=x86_64-apple-macosx10.4 < %s | FileCheck
-check-prefix=NO-MEMSET-PATTERN %s
+; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=arm64-apple-macos10.5
< %s | FileCheck -check-prefixes=HAS-MEMSET-PATTERN,MACOS %s
; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=arm64-apple-ios3 < %s
| FileCheck -check-prefix=HAS-MEMSET-PATTERN %s
; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=arm64-apple-ios2 < %s
| FileCheck -check-prefix=NO-MEMSET-PATTERN %s
@@ -15,8 +16,14 @@
; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=arm64_32-apple-watchos
< %s | FileCheck -check-prefix=HAS-MEMSET-PATTERN %s
; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=armv7k-apple-watchos <
%s | FileCheck -check-prefix=HAS-MEMSET-PATTERN %s
+; MACOS: declare void @getc_unlocked(...)
+; MACOS: declare void @getchar_unlocked(...)
+
; HAS-MEMSET-PATTERN: declare void @memset_pattern16(...)
; HAS-MEMSET-PATTERN: declare void @memset_pattern4(...)
; HAS-MEMSET-PATTERN: declare void @memset_pattern8(...)
+; MACOS: declare void @putc_unlocked(...)
+; MACOS: declare void @putchar_unlocked(...)
+
; NO-MEMSET-PATTERN-NOT: memset_pattern
___
[llvm-branch-commits] [llvm] DAG: Use sincos vector libcalls through RuntimeLibcalls (PR #166984)
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/166984
>From 404f6f03a1bc499d964f724ac8aaa424f9cfb367 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Thu, 6 Nov 2025 20:29:04 -0800
Subject: [PATCH] DAG: Use sincos vector libcalls through RuntimeLibcalls
Copy new process from sincospi.
---
llvm/include/llvm/CodeGen/BasicTTIImpl.h | 7 ++-
.../CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 14 --
llvm/lib/CodeGen/TargetLoweringBase.cpp| 18 ++
3 files changed, 28 insertions(+), 11 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 1c167af4b0478..a52ad41d0f1b3 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -334,7 +334,12 @@ class BasicTTIImplBase : public
TargetTransformInfoImplCRTPBase {
break;
case Intrinsic::sincos:
- LC = RTLIB::getSINCOS(ScalarVT);
+ LC = RTLIB::getSINCOS(VT);
+ if (LC == RTLIB::UNKNOWN_LIBCALL)
+LC = RTLIB::getSINCOS(ScalarVT);
+ else if (VT.isVector())
+IsVectorCall = true;
+
break;
default:
return std::nullopt;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index f5a54497c8a98..78d8ea0676dd7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -1268,10 +1268,12 @@ void VectorLegalizer::Expand(SDNode *Node,
SmallVectorImpl &Results) {
return;
break;
-
+ case ISD::FSINCOS:
case ISD::FSINCOSPI: {
EVT VT = Node->getValueType(0);
-RTLIB::Libcall LC = RTLIB::getSINCOSPI(VT);
+RTLIB::Libcall LC = Node->getOpcode() == ISD::FSINCOS
+? RTLIB::getSINCOS(VT)
+: RTLIB::getSINCOSPI(VT);
if (LC != RTLIB::UNKNOWN_LIBCALL &&
DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT))
return;
@@ -1280,14 +1282,6 @@ void VectorLegalizer::Expand(SDNode *Node,
SmallVectorImpl &Results) {
// scalarizing.
break;
}
- case ISD::FSINCOS: {
-// FIXME: Try to directly match vector case like fsincospi
-EVT VT = Node->getValueType(0).getVectorElementType();
-RTLIB::Libcall LC = RTLIB::getSINCOS(VT);
-if (DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT))
- return;
-break;
- }
case ISD::FMODF: {
EVT VT = Node->getValueType(0).getVectorElementType();
RTLIB::Libcall LC = RTLIB::getMODF(VT);
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp
b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 814b4b57a0b9b..b4eb6c357e10e 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -425,6 +425,24 @@ RTLIB::Libcall RTLIB::getCOS(EVT RetVT) {
}
RTLIB::Libcall RTLIB::getSINCOS(EVT RetVT) {
+ // TODO: Tablegen should generate this function
+ if (RetVT.isVector()) {
+if (!RetVT.isSimple())
+ return RTLIB::UNKNOWN_LIBCALL;
+switch (RetVT.getSimpleVT().SimpleTy) {
+case MVT::v4f32:
+ return RTLIB::SINCOS_V4F32;
+case MVT::v2f64:
+ return RTLIB::SINCOS_V2F64;
+case MVT::nxv4f32:
+ return RTLIB::SINCOS_NXV4F32;
+case MVT::nxv2f64:
+ return RTLIB::SINCOS_NXV2F64;
+default:
+ return RTLIB::UNKNOWN_LIBCALL;
+}
+ }
+
return getFPLibCall(RetVT, SINCOS_F32, SINCOS_F64, SINCOS_F80, SINCOS_F128,
SINCOS_PPCF128);
}
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] RuntimeLibcalls: Add malloc and free entries (PR #167081)
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/167081
>From 74c1b3e67556e9f4d881117b22080acc2e65738d Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Fri, 7 Nov 2025 17:19:28 -0800
Subject: [PATCH] RuntimeLibcalls: Add malloc and free entries
Calloc was already here, but not the others. Also add
manual type information.
---
llvm/include/llvm/IR/RuntimeLibcalls.td | 5 ++
llvm/lib/IR/RuntimeLibcalls.cpp | 75 +++
.../Util/DeclareRuntimeLibcalls/basic.ll | 9 +++
3 files changed, 89 insertions(+)
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.td
b/llvm/include/llvm/IR/RuntimeLibcalls.td
index f6ad23a4f9c49..929959a4735b0 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.td
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.td
@@ -382,7 +382,9 @@ def MEMMOVE : RuntimeLibcall;
def MEMMOVE_CHK : RuntimeLibcall;
def MEMSET : RuntimeLibcall;
def MEMSET_CHK : RuntimeLibcall;
+def MALLOC : RuntimeLibcall;
def CALLOC : RuntimeLibcall;
+def FREE : RuntimeLibcall;
def BZERO : RuntimeLibcall;
def STRLEN : RuntimeLibcall;
@@ -1101,8 +1103,11 @@ def __memcpy_chk : RuntimeLibcallImpl;
def __memmove_chk : RuntimeLibcallImpl;
def __memset_chk : RuntimeLibcallImpl;
+def malloc : RuntimeLibcallImpl;
+
// DSEPass can emit calloc if it finds a pair of malloc/memset
def calloc : RuntimeLibcallImpl;
+def free : RuntimeLibcallImpl;
} // End let IsDefault = true
diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp
index 66a1396bc2027..4da2635b03182 100644
--- a/llvm/lib/IR/RuntimeLibcalls.cpp
+++ b/llvm/lib/IR/RuntimeLibcalls.cpp
@@ -129,13 +129,23 @@ bool RuntimeLibcallsInfo::darwinHasExp10(const Triple
&TT) {
}
}
+/// TODO: There is really no guarantee that sizeof(size_t) is equal to the
index
+/// size of the default address space. This matches TargetLibraryInfo and
should
+/// be kept in sync.
+static IntegerType *getSizeTType(LLVMContext &Ctx, const DataLayout &DL) {
+ return DL.getIndexType(Ctx, /*AddressSpace=*/0);
+}
+
std::pair<FunctionType *, AttributeList>
RuntimeLibcallsInfo::getFunctionTy(LLVMContext &Ctx, const Triple &TT,
const DataLayout &DL,
RTLIB::LibcallImpl LibcallImpl) const {
+ // TODO: NoCallback probably unsafe in general
static constexpr Attribute::AttrKind CommonFnAttrs[] = {
Attribute::MustProgress, Attribute::NoCallback, Attribute::NoFree,
Attribute::NoSync, Attribute::NoUnwind, Attribute::WillReturn};
+ static constexpr Attribute::AttrKind MemoryFnAttrs[] = {
+ Attribute::MustProgress, Attribute::NoUnwind, Attribute::WillReturn};
static constexpr Attribute::AttrKind CommonPtrArgAttrs[] = {
Attribute::NoAlias, Attribute::WriteOnly, Attribute::NonNull};
@@ -181,6 +191,71 @@ RuntimeLibcallsInfo::getFunctionTy(LLVMContext &Ctx, const
Triple &TT,
return {FunctionType::get(RetTy, {ScalarTy}, false), Attrs};
}
+ case RTLIB::impl_malloc:
+ case RTLIB::impl_calloc: {
+AttrBuilder FuncAttrBuilder(Ctx);
+for (Attribute::AttrKind Attr : MemoryFnAttrs)
+ FuncAttrBuilder.addAttribute(Attr);
+FuncAttrBuilder.addAttribute(Attribute::NoFree);
+
+AllocFnKind AllocKind = AllocFnKind::Alloc;
+if (LibcallImpl == RTLIB::impl_malloc)
+ AllocKind |= AllocFnKind::Uninitialized;
+
+// TODO: Set memory attribute
+FuncAttrBuilder.addAllocKindAttr(AllocKind);
+FuncAttrBuilder.addAttribute("alloc-family", "malloc");
+FuncAttrBuilder.addAllocSizeAttr(0, LibcallImpl == RTLIB::impl_malloc
+? std::nullopt
+: std::make_optional(1));
+
+AttributeList Attrs;
+Attrs = Attrs.addFnAttributes(Ctx, FuncAttrBuilder);
+
+{
+ AttrBuilder ArgAttrBuilder(Ctx);
+ for (Attribute::AttrKind AK : CommonPtrArgAttrs)
+ArgAttrBuilder.addAttribute(AK);
+
+ Attrs = Attrs.addRetAttribute(Ctx, Attribute::NoUndef);
+ Attrs = Attrs.addRetAttribute(Ctx, Attribute::NoAlias);
+ Attrs = Attrs.addParamAttribute(Ctx, 0, Attribute::NoUndef);
+ if (LibcallImpl == RTLIB::impl_calloc)
+Attrs = Attrs.addParamAttribute(Ctx, 1, Attribute::NoUndef);
+}
+
+IntegerType *SizeT = getSizeTType(Ctx, DL);
+PointerType *PtrTy = PointerType::get(Ctx, 0);
+SmallVector<Type *, 2> ArgTys = {SizeT};
+if (LibcallImpl == RTLIB::impl_calloc)
+ ArgTys.push_back(SizeT);
+
+return {FunctionType::get(PtrTy, ArgTys, false), Attrs};
+ }
+ case RTLIB::impl_free: {
+// TODO: Set memory attribute
+AttrBuilder FuncAttrBuilder(Ctx);
+for (Attribute::AttrKind Attr : MemoryFnAttrs)
+ FuncAttrBuilder.addAttribute(Attr);
+
+FuncAttrBuilder.addAllocKindAttr(AllocFnKind::Free);
+FuncAttrBuilder.addAttribute("alloc-family", "malloc");
+
+AttributeList Attrs;
+Attrs = Attrs.addFnAttributes(Ctx, FuncAttrBuilder);
+
+{
+ AttrBuilder Ar
[llvm-branch-commits] [llvm] RuntimeLibcalls: Add libcall entries for sleef and armpl modf functions (PR #166985)
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/166985
>From 67b8475633abef420ffd304ee6bd2509081de2bb Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Thu, 6 Nov 2025 20:44:55 -0800
Subject: [PATCH] RuntimeLibcalls: Add libcall entries for sleef and armpl modf
functions
---
llvm/include/llvm/IR/RuntimeLibcalls.td | 11
llvm/lib/IR/RuntimeLibcalls.cpp | 59 ++-
.../Util/DeclareRuntimeLibcalls/armpl.ll | 8 +++
.../Util/DeclareRuntimeLibcalls/sleef.ll | 10 +++-
4 files changed, 85 insertions(+), 3 deletions(-)
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.td
b/llvm/include/llvm/IR/RuntimeLibcalls.td
index ba096414d1802..98d863bfbb000 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.td
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.td
@@ -183,6 +183,7 @@ foreach FPTy = ["F32", "F64", "F80", "F128", "PPCF128"] in {
}
foreach VecTy = ["V4F32", "V2F64", "NXV4F32", "NXV2F64"] in {
+ def MODF_#VecTy : RuntimeLibcall;
def SINCOS_#VecTy : RuntimeLibcall;
def SINCOSPI_#VecTy : RuntimeLibcall;
}
@@ -1093,6 +1094,11 @@ def __security_check_cookie_arm64ec :
RuntimeLibcallImpl SleefLibcalls = {
+ def _ZGVnN2vl8_modf : RuntimeLibcallImpl;
+ def _ZGVnN4vl4_modff : RuntimeLibcallImpl;
+ def _ZGVsNxvl8_modf : RuntimeLibcallImpl;
+ def _ZGVsNxvl4_modff : RuntimeLibcallImpl;
+
def _ZGVnN2vl8l8_sincos : RuntimeLibcallImpl;
def _ZGVnN4vl4l4_sincosf : RuntimeLibcallImpl;
def _ZGVsNxvl8l8_sincos : RuntimeLibcallImpl;
@@ -1109,6 +1115,11 @@ defset list SleefLibcalls = {
//===--===//
defset list ARMPLLibcalls = {
+ def armpl_vmodfq_f64 : RuntimeLibcallImpl; //
CallingConv::AArch64_VectorCall
+ def armpl_vmodfq_f32 : RuntimeLibcallImpl; //
CallingConv::AArch64_VectorCall
+ def armpl_svmodf_f64_x : RuntimeLibcallImpl;
+ def armpl_svmodf_f32_x : RuntimeLibcallImpl;
+
def armpl_vsincosq_f64
: RuntimeLibcallImpl; // CallingConv::AArch64_VectorCall
def armpl_vsincosq_f32
diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp
index e66b9adb43ac4..c8d9e7801ae6b 100644
--- a/llvm/lib/IR/RuntimeLibcalls.cpp
+++ b/llvm/lib/IR/RuntimeLibcalls.cpp
@@ -43,7 +43,9 @@ RuntimeLibcallsInfo::RuntimeLibcallsInfo(const Triple &TT,
switch (ClVectorLibrary) {
case VectorLibrary::SLEEFGNUABI:
for (RTLIB::LibcallImpl Impl :
- {RTLIB::impl__ZGVnN2vl8l8_sincos, RTLIB::impl__ZGVnN4vl4l4_sincosf,
+ {RTLIB::impl__ZGVnN2vl8_modf, RTLIB::impl__ZGVnN4vl4_modff,
+ RTLIB::impl__ZGVsNxvl8_modf, RTLIB::impl__ZGVsNxvl4_modff,
+ RTLIB::impl__ZGVnN2vl8l8_sincos, RTLIB::impl__ZGVnN4vl4l4_sincosf,
RTLIB::impl__ZGVsNxvl8l8_sincos, RTLIB::impl__ZGVsNxvl4l4_sincosf,
RTLIB::impl__ZGVnN4vl4l4_sincospif,
RTLIB::impl__ZGVnN2vl8l8_sincospi,
RTLIB::impl__ZGVsNxvl4l4_sincospif,
@@ -52,7 +54,9 @@ RuntimeLibcallsInfo::RuntimeLibcallsInfo(const Triple &TT,
break;
case VectorLibrary::ArmPL:
for (RTLIB::LibcallImpl Impl :
- {RTLIB::impl_armpl_vsincosq_f64, RTLIB::impl_armpl_vsincosq_f32,
+ {RTLIB::impl_armpl_vmodfq_f64, RTLIB::impl_armpl_vmodfq_f32,
+ RTLIB::impl_armpl_svmodf_f64_x, RTLIB::impl_armpl_svmodf_f32_x,
+ RTLIB::impl_armpl_vsincosq_f64, RTLIB::impl_armpl_vsincosq_f32,
RTLIB::impl_armpl_svsincos_f64_x, RTLIB::impl_armpl_svsincos_f32_x,
RTLIB::impl_armpl_vsincospiq_f32, RTLIB::impl_armpl_vsincospiq_f64,
RTLIB::impl_armpl_svsincospi_f32_x,
@@ -197,6 +201,55 @@ RuntimeLibcallsInfo::getFunctionTy(LLVMContext &Ctx, const
Triple &TT,
fcNegNormal));
return {FuncTy, Attrs};
}
+ case RTLIB::impl__ZGVnN2vl8_modf:
+ case RTLIB::impl__ZGVnN4vl4_modff:
+ case RTLIB::impl__ZGVsNxvl8_modf:
+ case RTLIB::impl__ZGVsNxvl4_modff:
+ case RTLIB::impl_armpl_vmodfq_f64:
+ case RTLIB::impl_armpl_vmodfq_f32:
+ case RTLIB::impl_armpl_svmodf_f64_x:
+ case RTLIB::impl_armpl_svmodf_f32_x: {
+AttrBuilder FuncAttrBuilder(Ctx);
+
+bool IsF32 = LibcallImpl == RTLIB::impl__ZGVnN4vl4_modff ||
+ LibcallImpl == RTLIB::impl__ZGVsNxvl4_modff ||
+ LibcallImpl == RTLIB::impl_armpl_vmodfq_f32 ||
+ LibcallImpl == RTLIB::impl_armpl_svmodf_f32_x;
+
+bool IsScalable = LibcallImpl == RTLIB::impl__ZGVsNxvl8_modf ||
+ LibcallImpl == RTLIB::impl__ZGVsNxvl4_modff ||
+ LibcallImpl == RTLIB::impl_armpl_svmodf_f64_x ||
+ LibcallImpl == RTLIB::impl_armpl_svmodf_f32_x;
+
+Type *ScalarTy = IsF32 ? Type::getFloatTy(Ctx) : Type::getDoubleTy(Ctx);
+unsigned EC = IsF32 ? 4 : 2;
+
+Type *VecTy =
+IsScalable ? static_cast<Type *>(ScalableVectorType::get(ScalarTy, EC))
+ : static_cast<Type *>(FixedVectorType::get(ScalarTy, EC));
+
+for (Attr
[llvm-branch-commits] [llvm] XCore: Add iprintf to RuntimeLibcalls system library (PR #167088)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/167088 >From 41af6eab3161b1e5b37344228f1a8cb07473 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 7 Nov 2025 19:56:59 -0800 Subject: [PATCH] XCore: Add iprintf to RuntimeLibcalls system library --- llvm/include/llvm/IR/RuntimeLibcalls.td | 1 + llvm/test/Transforms/Util/DeclareRuntimeLibcalls/xcore.ll | 6 ++ 2 files changed, 7 insertions(+) create mode 100644 llvm/test/Transforms/Util/DeclareRuntimeLibcalls/xcore.ll diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.td b/llvm/include/llvm/IR/RuntimeLibcalls.td index ad11216c6ee9d..acc9a1bfc0f5b 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.td +++ b/llvm/include/llvm/IR/RuntimeLibcalls.td @@ -3349,6 +3349,7 @@ def XCoreSystemLibrary (add DefaultRuntimeLibcallImpls, exp10f, exp10, exp10l_f128, __memcpy_4, + iprintf, siprintf, fiprintf, LibcallImpls<(add LibmF128Libcalls, LibmF128FiniteLibcalls), isGNUEnvironment> )>; diff --git a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/xcore.ll b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/xcore.ll new file mode 100644 index 0..c1326ac980f4b --- /dev/null +++ b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/xcore.ll @@ -0,0 +1,6 @@ +; REQUIRES: webassembly-registered-target +; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=xcore < %s | FileCheck %s + +; CHECK: declare void @fiprintf(...) +; CHECK: declare void @iprintf(...) +; CHECK: declare void @siprintf(...) ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] RuntimeLibcalls: Add a few libm entries from TargetLibraryInfo (PR #167049)
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/167049
>From ae482245395ca91f7da82dd40cdd607d1de51efa Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Fri, 7 Nov 2025 14:57:28 -0800
Subject: [PATCH] RuntimeLibcalls: Add a few libm entries from
TargetLibraryInfo
These are floating-point functions recorded in TargetLibraryInfo,
but missing from RuntimeLibcalls.
---
llvm/include/llvm/IR/RuntimeLibcalls.td | 40 +++
.../Util/DeclareRuntimeLibcalls/basic.ll | 32 +++
2 files changed, 72 insertions(+)
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.td
b/llvm/include/llvm/IR/RuntimeLibcalls.td
index 743c4c4c87854..3fb55ad40e71b 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.td
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.td
@@ -149,6 +149,8 @@ foreach FPTy = ["F32", "F64", "F80", "F128", "PPCF128"] in {
def ATAN_#FPTy : RuntimeLibcall;
def ATAN2_#FPTy : RuntimeLibcall;
def SINCOS_#FPTy : RuntimeLibcall;
+ def REMQUO_#FPTy : RuntimeLibcall;
+ def FDIM_#FPTy : RuntimeLibcall;
}
foreach FPTy = [ "F32", "F64" ] in {
@@ -180,6 +182,12 @@ foreach FPTy = ["F32", "F64", "F80", "F128", "PPCF128"] in
{
def FREXP_#FPTy : RuntimeLibcall;
def SINCOSPI_#FPTy : RuntimeLibcall;
def MODF_#FPTy : RuntimeLibcall;
+ def NAN_#FPTy : RuntimeLibcall;
+ def NEXTTOWARD_#FPTy : RuntimeLibcall;
+ def REMAINDER_#FPTy : RuntimeLibcall;
+ def SCALBLN_#FPTy : RuntimeLibcall;
+ def SCALBN_#FPTy : RuntimeLibcall;
+ def TGAMMA_#FPTy : RuntimeLibcall;
}
defvar F32VectorSuffixes = ["V2F32", "V4F32", "V8F32", "V16F32", "NXV4F32"];
@@ -1034,6 +1042,38 @@ def modff : RuntimeLibcallImpl;
def modf : RuntimeLibcallImpl;
defm modfl : LibmLongDoubleLibCall;
+def nanf : RuntimeLibcallImpl;
+def nan : RuntimeLibcallImpl;
+defm nanl : LibmLongDoubleLibCall;
+
+def nexttowardf : RuntimeLibcallImpl;
+def nexttoward : RuntimeLibcallImpl;
+defm nexttowardl : LibmLongDoubleLibCall;
+
+def remainderf : RuntimeLibcallImpl;
+def remainder : RuntimeLibcallImpl;
+defm remainderl : LibmLongDoubleLibCall;
+
+def remquof : RuntimeLibcallImpl;
+def remquo : RuntimeLibcallImpl;
+defm remquol : LibmLongDoubleLibCall;
+
+def fdimf : RuntimeLibcallImpl;
+def fdim : RuntimeLibcallImpl;
+defm fdiml : LibmLongDoubleLibCall;
+
+def scalbnf : RuntimeLibcallImpl;
+def scalbn : RuntimeLibcallImpl;
+defm scalbnl : LibmLongDoubleLibCall;
+
+def scalblnf : RuntimeLibcallImpl;
+def scalbln : RuntimeLibcallImpl;
+defm scalblnl : LibmLongDoubleLibCall;
+
+def tgammaf : RuntimeLibcallImpl;
+def tgamma : RuntimeLibcallImpl;
+defm tgammal : LibmLongDoubleLibCall;
+
// Floating point environment
def fegetenv : RuntimeLibcallImpl;
def fesetenv : RuntimeLibcallImpl;
diff --git a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/basic.ll
b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/basic.ll
index 4c8c829a59f3c..be8cae261c7bf 100644
--- a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/basic.ll
+++ b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/basic.ll
@@ -16,9 +16,41 @@ define float @sinf(float %x) {
; CHECK: declare void @acosf(...)
+; CHECK: declare void @fdim(...)
+; CHECK: declare void @fdimf(...)
+; CHECK: declare void @fdiml(...)
+
+; CHECK: declare void @nan(...)
+; CHECK: declare void @nanf(...)
+; CHECK: declare void @nanl(...)
+
+; CHECK: declare void @nexttoward(...)
+; CHECK: declare void @nexttowardf(...)
+; CHECK: declare void @nexttowardl(...)
+
+; CHECK: declare void @remainder(...)
+; CHECK: declare void @remainderf(...)
+; CHECK: declare void @remainderl(...)
+
+; CHECK: declare void @remquo(...)
+; CHECK: declare void @remquof(...)
+; CHECK: declare void @remquol(...)
+
+; CHECK: declare void @scalbln(...)
+; CHECK: declare void @scalblnf(...)
+; CHECK: declare void @scalblnl(...)
+
+; CHECK: declare void @scalbn(...)
+; CHECK: declare void @scalbnf(...)
+; CHECK: declare void @scalbnl(...)
+
; CHECK: declare nofpclass(ninf nsub nnorm) double @sqrt(double)
[[SQRT_ATTRS:#[0-9]+]]
; CHECK: declare nofpclass(ninf nsub nnorm) float @sqrtf(float)
[[SQRT_ATTRS:#[0-9]+]]
+; CHECK: declare void @tgamma(...)
+; CHECK: declare void @tgammaf(...)
+; CHECK: declare void @tgammal(...)
+
; CHECK: declare void @truncl(...)
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] PowerPC: Add vec_malloc functions to AIX in RuntimeLibcalls (PR #167089)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/167089 >From d7558b98ac124ab368b2098bd96ee0936a2b10ae Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 7 Nov 2025 20:01:39 -0800 Subject: [PATCH] PowerPC: Add vec_malloc functions to AIX in RuntimeLibcalls --- llvm/include/llvm/IR/RuntimeLibcalls.td | 4 llvm/test/Transforms/Util/DeclareRuntimeLibcalls/aix.ll | 7 +++ 2 files changed, 11 insertions(+) create mode 100644 llvm/test/Transforms/Util/DeclareRuntimeLibcalls/aix.ll diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.td b/llvm/include/llvm/IR/RuntimeLibcalls.td index acc9a1bfc0f5b..0dd1460d15932 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.td +++ b/llvm/include/llvm/IR/RuntimeLibcalls.td @@ -3158,6 +3158,10 @@ def PPCSystemLibrary has__stack_smash_handler, has___guard_local, AvailableIf<__ssp_canary_word, isAIX>, + AvailableIf, + AvailableIf, + AvailableIf, + AvailableIf, AvailableIf<__stack_chk_fail, isNotOSOpenBSD>, AvailableIf<__stack_chk_guard, isNotOSAIXAndNotOSOpenBSD>)>; diff --git a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/aix.ll b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/aix.ll new file mode 100644 index 0..97a3d487ad0ee --- /dev/null +++ b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/aix.ll @@ -0,0 +1,7 @@ +; REQUIRES: webassembly-registered-target +; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=powerpc64-ibm-aix < %s | FileCheck %s + +; CHECK: declare void @vec_calloc(...) +; CHECK: declare void @vec_free(...) +; CHECK: declare void @vec_malloc(...) +; CHECK: declare void @vec_realloc(...) ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] DAG: Stop using TargetLibraryInfo for multi-result FP intrinsic codegen (PR #166987)
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/166987
>From 990eb1e0210b76c5abd76fca30c923de3d30b4ec Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Thu, 6 Nov 2025 21:55:35 -0800
Subject: [PATCH] DAG: Stop using TargetLibraryInfo for multi-result FP
intrinsic codegen
Only use RuntimeLibcallsInfo. Remove the helper functions used to
transition.
---
llvm/include/llvm/CodeGen/SelectionDAG.h | 10 +---
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 4 +-
.../SelectionDAG/LegalizeFloatTypes.cpp | 3 +-
.../SelectionDAG/LegalizeVectorOps.cpp| 4 +-
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 60 ---
5 files changed, 16 insertions(+), 65 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h
b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 62d2f222110e4..5b331e915 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1725,17 +1725,9 @@ class SelectionDAG {
/// value.
LLVM_ABI bool
expandMultipleResultFPLibCall(RTLIB::Libcall LC, SDNode *Node,
-SmallVectorImpl<SDValue> &Results, EVT
CallType,
+SmallVectorImpl<SDValue> &Results,
std::optional<unsigned> CallRetResNo = {});
- // FIXME: Ths should be removed, and form using RTLIB::Libcall should be
- // preferred. Callers should resolve the exact type libcall to use.
- LLVM_ABI bool
- expandMultipleResultFPLibCall(StringRef LibcallName, CallingConv::ID CC,
-SDNode *Node, SmallVectorImpl
&Results,
-std::optional CallRetResNo = {},
-bool IsVectorMasked = false);
-
/// Expand the specified \c ISD::VAARG node as the Legalize pass would.
LLVM_ABI SDValue expandVAArg(SDNode *Node);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index a0baf821698a8..3ed84af6a8717 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -4842,7 +4842,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode
*Node) {
RTLIB::Libcall LC = Node->getOpcode() == ISD::FSINCOS
? RTLIB::getSINCOS(VT)
: RTLIB::getSINCOSPI(VT);
-bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT);
+bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results);
if (!Expanded) {
DAG.getContext()->emitError(Twine("no libcall available for ") +
Node->getOperationName(&DAG));
@@ -4940,7 +4940,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode
*Node) {
EVT VT = Node->getValueType(0);
RTLIB::Libcall LC = Node->getOpcode() == ISD::FMODF ? RTLIB::getMODF(VT)
: RTLIB::getFREXP(VT);
-bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT,
+bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results,
/*CallRetResNo=*/0);
if (!Expanded)
llvm_unreachable("Expected scalar FFREXP/FMODF to expand to libcall!");
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 29c4dac12a81a..58983cb57d7f6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -1726,8 +1726,7 @@ void
DAGTypeLegalizer::ExpandFloatRes_UnaryWithTwoFPResults(
SDNode *N, RTLIB::Libcall LC, std::optional CallRetResNo) {
assert(!N->isStrictFPOpcode() && "strictfp not implemented");
SmallVector Results;
- DAG.expandMultipleResultFPLibCall(LC, N, Results, N->getValueType(0),
-CallRetResNo);
+ DAG.expandMultipleResultFPLibCall(LC, N, Results, CallRetResNo);
for (auto [ResNo, Res] : enumerate(Results)) {
SDValue Lo, Hi;
GetPairElements(Res, Lo, Hi);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index a7ae794459331..c55e55df373e9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -1275,7 +1275,7 @@ void VectorLegalizer::Expand(SDNode *Node,
SmallVectorImpl &Results) {
? RTLIB::getSINCOS(VT)
: RTLIB::getSINCOSPI(VT);
if (LC != RTLIB::UNKNOWN_LIBCALL &&
-DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT))
+DAG.expandMultipleResultFPLibCall(LC, Node, Results))
return;
// TODO: Try to see if there's a narrower call available to use before
@@ -1286,7 +1286,7 @@ void VectorLegalizer::Expand(SDNode *Node,
SmallVectorImpl &Results) {
EVT VT = Node->getValueTy
[llvm-branch-commits] [llvm] RuntimeLibcalls: Add memset_pattern* calls to darwin systems (PR #167083)
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/167083
>From 6ee0ec391022e318b79f8eae8b3172a34e79c2fa Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Fri, 7 Nov 2025 19:20:05 -0800
Subject: [PATCH] RuntimeLibcalls: Add memset_pattern* calls to darwin systems
This is one of the easier cases to comprehend in TargetLibraryInfo's
setup.
---
llvm/include/llvm/IR/RuntimeLibcalls.h| 10 +
llvm/include/llvm/IR/RuntimeLibcalls.td | 12 +++---
.../Util/DeclareRuntimeLibcalls/darwin.ll | 22 +++
3 files changed, 41 insertions(+), 3 deletions(-)
create mode 100644 llvm/test/Transforms/Util/DeclareRuntimeLibcalls/darwin.ll
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.h
b/llvm/include/llvm/IR/RuntimeLibcalls.h
index 0afe32a4ecc3c..adc0c777f0030 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.h
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.h
@@ -209,6 +209,16 @@ struct RuntimeLibcallsInfo {
return true;
}
+ static bool darwinHasMemsetPattern(const Triple &TT) {
+// memset_pattern{4,8,16} is only available on iOS 3.0 and Mac OS X 10.5
and
+// later. All versions of watchOS support it.
+if (TT.isMacOSX())
+ return !TT.isMacOSXVersionLT(10, 5);
+if (TT.isiOS())
+ return !TT.isOSVersionLT(3, 0);
+return TT.isWatchOS();
+ }
+
static bool hasAEABILibcalls(const Triple &TT) {
return TT.isTargetAEABI() || TT.isTargetGNUAEABI() ||
TT.isTargetMuslAEABI() || TT.isOSFuchsia() || TT.isAndroid();
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.td
b/llvm/include/llvm/IR/RuntimeLibcalls.td
index 5f52014247060..d67aeb3757ea9 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.td
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.td
@@ -50,6 +50,7 @@ def isWindowsMSVCOrItaniumEnvironment :
RuntimeLibcallPredicate<
def isGNUEnvironment : RuntimeLibcallPredicate<"TT.isGNUEnvironment()">;
def darwinHasSinCosStret : RuntimeLibcallPredicate<"darwinHasSinCosStret(TT)">;
def darwinHasExp10 : RuntimeLibcallPredicate<"darwinHasExp10(TT)">;
+def darwinHasMemsetPattern : RuntimeLibcallPredicate<[{darwinHasMemsetPattern(TT)}]>;
def hasExp10 : RuntimeLibcallPredicate<[{!TT.isOSDarwin()}]>;
@@ -1976,6 +1977,11 @@ defvar DarwinSinCosStret = LibcallImpls<(add
__sincosf_stret, __sincos_stret,
darwinHasSinCosStret>;
defvar DarwinExp10 = LibcallImpls<(add __exp10f, __exp10), darwinHasExp10>;
+defvar DarwinMemsetPattern = LibcallImpls<(add memset_pattern4,
+ memset_pattern8,
+ memset_pattern16),
+ darwinHasMemsetPattern>;
+
defvar SecurityCheckCookieIfWinMSVC =
LibcallImpls<(add __security_check_cookie, __security_cookie),
isWindowsMSVCOrItaniumEnvironment>;
@@ -2133,7 +2139,7 @@ def AArch64SystemLibrary : SystemRuntimeLibrary<
AArch64LibcallImpls,
LibcallImpls<(add Int128RTLibcalls), isAArch64_ILP64>,
LibcallImpls<(add bzero), isOSDarwin>,
- DarwinExp10, DarwinSinCosStret,
+ DarwinExp10, DarwinSinCosStret, DarwinMemsetPattern,
LibmHasSinCosF32, LibmHasSinCosF64, LibmHasSinCosF128,
DefaultLibmExp10,
DefaultStackProtector,
@@ -2603,7 +2609,7 @@ def ARMSystemLibrary
WindowARMFPIntCasts,
SecurityCheckCookieIfWinMSVC,
AEABIDivRemCalls,
- DarwinSinCosStret, DarwinExp10,
+ DarwinSinCosStret, DarwinExp10, DarwinMemsetPattern,
LibmHasSinCosF32, LibmHasSinCosF64, LibmHasSinCosF128,
DefaultLibmExp10,
@@ -3288,7 +3294,7 @@ defvar MemChkLibcalls = [__memcpy_chk, __memset_chk,
__memmove_chk];
defvar X86CommonLibcalls =
(add (sub WinDefaultLibcallImpls, WindowsDivRemMulLibcallOverrides,
MemChkLibcalls),
- DarwinSinCosStret, DarwinExp10,
+ DarwinSinCosStret, DarwinExp10, DarwinMemsetPattern,
X86_F128_Libcalls,
LibmHasSinCosF80, // FIXME: Depends on long double
SinCosF32F64Libcalls,
diff --git a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/darwin.ll
b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/darwin.ll
new file mode 100644
index 0..6c63f5902f638
--- /dev/null
+++ b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/darwin.ll
@@ -0,0 +1,22 @@
+; REQUIRES: aarch64-registered-target, arm-registered-target, x86-registered-target
+
+; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=i386-apple-macosx10.5 < %s | FileCheck -check-prefix=HAS-MEMSET-PATTERN %s
+; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=i386-apple-macosx10.4 < %s | FileCheck -check-prefix=NO-MEMSET-PATTERN %s
+
+; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=x86_64-apple-macosx10.5 < %s | FileCheck -check-prefix=HAS-MEMSET-PATTERN %s
+; RUN: opt -S -passes=declare-runtime-libcalls
-mtriple=x86_64-apple-macosx10.4 < %s | Fi
[llvm-branch-commits] [llvm] RuntimeLibcalls: Add definitions for vector math functions (PR #167026)
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/167026
>From b983fd3f0866b3f07e98d89c82bc9ee025231777 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Fri, 7 Nov 2025 12:51:18 -0800
Subject: [PATCH] RuntimeLibcalls: Add definitions for vector math functions
This is mostly the output of a vibe coded script running on
VecFuncs.def, with a lot of manual cleanups and fixing where the
vibes were off. This is not yet wired up to anything (except for the
handful of calls which are already manually enabled). In the future
the SystemLibrary mechanism needs to be generalized to allow plugging
these sets in based on the flag.
One annoying piece is there are some name conflicts across the libraries.
Some of the libmvec functions have name collisions with some sleef functions.
I solved this by just adding a prefix to the libmvec functions. It would
probably be a good idea to add a prefix to every group. It gets ugly,
particularly since some of the sleef functions started to use a Sleef_ prefix,
but mostly do not.
---
llvm/include/llvm/IR/RuntimeLibcalls.td | 1028 +--
1 file changed, 980 insertions(+), 48 deletions(-)
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.td
b/llvm/include/llvm/IR/RuntimeLibcalls.td
index 98d863bfbb000..743c4c4c87854 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.td
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.td
@@ -182,10 +182,63 @@ foreach FPTy = ["F32", "F64", "F80", "F128", "PPCF128"]
in {
def MODF_#FPTy : RuntimeLibcall;
}
-foreach VecTy = ["V4F32", "V2F64", "NXV4F32", "NXV2F64"] in {
- def MODF_#VecTy : RuntimeLibcall;
- def SINCOS_#VecTy : RuntimeLibcall;
- def SINCOSPI_#VecTy : RuntimeLibcall;
+defvar F32VectorSuffixes = ["V2F32", "V4F32", "V8F32", "V16F32", "NXV4F32"];
+defvar F64VectorSuffixes = ["V2F64", "V4F64", "V8F64", "NXV2F64"];
+
+foreach S = !listconcat(F32VectorSuffixes, F64VectorSuffixes) in {
+ def ACOS_#S : RuntimeLibcall;
+ def ACOSH_#S : RuntimeLibcall;
+ def ASIN_#S : RuntimeLibcall;
+ def ASINH_#S : RuntimeLibcall;
+ def ATAN_#S : RuntimeLibcall;
+ def ATAN2_#S : RuntimeLibcall;
+ def ATANH_#S : RuntimeLibcall;
+ def CBRT_#S : RuntimeLibcall;
+ def CEIL_#S : RuntimeLibcall;
+ def COPYSIGN_#S : RuntimeLibcall;
+ def COS_#S : RuntimeLibcall;
+ def COSH_#S : RuntimeLibcall;
+ def COSPI_#S : RuntimeLibcall;
+ def ERFC_#S : RuntimeLibcall;
+ def ERF_#S : RuntimeLibcall;
+ def EXP_#S : RuntimeLibcall;
+ def EXP_FINITE_#S : RuntimeLibcall;
+ def EXP10_#S : RuntimeLibcall;
+ def EXP2_#S : RuntimeLibcall;
+ def EXPM1_#S : RuntimeLibcall;
+ def FABS_#S : RuntimeLibcall;
+ def FDIM_#S : RuntimeLibcall;
+ def FLOOR_#S : RuntimeLibcall;
+ def FMA_#S : RuntimeLibcall;
+ def FMAX_#S : RuntimeLibcall;
+ def FMIN_#S : RuntimeLibcall;
+ def FMOD_#S : RuntimeLibcall;
+ def HYPOT_#S : RuntimeLibcall;
+ def ILOGB_#S : RuntimeLibcall;
+ def LDEXP_#S : RuntimeLibcall;
+ def LGAMMA_#S : RuntimeLibcall;
+ def LOG_#S : RuntimeLibcall;
+ def LOG10_#S : RuntimeLibcall;
+ def LOG1P_#S : RuntimeLibcall;
+ def LOG2_#S : RuntimeLibcall;
+ def LOGB_#S : RuntimeLibcall;
+ def MODF_#S : RuntimeLibcall;
+ def NEXTAFTER_#S : RuntimeLibcall;
+ def POW_#S : RuntimeLibcall;
+ def SINCOS_#S : RuntimeLibcall;
+ def SINCOSPI_#S : RuntimeLibcall;
+ def SIN_#S : RuntimeLibcall;
+ def SINH_#S : RuntimeLibcall;
+ def SINPI_#S : RuntimeLibcall;
+ def SQRT_#S : RuntimeLibcall;
+ def TAN_#S : RuntimeLibcall;
+ def TANH_#S : RuntimeLibcall;
+ def TGAMMA_#S : RuntimeLibcall;
+}
+
+foreach S = F64VectorSuffixes in {
+ def LOG_FINITE_#S : RuntimeLibcall;
+ def POW_FINITE_#S : RuntimeLibcall;
}
def FEGETENV : RuntimeLibcall;
@@ -1089,50 +1142,6 @@ def __security_check_cookie :
RuntimeLibcallImpl;
def __security_check_cookie_arm64ec : RuntimeLibcallImpl;
-//===--===//
-// sleef calls
-//===--===//
-
-defset list SleefLibcalls = {
- def _ZGVnN2vl8_modf : RuntimeLibcallImpl;
- def _ZGVnN4vl4_modff : RuntimeLibcallImpl;
- def _ZGVsNxvl8_modf : RuntimeLibcallImpl;
- def _ZGVsNxvl4_modff : RuntimeLibcallImpl;
-
- def _ZGVnN2vl8l8_sincos : RuntimeLibcallImpl;
- def _ZGVnN4vl4l4_sincosf : RuntimeLibcallImpl;
- def _ZGVsNxvl8l8_sincos : RuntimeLibcallImpl;
- def _ZGVsNxvl4l4_sincosf : RuntimeLibcallImpl;
-
- def _ZGVnN4vl4l4_sincospif : RuntimeLibcallImpl;
- def _ZGVnN2vl8l8_sincospi : RuntimeLibcallImpl;
- def _ZGVsNxvl4l4_sincospif : RuntimeLibcallImpl;
- def _ZGVsNxvl8l8_sincospi : RuntimeLibcallImpl;
-}
-
-//===--===//
-// ARMPL calls
-//===--===//
-
-defset list ARMPLLibcalls = {
- def armpl_vmodfq_f64 : RuntimeLibcallImpl; //
CallingConv::AArch64_VectorCall
- def armpl_vmodfq_f32 : RuntimeLibcallImpl; //
Calling
[llvm-branch-commits] [llvm] RuntimeLibcalls: Add call entries for sincos sleef and armpl libcalls (PR #166983)
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/166983
>From 32550a9456f6d0d7d1c0969820073df8b4397021 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Thu, 6 Nov 2025 20:20:03 -0800
Subject: [PATCH] RuntimeLibcalls: Add call entries for sincos sleef and armpl
libcalls
These are the tested set of libcalls used for codegen of llvm.sincos
and are needed to get the legalization to follow standard procedure.
---
llvm/include/llvm/IR/RuntimeLibcalls.td | 13 +++
llvm/lib/IR/RuntimeLibcalls.cpp | 36 ---
.../Util/DeclareRuntimeLibcalls/armpl.ll | 13 +--
.../Util/DeclareRuntimeLibcalls/sleef.ll | 14 ++--
4 files changed, 67 insertions(+), 9 deletions(-)
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.td
b/llvm/include/llvm/IR/RuntimeLibcalls.td
index a0b52395498c5..ba096414d1802 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.td
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.td
@@ -183,6 +183,7 @@ foreach FPTy = ["F32", "F64", "F80", "F128", "PPCF128"] in {
}
foreach VecTy = ["V4F32", "V2F64", "NXV4F32", "NXV2F64"] in {
+ def SINCOS_#VecTy : RuntimeLibcall;
def SINCOSPI_#VecTy : RuntimeLibcall;
}
@@ -1092,6 +1093,11 @@ def __security_check_cookie_arm64ec :
RuntimeLibcallImpl SleefLibcalls = {
+ def _ZGVnN2vl8l8_sincos : RuntimeLibcallImpl;
+ def _ZGVnN4vl4l4_sincosf : RuntimeLibcallImpl;
+ def _ZGVsNxvl8l8_sincos : RuntimeLibcallImpl;
+ def _ZGVsNxvl4l4_sincosf : RuntimeLibcallImpl;
+
def _ZGVnN4vl4l4_sincospif : RuntimeLibcallImpl;
def _ZGVnN2vl8l8_sincospi : RuntimeLibcallImpl;
def _ZGVsNxvl4l4_sincospif : RuntimeLibcallImpl;
@@ -1103,6 +1109,13 @@ defset list SleefLibcalls = {
//===--===//
defset list ARMPLLibcalls = {
+ def armpl_vsincosq_f64
+ : RuntimeLibcallImpl; // CallingConv::AArch64_VectorCall
+ def armpl_vsincosq_f32
+ : RuntimeLibcallImpl; // CallingConv::AArch64_VectorCall
+ def armpl_svsincos_f64_x : RuntimeLibcallImpl;
+ def armpl_svsincos_f32_x : RuntimeLibcallImpl;
+
def armpl_vsincospiq_f32 : RuntimeLibcallImpl;
def armpl_vsincospiq_f64 : RuntimeLibcallImpl;
def armpl_svsincospi_f32_x : RuntimeLibcallImpl;
diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp
index 795621701d910..e66b9adb43ac4 100644
--- a/llvm/lib/IR/RuntimeLibcalls.cpp
+++ b/llvm/lib/IR/RuntimeLibcalls.cpp
@@ -43,17 +43,26 @@ RuntimeLibcallsInfo::RuntimeLibcallsInfo(const Triple &TT,
switch (ClVectorLibrary) {
case VectorLibrary::SLEEFGNUABI:
for (RTLIB::LibcallImpl Impl :
- {RTLIB::impl__ZGVnN4vl4l4_sincospif,
RTLIB::impl__ZGVnN2vl8l8_sincospi,
+ {RTLIB::impl__ZGVnN2vl8l8_sincos, RTLIB::impl__ZGVnN4vl4l4_sincosf,
+ RTLIB::impl__ZGVsNxvl8l8_sincos, RTLIB::impl__ZGVsNxvl4l4_sincosf,
+ RTLIB::impl__ZGVnN4vl4l4_sincospif,
RTLIB::impl__ZGVnN2vl8l8_sincospi,
RTLIB::impl__ZGVsNxvl4l4_sincospif,
RTLIB::impl__ZGVsNxvl8l8_sincospi})
setAvailable(Impl);
break;
case VectorLibrary::ArmPL:
for (RTLIB::LibcallImpl Impl :
- {RTLIB::impl_armpl_vsincospiq_f32, RTLIB::impl_armpl_vsincospiq_f64,
+ {RTLIB::impl_armpl_vsincosq_f64, RTLIB::impl_armpl_vsincosq_f32,
+ RTLIB::impl_armpl_svsincos_f64_x, RTLIB::impl_armpl_svsincos_f32_x,
+ RTLIB::impl_armpl_vsincospiq_f32, RTLIB::impl_armpl_vsincospiq_f64,
RTLIB::impl_armpl_svsincospi_f32_x,
RTLIB::impl_armpl_svsincospi_f64_x})
setAvailable(Impl);
+
+for (RTLIB::LibcallImpl Impl :
+ {RTLIB::impl_armpl_vsincosq_f64, RTLIB::impl_armpl_vsincosq_f32})
+ setLibcallImplCallingConv(Impl, CallingConv::AArch64_VectorCall);
+
break;
default:
break;
@@ -188,6 +197,14 @@ RuntimeLibcallsInfo::getFunctionTy(LLVMContext &Ctx, const
Triple &TT,
fcNegNormal));
return {FuncTy, Attrs};
}
+ case RTLIB::impl__ZGVnN2vl8l8_sincos:
+ case RTLIB::impl__ZGVnN4vl4l4_sincosf:
+ case RTLIB::impl__ZGVsNxvl8l8_sincos:
+ case RTLIB::impl__ZGVsNxvl4l4_sincosf:
+ case RTLIB::impl_armpl_vsincosq_f64:
+ case RTLIB::impl_armpl_vsincosq_f32:
+ case RTLIB::impl_armpl_svsincos_f64_x:
+ case RTLIB::impl_armpl_svsincos_f32_x:
case RTLIB::impl__ZGVnN4vl4l4_sincospif:
case RTLIB::impl__ZGVnN2vl8l8_sincospi:
case RTLIB::impl__ZGVsNxvl4l4_sincospif:
@@ -201,11 +218,20 @@ RuntimeLibcallsInfo::getFunctionTy(LLVMContext &Ctx,
const Triple &TT,
bool IsF32 = LibcallImpl == RTLIB::impl__ZGVnN4vl4l4_sincospif ||
LibcallImpl == RTLIB::impl__ZGVsNxvl4l4_sincospif ||
LibcallImpl == RTLIB::impl_armpl_vsincospiq_f32 ||
- LibcallImpl == RTLIB::impl_armpl_svsincospi_f32_x;
+ LibcallImpl == RTLIB::impl_armpl_svsincospi_f32_x ||
+ LibcallImpl == RTLIB::impl__ZGVnN4vl4l4_sincosf ||
[llvm-branch-commits] [llvm] RuntimeLibcalls: Add __memcpy_chk, __memmove_chk, __memset_chk (PR #167053)
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/167053
>From e5510192d0f6f17d6a69b882c3e008629509ccbc Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Fri, 7 Nov 2025 15:27:24 -0800
Subject: [PATCH] RuntimeLibcalls: Add __memcpy_chk, __memmove_chk,
__memset_chk
These were in TargetLibraryInfo, but missing from RuntimeLibcalls.
This only adds the cases that already have the non-chk variants
already. Copies the enabled-by-default logic from TargetLibraryInfo,
which is probably overly permissive. Only isPS opts-out.
---
llvm/include/llvm/IR/RuntimeLibcalls.td | 17 +++--
.../Util/DeclareRuntimeLibcalls/basic.ll| 4
.../Util/DeclareRuntimeLibcalls/ps.ll | 6 ++
3 files changed, 25 insertions(+), 2 deletions(-)
create mode 100644 llvm/test/Transforms/Util/DeclareRuntimeLibcalls/ps.ll
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.td
b/llvm/include/llvm/IR/RuntimeLibcalls.td
index 3fb55ad40e71b..f6ad23a4f9c49 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.td
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.td
@@ -35,6 +35,9 @@ def isNotOSLinuxAndNotOSOpenBSD : RuntimeLibcallPredicate<
def isNotOSAIXAndNotOSOpenBSD : RuntimeLibcallPredicate<
[{!TT.isOSAIX() && !TT.isOSOpenBSD()}]>;
+def isNotPS : RuntimeLibcallPredicate<
+ [{!TT.isPS()}]>;
+
// OpenBSD uses __guard_local. AIX uses __ssp_canary_word, MSVC/Windows
// Itanium uses __security_cookie
def hasStackChkFail : RuntimeLibcallPredicate<
@@ -374,8 +377,11 @@ foreach FPTy = ["F32", "F64", "F128", "PPCF128"] in {
// Memory
def MEMCMP : RuntimeLibcall;
def MEMCPY : RuntimeLibcall;
+def MEMCPY_CHK : RuntimeLibcall;
def MEMMOVE : RuntimeLibcall;
+def MEMMOVE_CHK : RuntimeLibcall;
def MEMSET : RuntimeLibcall;
+def MEMSET_CHK : RuntimeLibcall;
def CALLOC : RuntimeLibcall;
def BZERO : RuntimeLibcall;
def STRLEN : RuntimeLibcall;
@@ -1091,6 +1097,10 @@ def memcpy : RuntimeLibcallImpl;
def memmove : RuntimeLibcallImpl;
def memset : RuntimeLibcallImpl;
+def __memcpy_chk : RuntimeLibcallImpl;
+def __memmove_chk : RuntimeLibcallImpl;
+def __memset_chk : RuntimeLibcallImpl;
+
// DSEPass can emit calloc if it finds a pair of malloc/memset
def calloc : RuntimeLibcallImpl;
@@ -2624,8 +2634,10 @@ defvar X86_F128_Libcalls = LibcallImpls<(add
LibmF128Libcalls, LibmF128FiniteLib
defvar SinCosF32F64Libcalls = LibcallImpls<(add sincosf, sincos),
hasSinCos_f32_f64>;
+defvar MemChkLibcalls = [__memcpy_chk, __memset_chk, __memmove_chk];
+
defvar X86CommonLibcalls =
- (add (sub WinDefaultLibcallImpls, WindowsDivRemMulLibcallOverrides),
+ (add (sub WinDefaultLibcallImpls, WindowsDivRemMulLibcallOverrides,
MemChkLibcalls),
DarwinSinCosStret, DarwinExp10,
X86_F128_Libcalls,
LibmHasSinCosF80, // FIXME: Depends on long double
@@ -2641,7 +2653,8 @@ defvar X86CommonLibcalls =
// FIXME: MSVCRT doesn't have powi. The f128 case is added as a
// hack for one test relying on it.
__powitf2_f128,
- DefaultStackProtector
+ DefaultStackProtector,
+ LibcallImpls<(add MemChkLibcalls), isNotPS>
);
defvar Windows32DivRemMulCalls =
diff --git a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/basic.ll
b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/basic.ll
index be8cae261c7bf..db0cc24c287bc 100644
--- a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/basic.ll
+++ b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/basic.ll
@@ -12,6 +12,10 @@ define float @sinf(float %x) {
; CHECK: declare void @_Unwind_Resume(...)
+; CHECK: declare void @__memcpy_chk(...)
+; CHECK: declare void @__memmove_chk(...)
+; CHECK: declare void @__memset_chk(...)
+
; CHECK: declare void @__umodti3(...)
; CHECK: declare void @acosf(...)
diff --git a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/ps.ll
b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/ps.ll
new file mode 100644
index 0..bcdcc63400f72
--- /dev/null
+++ b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/ps.ll
@@ -0,0 +1,6 @@
+; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=x86_64-scei-ps4 < %s | FileCheck %s
+; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=x86_64-scei-ps5 < %s | FileCheck %s
+
+; CHECK-NOT: __memcpy_chk
+; CHECK-NOT: __memset_chk
+; CHECK-NOT: __memmove_chk
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] RuntimeLibcalls: Add small_printf functions to emscripten (PR #167087)
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/167087
>From b54d80c70a08bd9c5fab3a41142c9803593a747b Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Fri, 7 Nov 2025 19:51:41 -0800
Subject: [PATCH] RuntimeLibcalls: Add small_printf functions to emscripten
---
llvm/include/llvm/IR/RuntimeLibcalls.td | 4
.../Transforms/Util/DeclareRuntimeLibcalls/emscripten.ll| 6 ++
2 files changed, 10 insertions(+)
create mode 100644
llvm/test/Transforms/Util/DeclareRuntimeLibcalls/emscripten.ll
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.td
b/llvm/include/llvm/IR/RuntimeLibcalls.td
index dd06a3442cebb..ad11216c6ee9d 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.td
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.td
@@ -3457,6 +3457,7 @@ def SystemZZOSSystemLibrary
def emscripten_return_address : RuntimeLibcallImpl;
def isWasm : RuntimeLibcallPredicate<"TT.isWasm()">;
+def isOSEmscripten : RuntimeLibcallPredicate<[{TT.isOSEmscripten()}]>;
// Define the emscripten name for return address helper.
// TODO: when implementing other Wasm backends, make this generic or only do
@@ -3468,6 +3469,9 @@ def WasmSystemLibrary
exp10f, exp10,
_Unwind_CallPersonality,
emscripten_return_address,
+ LibcallImpls<(add __small_printf,
+ __small_sprintf,
+ __small_fprintf), isOSEmscripten>,
__stack_chk_fail, __stack_chk_guard)>;
//===--===//
diff --git a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/emscripten.ll
b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/emscripten.ll
new file mode 100644
index 0..f92f0fe2a189f
--- /dev/null
+++ b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/emscripten.ll
@@ -0,0 +1,6 @@
+; REQUIRES: webassembly-registered-target
+; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=wasm64-unknown-emscripten < %s | FileCheck %s
+
+; CHECK: declare void @__small_fprintf(...)
+; CHECK: declare void @__small_printf(...)
+; CHECK: declare void @__small_sprintf(...)
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] RuntimeLibcalls: Add macos unlocked IO functions to systems (PR #167084)
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/167084
>From 6656c9200380ab01ae091d73de8864f45d6e8e99 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Fri, 7 Nov 2025 19:42:23 -0800
Subject: [PATCH] RuntimeLibcalls: Add macos unlocked IO functions to systems
This is another of the easier to understand conditions from
TargetLibraryInfo
---
llvm/include/llvm/IR/RuntimeLibcalls.td | 8 +++-
.../Transforms/Util/DeclareRuntimeLibcalls/darwin.ll | 11 +--
2 files changed, 16 insertions(+), 3 deletions(-)
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.td
b/llvm/include/llvm/IR/RuntimeLibcalls.td
index d67aeb3757ea9..dd06a3442cebb 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.td
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.td
@@ -24,6 +24,7 @@ def isNotOSWindows :
RuntimeLibcallPredicate<"!TT.isOSWindows()">;
def isNotOSLinux : RuntimeLibcallPredicate<[{!TT.isOSLinux()}]>;
def isNotOSMSVCRT : RuntimeLibcallPredicate<"!TT.isOSMSVCRT()">;
def isPS : RuntimeLibcallPredicate<"TT.isPS()">;
+def isMacOSX : RuntimeLibcallPredicate<[{TT.isMacOSX()}]>;
def isNotOSWindowsOrIsCygwinMinGW
: RuntimeLibcallPredicate<"!TT.isOSWindows() || TT.isOSCygMing()">;
def isWindowsMSVCEnvironment : RuntimeLibcallPredicate<
@@ -1982,6 +1983,10 @@ defvar DarwinMemsetPattern = LibcallImpls<(add
memset_pattern4,
memset_pattern16),
darwinHasMemsetPattern>;
+defvar MacOSUnlockedIO = LibcallImpls<(add
+ getc_unlocked, getchar_unlocked, putc_unlocked, putchar_unlocked),
+ isMacOSX>;
+
defvar SecurityCheckCookieIfWinMSVC =
LibcallImpls<(add __security_check_cookie, __security_cookie),
isWindowsMSVCOrItaniumEnvironment>;
@@ -2140,6 +2145,7 @@ def AArch64SystemLibrary : SystemRuntimeLibrary<
LibcallImpls<(add Int128RTLibcalls), isAArch64_ILP64>,
LibcallImpls<(add bzero), isOSDarwin>,
DarwinExp10, DarwinSinCosStret, DarwinMemsetPattern,
+ MacOSUnlockedIO,
LibmHasSinCosF32, LibmHasSinCosF64, LibmHasSinCosF128,
DefaultLibmExp10,
DefaultStackProtector,
@@ -3294,7 +3300,7 @@ defvar MemChkLibcalls = [__memcpy_chk, __memset_chk,
__memmove_chk];
defvar X86CommonLibcalls =
(add (sub WinDefaultLibcallImpls, WindowsDivRemMulLibcallOverrides,
MemChkLibcalls),
- DarwinSinCosStret, DarwinExp10, DarwinMemsetPattern,
+ DarwinSinCosStret, DarwinExp10, DarwinMemsetPattern, MacOSUnlockedIO,
X86_F128_Libcalls,
LibmHasSinCosF80, // FIXME: Depends on long double
SinCosF32F64Libcalls,
diff --git a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/darwin.ll
b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/darwin.ll
index 6c63f5902f638..f2226e8149b2c 100644
--- a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/darwin.ll
+++ b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/darwin.ll
@@ -1,11 +1,12 @@
; REQUIRES: aarch64-registered-target, arm-registered-target, x86-registered-target
-; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=i386-apple-macosx10.5 < %s | FileCheck -check-prefix=HAS-MEMSET-PATTERN %s
-; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=i386-apple-macosx10.4 < %s | FileCheck -check-prefix=NO-MEMSET-PATTERN %s
+; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=i386-apple-macosx10.5 < %s | FileCheck -check-prefixes=HAS-MEMSET-PATTERN,MACOS %s
+; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=i386-apple-macosx10.4 < %s | FileCheck -check-prefixes=NO-MEMSET-PATTERN,MACOS %s
; RUN: opt -S -passes=declare-runtime-libcalls
-mtriple=x86_64-apple-macosx10.5 < %s | FileCheck
-check-prefix=HAS-MEMSET-PATTERN %s
; RUN: opt -S -passes=declare-runtime-libcalls
-mtriple=x86_64-apple-macosx10.4 < %s | FileCheck
-check-prefix=NO-MEMSET-PATTERN %s
+; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=arm64-apple-macos10.5 < %s | FileCheck -check-prefixes=HAS-MEMSET-PATTERN,MACOS %s
; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=arm64-apple-ios3 < %s | FileCheck -check-prefix=HAS-MEMSET-PATTERN %s
; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=arm64-apple-ios2 < %s | FileCheck -check-prefix=NO-MEMSET-PATTERN %s
@@ -15,8 +16,14 @@
; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=arm64_32-apple-watchos < %s | FileCheck -check-prefix=HAS-MEMSET-PATTERN %s
; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=armv7k-apple-watchos < %s | FileCheck -check-prefix=HAS-MEMSET-PATTERN %s
+; MACOS: declare void @getc_unlocked(...)
+; MACOS: declare void @getchar_unlocked(...)
+
; HAS-MEMSET-PATTERN: declare void @memset_pattern16(...)
; HAS-MEMSET-PATTERN: declare void @memset_pattern4(...)
; HAS-MEMSET-PATTERN: declare void @memset_pattern8(...)
+; MACOS: declare void @putc_unlocked(...)
+; MACOS: declare void @putchar_unlocked(...)
+
; NO-MEMSET-PATTERN-NOT: memset_pattern
___
[llvm-branch-commits] [NFC][SpecialCaseList] Rename `Section::SectionStr` to `Name` (PR #167279)
https://github.com/qinkunbao approved this pull request. https://github.com/llvm/llvm-project/pull/167279 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
