[llvm-branch-commits] [libclc] release/20.x: [libclc] Include isnan implementation for SPIR-V targets (PR #140902)

2025-05-26 Thread Fraser Cormack via llvm-branch-commits

https://github.com/frasercrmck approved this pull request.


https://github.com/llvm/llvm-project/pull/140902
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/20.x: [MachO] Improve bounds check (#141083) (PR #141461)

2025-05-26 Thread via llvm-branch-commits

https://github.com/llvmbot updated 
https://github.com/llvm/llvm-project/pull/141461

>From 302dccf1379057d649533e83d028c125d7caefe2 Mon Sep 17 00:00:00 2001
From: Nikita Popov 
Date: Mon, 26 May 2025 09:43:00 +0200
Subject: [PATCH] [MachO] Improve bounds check (#141083)

The current check may fail if the addition overflows. I've observed
failures of macho-invalid.test on 32-bit due to this.

Instead, compare against the remaining bytes until the end of the
object.

(cherry picked from commit 3f29acb51739a3e6bfb8cc623eb37cb734c98a63)
---
 llvm/lib/Object/MachOObjectFile.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Object/MachOObjectFile.cpp 
b/llvm/lib/Object/MachOObjectFile.cpp
index 69d36e6a77db7..5db264207ffb7 100644
--- a/llvm/lib/Object/MachOObjectFile.cpp
+++ b/llvm/lib/Object/MachOObjectFile.cpp
@@ -192,7 +192,8 @@ static Expected<MachOObjectFile::LoadCommandInfo>
 getLoadCommandInfo(const MachOObjectFile &Obj, const char *Ptr,
uint32_t LoadCommandIndex) {
   if (auto CmdOrErr = getStructOrErr<MachO::load_command>(Obj, Ptr)) {
-if (CmdOrErr->cmdsize + Ptr > Obj.getData().end())
+assert(Ptr <= Obj.getData().end() && "Start must be before end");
+if (CmdOrErr->cmdsize > (uintptr_t)(Obj.getData().end() - Ptr))
   return malformedError("load command " + Twine(LoadCommandIndex) +
 " extends past end of file");
 if (CmdOrErr->cmdsize < 8)

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: add RegBankLegalize rules for AND OR and XOR (PR #132382)

2025-05-26 Thread Petar Avramovic via llvm-branch-commits

https://github.com/petar-avramovic updated 
https://github.com/llvm/llvm-project/pull/132382

>From 4b82d7501c8e754ac36b91924fec89a70046ff63 Mon Sep 17 00:00:00 2001
From: Petar Avramovic 
Date: Thu, 8 May 2025 12:02:27 +0200
Subject: [PATCH] AMDGPU/GlobalISel: add RegBankLegalize rules for AND OR and
 XOR

Uniform S1 is lowered to S32.
Divergent S1 is selected as VCC(S1) instruction select will select
SALU instruction based on wavesize (S32 or S64).
S16 are selected as is. There are register classes for vgpr S16.
Since some isel patterns check for sgpr S16 we don't lower to S32.
For 32 and 64 bit types we use B32/B64 rules that cover scalar vector
and pointers types.
SALU B32 and B64 and VALU B32 instructions are available.
Divergent B64 is lowered to B32.
---
 .../AMDGPU/AMDGPURegBankLegalizeHelper.cpp| 34 +---
 .../AMDGPU/AMDGPURegBankLegalizeHelper.h  |  1 +
 .../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 10 ++-
 .../AMDGPU/AMDGPURegBankLegalizeRules.h   |  2 +
 .../AMDGPU/GlobalISel/regbankselect-and.mir   | 33 ---
 .../AMDGPU/GlobalISel/regbankselect-or.mir| 85 +--
 .../AMDGPU/GlobalISel/regbankselect-xor.mir   | 84 +-
 7 files changed, 136 insertions(+), 113 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
index f03365c4da8dc..050b6302a98c5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
@@ -14,6 +14,7 @@
 #include "AMDGPURegBankLegalizeHelper.h"
 #include "AMDGPUGlobalISelUtils.h"
 #include "AMDGPUInstrInfo.h"
+#include "AMDGPURegBankLegalizeRules.h"
 #include "AMDGPURegisterBankInfo.h"
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -231,6 +232,23 @@ void RegBankLegalizeHelper::lowerS_BFE(MachineInstr &MI) {
   MI.eraseFromParent();
 }
 
+void RegBankLegalizeHelper::lowerSplitTo32(MachineInstr &MI) {
+  Register Dst = MI.getOperand(0).getReg();
+  LLT DstTy = MRI.getType(Dst);
+  assert(DstTy == V4S16 || DstTy == V2S32 || DstTy == S64);
+  LLT Ty = DstTy == V4S16 ? V2S16 : S32;
+  auto Op1 = B.buildUnmerge({VgprRB, Ty}, MI.getOperand(1).getReg());
+  auto Op2 = B.buildUnmerge({VgprRB, Ty}, MI.getOperand(2).getReg());
+  unsigned Opc = MI.getOpcode();
+  auto Flags = MI.getFlags();
+  auto Lo =
+  B.buildInstr(Opc, {{VgprRB, Ty}}, {Op1.getReg(0), Op2.getReg(0)}, Flags);
+  auto Hi =
+  B.buildInstr(Opc, {{VgprRB, Ty}}, {Op1.getReg(1), Op2.getReg(1)}, Flags);
+  B.buildMergeLikeInstr(Dst, {Lo, Hi});
+  MI.eraseFromParent();
+}
+
 void RegBankLegalizeHelper::lower(MachineInstr &MI,
   const RegBankLLTMapping &Mapping,
   SmallSet<Register, 4> &WaterfallSgprs) {
@@ -319,20 +337,12 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI,
 MI.eraseFromParent();
 return;
   }
-  case SplitTo32: {
-auto Op1 = B.buildUnmerge(VgprRB_S32, MI.getOperand(1).getReg());
-auto Op2 = B.buildUnmerge(VgprRB_S32, MI.getOperand(2).getReg());
-unsigned Opc = MI.getOpcode();
-auto Lo = B.buildInstr(Opc, {VgprRB_S32}, {Op1.getReg(0), Op2.getReg(0)});
-auto Hi = B.buildInstr(Opc, {VgprRB_S32}, {Op1.getReg(1), Op2.getReg(1)});
-B.buildMergeLikeInstr(MI.getOperand(0).getReg(), {Lo, Hi});
-MI.eraseFromParent();
-break;
-  }
   case V_BFE:
 return lowerV_BFE(MI);
   case S_BFE:
 return lowerS_BFE(MI);
+  case SplitTo32:
+return lowerSplitTo32(MI);
   case SplitLoad: {
 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
 unsigned Size = DstTy.getSizeInBits();
@@ -392,6 +402,7 @@ LLT 
RegBankLegalizeHelper::getTyFromID(RegBankLLTMappingApplyID ID) {
   case UniInVcc:
 return LLT::scalar(1);
   case Sgpr16:
+  case Vgpr16:
 return LLT::scalar(16);
   case Sgpr32:
   case Sgpr32Trunc:
@@ -511,6 +522,7 @@ 
RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) {
   case Sgpr32AExtBoolInReg:
   case Sgpr32SExt:
 return SgprRB;
+  case Vgpr16:
   case Vgpr32:
   case Vgpr64:
   case VgprP0:
@@ -554,6 +566,7 @@ void RegBankLegalizeHelper::applyMappingDst(
 case SgprP4:
 case SgprP5:
 case SgprV4S32:
+case Vgpr16:
 case Vgpr32:
 case Vgpr64:
 case VgprP0:
@@ -685,6 +698,7 @@ void RegBankLegalizeHelper::applyMappingSrc(
   break;
 }
 // vgpr scalars, pointers and vectors
+case Vgpr16:
 case Vgpr32:
 case Vgpr64:
 case VgprP0:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h 
b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h
index 2d4da4cc90ea7..bbfa7b3986fd2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h
@@ -112,6 +112,7 @@ class RegBankLegalizeHelper {
 
   void lowerV_BFE(MachineInstr &MI);
   void lowerS_BFE(MachineInstr &MI);
+  void lowerSplitTo32(MachineInstr &MI);
 };
 
 } // end n

[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: add RegBankLegalize rules for bit shifts and sext-inreg (PR #132385)

2025-05-26 Thread Petar Avramovic via llvm-branch-commits

https://github.com/petar-avramovic updated 
https://github.com/llvm/llvm-project/pull/132385

>From 758eec524d2500317583750a75c1885ecbf394af Mon Sep 17 00:00:00 2001
From: Petar Avramovic 
Date: Mon, 14 Apr 2025 16:35:19 +0200
Subject: [PATCH] AMDGPU/GlobalISel: add RegBankLegalize rules for bit shifts
 and sext-inreg

Uniform S16 shifts have to be extended to S32 using appropriate Extend
before lowering to S32 instruction.
Uniform packed V2S16 are lowered to SGPR S32 instructions,
other option is to use VALU packed V2S16 and ReadAnyLane.
For uniform S32 and S64 and divergent S16, S32, S64 and V2S16 there are
instructions available.
---
 .../Target/AMDGPU/AMDGPURegBankLegalize.cpp   |   2 +-
 .../AMDGPU/AMDGPURegBankLegalizeHelper.cpp| 108 +
 .../AMDGPU/AMDGPURegBankLegalizeHelper.h  |   5 +
 .../AMDGPU/AMDGPURegBankLegalizeRules.cpp |  43 +++-
 .../AMDGPU/AMDGPURegBankLegalizeRules.h   |  11 +
 llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll   |  35 ++-
 llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll   | 218 ++
 .../AMDGPU/GlobalISel/regbankselect-ashr.mir  |   6 +-
 .../AMDGPU/GlobalISel/regbankselect-lshr.mir  |  17 +-
 .../GlobalISel/regbankselect-sext-inreg.mir   |  24 +-
 .../AMDGPU/GlobalISel/regbankselect-shl.mir   |   6 +-
 .../CodeGen/AMDGPU/GlobalISel/sext_inreg.ll   |  34 +--
 llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll|  31 ++-
 13 files changed, 375 insertions(+), 165 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
index 4f9beeaacfaee..ba661348ca5b5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
@@ -308,7 +308,7 @@ bool 
AMDGPURegBankLegalize::runOnMachineFunction(MachineFunction &MF) {
 // Opcodes that support pretty much all combinations of reg banks and LLTs
 // (except S1). There is no point in writing rules for them.
 if (Opc == AMDGPU::G_BUILD_VECTOR || Opc == AMDGPU::G_UNMERGE_VALUES ||
-Opc == AMDGPU::G_MERGE_VALUES) {
+Opc == AMDGPU::G_MERGE_VALUES || Opc == AMDGPU::G_BITCAST) {
   RBLHelper.applyMappingTrivial(*MI);
   continue;
 }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
index a7c1d7ab98adf..7ff822c6f6580 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
@@ -171,6 +171,62 @@ void RegBankLegalizeHelper::lowerVccExtToSel(MachineInstr 
&MI) {
   MI.eraseFromParent();
 }
 
+const std::pair<Register, Register>
+RegBankLegalizeHelper::unpackZExt(Register Reg) {
+  auto PackedS32 = B.buildBitcast(SgprRB_S32, Reg);
+  auto Mask = B.buildConstant(SgprRB_S32, 0x0000ffff);
+  auto Lo = B.buildAnd(SgprRB_S32, PackedS32, Mask);
+  auto Hi = B.buildLShr(SgprRB_S32, PackedS32, B.buildConstant(SgprRB_S32, 
16));
+  return {Lo.getReg(0), Hi.getReg(0)};
+}
+
+const std::pair<Register, Register>
+RegBankLegalizeHelper::unpackSExt(Register Reg) {
+  auto PackedS32 = B.buildBitcast(SgprRB_S32, Reg);
+  auto Lo = B.buildSExtInReg(SgprRB_S32, PackedS32, 16);
+  auto Hi = B.buildAShr(SgprRB_S32, PackedS32, B.buildConstant(SgprRB_S32, 
16));
+  return {Lo.getReg(0), Hi.getReg(0)};
+}
+
+const std::pair<Register, Register>
+RegBankLegalizeHelper::unpackAExt(Register Reg) {
+  auto PackedS32 = B.buildBitcast(SgprRB_S32, Reg);
+  auto Lo = PackedS32;
+  auto Hi = B.buildLShr(SgprRB_S32, PackedS32, B.buildConstant(SgprRB_S32, 
16));
+  return {Lo.getReg(0), Hi.getReg(0)};
+}
+
+void RegBankLegalizeHelper::lowerUnpackBitShift(MachineInstr &MI) {
+  Register Lo, Hi;
+  switch (MI.getOpcode()) {
+  case AMDGPU::G_SHL: {
+auto [Val0, Val1] = unpackAExt(MI.getOperand(1).getReg());
+auto [Amt0, Amt1] = unpackAExt(MI.getOperand(2).getReg());
+Lo = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val0, Amt0}).getReg(0);
+Hi = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val1, Amt1}).getReg(0);
+break;
+  }
+  case AMDGPU::G_LSHR: {
+auto [Val0, Val1] = unpackZExt(MI.getOperand(1).getReg());
+auto [Amt0, Amt1] = unpackZExt(MI.getOperand(2).getReg());
+Lo = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val0, Amt0}).getReg(0);
+Hi = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val1, Amt1}).getReg(0);
+break;
+  }
+  case AMDGPU::G_ASHR: {
+auto [Val0, Val1] = unpackSExt(MI.getOperand(1).getReg());
+auto [Amt0, Amt1] = unpackSExt(MI.getOperand(2).getReg());
+Lo = B.buildAShr(SgprRB_S32, Val0, Amt0).getReg(0);
+Hi = B.buildAShr(SgprRB_S32, Val1, Amt1).getReg(0);
+break;
+  }
+  default:
+llvm_unreachable("Unpack lowering not implemented");
+  }
+  B.buildBuildVectorTrunc(MI.getOperand(0).getReg(), {Lo, Hi});
+  MI.eraseFromParent();
+}
+
 static bool isSignedBFE(MachineInstr &MI) {
   if (GIntrinsic *GI = dyn_cast<GIntrinsic>(&MI))
 return (GI->is(Intrinsic::amdgcn_sbfe));
@@ -306,6 +362,33 @@ void 
RegBankLegalizeHelper::lowerSplitTo32Select(MachineIn

[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: add RegBankLegalize rules for AND OR and XOR (PR #132382)

2025-05-26 Thread Petar Avramovic via llvm-branch-commits

https://github.com/petar-avramovic updated 
https://github.com/llvm/llvm-project/pull/132382

>From 4b82d7501c8e754ac36b91924fec89a70046ff63 Mon Sep 17 00:00:00 2001
From: Petar Avramovic 
Date: Thu, 8 May 2025 12:02:27 +0200
Subject: [PATCH] AMDGPU/GlobalISel: add RegBankLegalize rules for AND OR and
 XOR

Uniform S1 is lowered to S32.
Divergent S1 is selected as VCC(S1) instruction select will select
SALU instruction based on wavesize (S32 or S64).
S16 are selected as is. There are register classes for vgpr S16.
Since some isel patterns check for sgpr S16 we don't lower to S32.
For 32 and 64 bit types we use B32/B64 rules that cover scalar vector
and pointers types.
SALU B32 and B64 and VALU B32 instructions are available.
Divergent B64 is lowered to B32.
---
 .../AMDGPU/AMDGPURegBankLegalizeHelper.cpp| 34 +---
 .../AMDGPU/AMDGPURegBankLegalizeHelper.h  |  1 +
 .../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 10 ++-
 .../AMDGPU/AMDGPURegBankLegalizeRules.h   |  2 +
 .../AMDGPU/GlobalISel/regbankselect-and.mir   | 33 ---
 .../AMDGPU/GlobalISel/regbankselect-or.mir| 85 +--
 .../AMDGPU/GlobalISel/regbankselect-xor.mir   | 84 +-
 7 files changed, 136 insertions(+), 113 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
index f03365c4da8dc..050b6302a98c5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
@@ -14,6 +14,7 @@
 #include "AMDGPURegBankLegalizeHelper.h"
 #include "AMDGPUGlobalISelUtils.h"
 #include "AMDGPUInstrInfo.h"
+#include "AMDGPURegBankLegalizeRules.h"
 #include "AMDGPURegisterBankInfo.h"
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -231,6 +232,23 @@ void RegBankLegalizeHelper::lowerS_BFE(MachineInstr &MI) {
   MI.eraseFromParent();
 }
 
+void RegBankLegalizeHelper::lowerSplitTo32(MachineInstr &MI) {
+  Register Dst = MI.getOperand(0).getReg();
+  LLT DstTy = MRI.getType(Dst);
+  assert(DstTy == V4S16 || DstTy == V2S32 || DstTy == S64);
+  LLT Ty = DstTy == V4S16 ? V2S16 : S32;
+  auto Op1 = B.buildUnmerge({VgprRB, Ty}, MI.getOperand(1).getReg());
+  auto Op2 = B.buildUnmerge({VgprRB, Ty}, MI.getOperand(2).getReg());
+  unsigned Opc = MI.getOpcode();
+  auto Flags = MI.getFlags();
+  auto Lo =
+  B.buildInstr(Opc, {{VgprRB, Ty}}, {Op1.getReg(0), Op2.getReg(0)}, Flags);
+  auto Hi =
+  B.buildInstr(Opc, {{VgprRB, Ty}}, {Op1.getReg(1), Op2.getReg(1)}, Flags);
+  B.buildMergeLikeInstr(Dst, {Lo, Hi});
+  MI.eraseFromParent();
+}
+
 void RegBankLegalizeHelper::lower(MachineInstr &MI,
   const RegBankLLTMapping &Mapping,
   SmallSet<Register, 4> &WaterfallSgprs) {
@@ -319,20 +337,12 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI,
 MI.eraseFromParent();
 return;
   }
-  case SplitTo32: {
-auto Op1 = B.buildUnmerge(VgprRB_S32, MI.getOperand(1).getReg());
-auto Op2 = B.buildUnmerge(VgprRB_S32, MI.getOperand(2).getReg());
-unsigned Opc = MI.getOpcode();
-auto Lo = B.buildInstr(Opc, {VgprRB_S32}, {Op1.getReg(0), Op2.getReg(0)});
-auto Hi = B.buildInstr(Opc, {VgprRB_S32}, {Op1.getReg(1), Op2.getReg(1)});
-B.buildMergeLikeInstr(MI.getOperand(0).getReg(), {Lo, Hi});
-MI.eraseFromParent();
-break;
-  }
   case V_BFE:
 return lowerV_BFE(MI);
   case S_BFE:
 return lowerS_BFE(MI);
+  case SplitTo32:
+return lowerSplitTo32(MI);
   case SplitLoad: {
 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
 unsigned Size = DstTy.getSizeInBits();
@@ -392,6 +402,7 @@ LLT 
RegBankLegalizeHelper::getTyFromID(RegBankLLTMappingApplyID ID) {
   case UniInVcc:
 return LLT::scalar(1);
   case Sgpr16:
+  case Vgpr16:
 return LLT::scalar(16);
   case Sgpr32:
   case Sgpr32Trunc:
@@ -511,6 +522,7 @@ 
RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) {
   case Sgpr32AExtBoolInReg:
   case Sgpr32SExt:
 return SgprRB;
+  case Vgpr16:
   case Vgpr32:
   case Vgpr64:
   case VgprP0:
@@ -554,6 +566,7 @@ void RegBankLegalizeHelper::applyMappingDst(
 case SgprP4:
 case SgprP5:
 case SgprV4S32:
+case Vgpr16:
 case Vgpr32:
 case Vgpr64:
 case VgprP0:
@@ -685,6 +698,7 @@ void RegBankLegalizeHelper::applyMappingSrc(
   break;
 }
 // vgpr scalars, pointers and vectors
+case Vgpr16:
 case Vgpr32:
 case Vgpr64:
 case VgprP0:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h 
b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h
index 2d4da4cc90ea7..bbfa7b3986fd2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h
@@ -112,6 +112,7 @@ class RegBankLegalizeHelper {
 
   void lowerV_BFE(MachineInstr &MI);
   void lowerS_BFE(MachineInstr &MI);
+  void lowerSplitTo32(MachineInstr &MI);
 };
 
 } // end n

[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: add RegBankLegalize rules for bit shifts and sext-inreg (PR #132385)

2025-05-26 Thread Petar Avramovic via llvm-branch-commits

https://github.com/petar-avramovic updated 
https://github.com/llvm/llvm-project/pull/132385

>From 758eec524d2500317583750a75c1885ecbf394af Mon Sep 17 00:00:00 2001
From: Petar Avramovic 
Date: Mon, 14 Apr 2025 16:35:19 +0200
Subject: [PATCH] AMDGPU/GlobalISel: add RegBankLegalize rules for bit shifts
 and sext-inreg

Uniform S16 shifts have to be extended to S32 using appropriate Extend
before lowering to S32 instruction.
Uniform packed V2S16 are lowered to SGPR S32 instructions,
other option is to use VALU packed V2S16 and ReadAnyLane.
For uniform S32 and S64 and divergent S16, S32, S64 and V2S16 there are
instructions available.
---
 .../Target/AMDGPU/AMDGPURegBankLegalize.cpp   |   2 +-
 .../AMDGPU/AMDGPURegBankLegalizeHelper.cpp| 108 +
 .../AMDGPU/AMDGPURegBankLegalizeHelper.h  |   5 +
 .../AMDGPU/AMDGPURegBankLegalizeRules.cpp |  43 +++-
 .../AMDGPU/AMDGPURegBankLegalizeRules.h   |  11 +
 llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll   |  35 ++-
 llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll   | 218 ++
 .../AMDGPU/GlobalISel/regbankselect-ashr.mir  |   6 +-
 .../AMDGPU/GlobalISel/regbankselect-lshr.mir  |  17 +-
 .../GlobalISel/regbankselect-sext-inreg.mir   |  24 +-
 .../AMDGPU/GlobalISel/regbankselect-shl.mir   |   6 +-
 .../CodeGen/AMDGPU/GlobalISel/sext_inreg.ll   |  34 +--
 llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll|  31 ++-
 13 files changed, 375 insertions(+), 165 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
index 4f9beeaacfaee..ba661348ca5b5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
@@ -308,7 +308,7 @@ bool 
AMDGPURegBankLegalize::runOnMachineFunction(MachineFunction &MF) {
 // Opcodes that support pretty much all combinations of reg banks and LLTs
 // (except S1). There is no point in writing rules for them.
 if (Opc == AMDGPU::G_BUILD_VECTOR || Opc == AMDGPU::G_UNMERGE_VALUES ||
-Opc == AMDGPU::G_MERGE_VALUES) {
+Opc == AMDGPU::G_MERGE_VALUES || Opc == AMDGPU::G_BITCAST) {
   RBLHelper.applyMappingTrivial(*MI);
   continue;
 }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
index a7c1d7ab98adf..7ff822c6f6580 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
@@ -171,6 +171,62 @@ void RegBankLegalizeHelper::lowerVccExtToSel(MachineInstr 
&MI) {
   MI.eraseFromParent();
 }
 
+const std::pair<Register, Register>
+RegBankLegalizeHelper::unpackZExt(Register Reg) {
+  auto PackedS32 = B.buildBitcast(SgprRB_S32, Reg);
+  auto Mask = B.buildConstant(SgprRB_S32, 0x0000ffff);
+  auto Lo = B.buildAnd(SgprRB_S32, PackedS32, Mask);
+  auto Hi = B.buildLShr(SgprRB_S32, PackedS32, B.buildConstant(SgprRB_S32, 
16));
+  return {Lo.getReg(0), Hi.getReg(0)};
+}
+
+const std::pair<Register, Register>
+RegBankLegalizeHelper::unpackSExt(Register Reg) {
+  auto PackedS32 = B.buildBitcast(SgprRB_S32, Reg);
+  auto Lo = B.buildSExtInReg(SgprRB_S32, PackedS32, 16);
+  auto Hi = B.buildAShr(SgprRB_S32, PackedS32, B.buildConstant(SgprRB_S32, 
16));
+  return {Lo.getReg(0), Hi.getReg(0)};
+}
+
+const std::pair<Register, Register>
+RegBankLegalizeHelper::unpackAExt(Register Reg) {
+  auto PackedS32 = B.buildBitcast(SgprRB_S32, Reg);
+  auto Lo = PackedS32;
+  auto Hi = B.buildLShr(SgprRB_S32, PackedS32, B.buildConstant(SgprRB_S32, 
16));
+  return {Lo.getReg(0), Hi.getReg(0)};
+}
+
+void RegBankLegalizeHelper::lowerUnpackBitShift(MachineInstr &MI) {
+  Register Lo, Hi;
+  switch (MI.getOpcode()) {
+  case AMDGPU::G_SHL: {
+auto [Val0, Val1] = unpackAExt(MI.getOperand(1).getReg());
+auto [Amt0, Amt1] = unpackAExt(MI.getOperand(2).getReg());
+Lo = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val0, Amt0}).getReg(0);
+Hi = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val1, Amt1}).getReg(0);
+break;
+  }
+  case AMDGPU::G_LSHR: {
+auto [Val0, Val1] = unpackZExt(MI.getOperand(1).getReg());
+auto [Amt0, Amt1] = unpackZExt(MI.getOperand(2).getReg());
+Lo = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val0, Amt0}).getReg(0);
+Hi = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val1, Amt1}).getReg(0);
+break;
+  }
+  case AMDGPU::G_ASHR: {
+auto [Val0, Val1] = unpackSExt(MI.getOperand(1).getReg());
+auto [Amt0, Amt1] = unpackSExt(MI.getOperand(2).getReg());
+Lo = B.buildAShr(SgprRB_S32, Val0, Amt0).getReg(0);
+Hi = B.buildAShr(SgprRB_S32, Val1, Amt1).getReg(0);
+break;
+  }
+  default:
+llvm_unreachable("Unpack lowering not implemented");
+  }
+  B.buildBuildVectorTrunc(MI.getOperand(0).getReg(), {Lo, Hi});
+  MI.eraseFromParent();
+}
+
 static bool isSignedBFE(MachineInstr &MI) {
   if (GIntrinsic *GI = dyn_cast<GIntrinsic>(&MI))
 return (GI->is(Intrinsic::amdgcn_sbfe));
@@ -306,6 +362,33 @@ void 
RegBankLegalizeHelper::lowerSplitTo32Select(MachineIn

[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: add RegBankLegalize rules for extends and trunc (PR #132383)

2025-05-26 Thread Petar Avramovic via llvm-branch-commits

https://github.com/petar-avramovic updated 
https://github.com/llvm/llvm-project/pull/132383

>From 50839949c8622e52027d378405d2edc99d9df3a7 Mon Sep 17 00:00:00 2001
From: Petar Avramovic 
Date: Thu, 8 May 2025 12:03:28 +0200
Subject: [PATCH] AMDGPU/GlobalISel: add RegBankLegalize rules for extends and
 trunc

Uniform S1:
Truncs to uniform S1 and AnyExts from S1 are left as is as they are meant
to be combined away. Uniform S1 ZExt and SExt are lowered using select.
Divergent S1:
Trunc of VGPR to VCC is lowered as compare.
Extends of VCC are lowered using select.

For remaining types:
S32 to S64 ZExt and SExt are lowered using merge values, AnyExt and Trunc
are again left as is to be combined away.
Notably uniform S16 for SExt and Zext is not lowered to S32 and left as is
for instruction select to deal with them. This is because there are patterns
that check for S16 type.
---
 .../Target/AMDGPU/AMDGPURegBankLegalize.cpp   |   7 ++
 .../AMDGPU/AMDGPURegBankLegalizeHelper.cpp| 110 +-
 .../AMDGPU/AMDGPURegBankLegalizeHelper.h  |   1 +
 .../AMDGPU/AMDGPURegBankLegalizeRules.cpp |  47 +++-
 .../AMDGPU/AMDGPURegBankLegalizeRules.h   |   3 +
 .../GlobalISel/regbankselect-and-s1.mir   | 105 +
 .../GlobalISel/regbankselect-anyext.mir   |  59 +-
 .../AMDGPU/GlobalISel/regbankselect-sext.mir  | 100 ++--
 .../AMDGPU/GlobalISel/regbankselect-trunc.mir |  22 +++-
 .../AMDGPU/GlobalISel/regbankselect-zext.mir  |  89 +-
 10 files changed, 360 insertions(+), 183 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
index 4d8d3022b8080..4f9beeaacfaee 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
@@ -213,6 +213,13 @@ class AMDGPURegBankLegalizeCombiner {
   return;
 }
 
+if (DstTy == S64 && TruncSrcTy == S32) {
+  B.buildMergeLikeInstr(MI.getOperand(0).getReg(),
+{TruncSrc, B.buildUndef({SgprRB, S32})});
+  cleanUpAfterCombine(MI, Trunc);
+  return;
+}
+
 if (DstTy == S32 && TruncSrcTy == S16) {
   B.buildAnyExt(Dst, TruncSrc);
   cleanUpAfterCombine(MI, Trunc);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
index 050b6302a98c5..658982ceb68fe 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
@@ -133,6 +133,43 @@ void RegBankLegalizeHelper::widenLoad(MachineInstr &MI, 
LLT WideTy,
   MI.eraseFromParent();
 }
 
+void RegBankLegalizeHelper::lowerVccExtToSel(MachineInstr &MI) {
+  Register Dst = MI.getOperand(0).getReg();
+  LLT Ty = MRI.getType(Dst);
+  Register Src = MI.getOperand(1).getReg();
+  unsigned Opc = MI.getOpcode();
+  int TrueExtCst = Opc == G_SEXT ? -1 : 1;
+  if (Ty == S32 || Ty == S16) {
+auto True = B.buildConstant({VgprRB, Ty}, TrueExtCst);
+auto False = B.buildConstant({VgprRB, Ty}, 0);
+B.buildSelect(Dst, Src, True, False);
+  } else if (Ty == S64) {
+auto True = B.buildConstant({VgprRB_S32}, TrueExtCst);
+auto False = B.buildConstant({VgprRB_S32}, 0);
+auto Lo = B.buildSelect({VgprRB_S32}, Src, True, False);
+MachineInstrBuilder Hi;
+switch (Opc) {
+case G_SEXT:
+  Hi = Lo;
+  break;
+case G_ZEXT:
+  Hi = False;
+  break;
+case G_ANYEXT:
+  Hi = B.buildUndef({VgprRB_S32});
+  break;
+default:
+  llvm_unreachable("Opcode not supported");
+}
+
+B.buildMergeValues(Dst, {Lo.getReg(0), Hi.getReg(0)});
+  } else {
+llvm_unreachable("Type not supported");
+  }
+
+  MI.eraseFromParent();
+}
+
 static bool isSignedBFE(MachineInstr &MI) {
   if (GIntrinsic *GI = dyn_cast<GIntrinsic>(&MI))
 return (GI->is(Intrinsic::amdgcn_sbfe));
@@ -256,26 +293,8 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI,
   switch (Mapping.LoweringMethod) {
   case DoNotLower:
 return;
-  case VccExtToSel: {
-LLT Ty = MRI.getType(MI.getOperand(0).getReg());
-Register Src = MI.getOperand(1).getReg();
-unsigned Opc = MI.getOpcode();
-if (Ty == S32 || Ty == S16) {
-  auto True = B.buildConstant({VgprRB, Ty}, Opc == G_SEXT ? -1 : 1);
-  auto False = B.buildConstant({VgprRB, Ty}, 0);
-  B.buildSelect(MI.getOperand(0).getReg(), Src, True, False);
-}
-if (Ty == S64) {
-  auto True = B.buildConstant({VgprRB, S32}, Opc == G_SEXT ? -1 : 1);
-  auto False = B.buildConstant({VgprRB, S32}, 0);
-  auto Sel = B.buildSelect({VgprRB, S32}, Src, True, False);
-  B.buildMergeValues(
-  MI.getOperand(0).getReg(),
-  {Sel.getReg(0), Opc == G_SEXT ? Sel.getReg(0) : False.getReg(0)});
-}
-MI.eraseFromParent();
-return;
-  }
+  case VccExtToSel:
+return lowerVccExtToSel(MI);
   case UniExtToSel: {
 LLT Ty = MRI.getType(MI.getOperand(0).g

[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: add RegBankLegalize rules for bit shifts and sext-inreg (PR #132385)

2025-05-26 Thread Petar Avramovic via llvm-branch-commits

https://github.com/petar-avramovic updated 
https://github.com/llvm/llvm-project/pull/132385

>From 419cfe8adfe34443a380861fe2cfd36400c5fe1f Mon Sep 17 00:00:00 2001
From: Petar Avramovic 
Date: Mon, 14 Apr 2025 16:35:19 +0200
Subject: [PATCH] AMDGPU/GlobalISel: add RegBankLegalize rules for bit shifts
 and sext-inreg

Uniform S16 shifts have to be extended to S32 using appropriate Extend
before lowering to S32 instruction.
Uniform packed V2S16 are lowered to SGPR S32 instructions,
other option is to use VALU packed V2S16 and ReadAnyLane.
For uniform S32 and S64 and divergent S16, S32, S64 and V2S16 there are
instructions available.
---
 .../Target/AMDGPU/AMDGPURegBankLegalize.cpp   |   2 +-
 .../AMDGPU/AMDGPURegBankLegalizeHelper.cpp| 108 +
 .../AMDGPU/AMDGPURegBankLegalizeHelper.h  |   5 +
 .../AMDGPU/AMDGPURegBankLegalizeRules.cpp |  43 +++-
 .../AMDGPU/AMDGPURegBankLegalizeRules.h   |  11 +
 llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll   |  35 ++-
 llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll   | 218 ++
 .../AMDGPU/GlobalISel/regbankselect-ashr.mir  |   6 +-
 .../AMDGPU/GlobalISel/regbankselect-lshr.mir  |  17 +-
 .../GlobalISel/regbankselect-sext-inreg.mir   |  24 +-
 .../AMDGPU/GlobalISel/regbankselect-shl.mir   |   6 +-
 .../CodeGen/AMDGPU/GlobalISel/sext_inreg.ll   |  34 +--
 llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll|  31 ++-
 13 files changed, 375 insertions(+), 165 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
index 4f9beeaacfaee..ba661348ca5b5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
@@ -308,7 +308,7 @@ bool 
AMDGPURegBankLegalize::runOnMachineFunction(MachineFunction &MF) {
 // Opcodes that support pretty much all combinations of reg banks and LLTs
 // (except S1). There is no point in writing rules for them.
 if (Opc == AMDGPU::G_BUILD_VECTOR || Opc == AMDGPU::G_UNMERGE_VALUES ||
-Opc == AMDGPU::G_MERGE_VALUES) {
+Opc == AMDGPU::G_MERGE_VALUES || Opc == AMDGPU::G_BITCAST) {
   RBLHelper.applyMappingTrivial(*MI);
   continue;
 }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
index a7c1d7ab98adf..7ff822c6f6580 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
@@ -171,6 +171,62 @@ void RegBankLegalizeHelper::lowerVccExtToSel(MachineInstr 
&MI) {
   MI.eraseFromParent();
 }
 
+const std::pair<Register, Register>
+RegBankLegalizeHelper::unpackZExt(Register Reg) {
+  auto PackedS32 = B.buildBitcast(SgprRB_S32, Reg);
+  auto Mask = B.buildConstant(SgprRB_S32, 0x0000ffff);
+  auto Lo = B.buildAnd(SgprRB_S32, PackedS32, Mask);
+  auto Hi = B.buildLShr(SgprRB_S32, PackedS32, B.buildConstant(SgprRB_S32, 
16));
+  return {Lo.getReg(0), Hi.getReg(0)};
+}
+
+const std::pair<Register, Register>
+RegBankLegalizeHelper::unpackSExt(Register Reg) {
+  auto PackedS32 = B.buildBitcast(SgprRB_S32, Reg);
+  auto Lo = B.buildSExtInReg(SgprRB_S32, PackedS32, 16);
+  auto Hi = B.buildAShr(SgprRB_S32, PackedS32, B.buildConstant(SgprRB_S32, 
16));
+  return {Lo.getReg(0), Hi.getReg(0)};
+}
+
+const std::pair<Register, Register>
+RegBankLegalizeHelper::unpackAExt(Register Reg) {
+  auto PackedS32 = B.buildBitcast(SgprRB_S32, Reg);
+  auto Lo = PackedS32;
+  auto Hi = B.buildLShr(SgprRB_S32, PackedS32, B.buildConstant(SgprRB_S32, 
16));
+  return {Lo.getReg(0), Hi.getReg(0)};
+}
+
+void RegBankLegalizeHelper::lowerUnpackBitShift(MachineInstr &MI) {
+  Register Lo, Hi;
+  switch (MI.getOpcode()) {
+  case AMDGPU::G_SHL: {
+auto [Val0, Val1] = unpackAExt(MI.getOperand(1).getReg());
+auto [Amt0, Amt1] = unpackAExt(MI.getOperand(2).getReg());
+Lo = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val0, Amt0}).getReg(0);
+Hi = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val1, Amt1}).getReg(0);
+break;
+  }
+  case AMDGPU::G_LSHR: {
+auto [Val0, Val1] = unpackZExt(MI.getOperand(1).getReg());
+auto [Amt0, Amt1] = unpackZExt(MI.getOperand(2).getReg());
+Lo = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val0, Amt0}).getReg(0);
+Hi = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val1, Amt1}).getReg(0);
+break;
+  }
+  case AMDGPU::G_ASHR: {
+auto [Val0, Val1] = unpackSExt(MI.getOperand(1).getReg());
+auto [Amt0, Amt1] = unpackSExt(MI.getOperand(2).getReg());
+Lo = B.buildAShr(SgprRB_S32, Val0, Amt0).getReg(0);
+Hi = B.buildAShr(SgprRB_S32, Val1, Amt1).getReg(0);
+break;
+  }
+  default:
+llvm_unreachable("Unpack lowering not implemented");
+  }
+  B.buildBuildVectorTrunc(MI.getOperand(0).getReg(), {Lo, Hi});
+  MI.eraseFromParent();
+}
+
 static bool isSignedBFE(MachineInstr &MI) {
   if (GIntrinsic *GI = dyn_cast<GIntrinsic>(&MI))
 return (GI->is(Intrinsic::amdgcn_sbfe));
@@ -306,6 +362,33 @@ void 
RegBankLegalizeHelper::lowerSplitTo32Select(MachineIn

[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: add RegBankLegalize rules for bit shifts and sext-inreg (PR #132385)

2025-05-26 Thread Petar Avramovic via llvm-branch-commits

petar-avramovic wrote:

### Merge activity

* **May 26, 10:03 AM UTC**: A user started a stack merge that includes this 
pull request via 
[Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/132385).


https://github.com/llvm/llvm-project/pull/132385
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: add RegBankLegalize rules for extends and trunc (PR #132383)

2025-05-26 Thread Petar Avramovic via llvm-branch-commits

petar-avramovic wrote:

### Merge activity

* **May 26, 10:03 AM UTC**: A user started a stack merge that includes this 
pull request via 
[Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/132383).


https://github.com/llvm/llvm-project/pull/132383
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: add RegBankLegalize rules for select (PR #132384)

2025-05-26 Thread Petar Avramovic via llvm-branch-commits

petar-avramovic wrote:

### Merge activity

* **May 26, 10:03 AM UTC**: A user started a stack merge that includes this 
pull request via 
[Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/132384).


https://github.com/llvm/llvm-project/pull/132384
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] Change inSectionBlame to return pair (FileIdx, LineNo). (PR #141540)

2025-05-26 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-support

Author: Qinkun Bao (qinkunbao)


Changes

According to the discussion in
https://github.com/llvm/llvm-project/pull/140529, the SSCL needs to support
being created from multiple ignore list files, so that
-fsanitize-ignorelist= can be repeated. This change is necessary to achieve
the feature described in https://github.com/llvm/llvm-project/issues/139128.


---
Full diff: https://github.com/llvm/llvm-project/pull/141540.diff


4 Files Affected:

- (modified) llvm/include/llvm/Support/SpecialCaseList.h (+9-6) 
- (modified) llvm/lib/Support/SpecialCaseList.cpp (+10-6) 
- (modified) llvm/tools/llvm-cfi-verify/llvm-cfi-verify.cpp (+9-6) 
- (modified) llvm/unittests/Support/SpecialCaseListTest.cpp (+45-17) 


``diff
diff --git a/llvm/include/llvm/Support/SpecialCaseList.h 
b/llvm/include/llvm/Support/SpecialCaseList.h
index 653a3b14ebf03..bce337f553a93 100644
--- a/llvm/include/llvm/Support/SpecialCaseList.h
+++ b/llvm/include/llvm/Support/SpecialCaseList.h
@@ -17,6 +17,7 @@
 #include "llvm/Support/GlobPattern.h"
 #include "llvm/Support/Regex.h"
 #include 
+#include 
 #include 
 #include 
 
@@ -93,17 +94,17 @@ class SpecialCaseList {
   LLVM_ABI bool inSection(StringRef Section, StringRef Prefix, StringRef Query,
   StringRef Category = StringRef()) const;
 
-  /// Returns the line number corresponding to the special case list entry if
-  /// the special case list contains a line
+  /// Returns the file index and the line number
corresponding
+  /// to the special case list entry if the special case list contains a line
   /// \code
   ///   @Prefix:=@Category
   /// \endcode
   /// where @Query satisfies the glob  in a given @Section.
-  /// Returns zero if there is no exclusion entry corresponding to this
+  /// Returns (zero, zero) if there is no exclusion entry corresponding to this
   /// expression.
-  LLVM_ABI unsigned inSectionBlame(StringRef Section, StringRef Prefix,
-   StringRef Query,
-   StringRef Category = StringRef()) const;
+  LLVM_ABI std::pair
+  inSectionBlame(StringRef Section, StringRef Prefix, StringRef Query,
+ StringRef Category = StringRef()) const;
 
 protected:
   // Implementations of the create*() functions that can also be used by 
derived
@@ -145,12 +146,14 @@ class SpecialCaseList {
 Section(std::unique_ptr M) : SectionMatcher(std::move(M)) {};
 Section() : Section(std::make_unique()) {};
 
+unsigned FileIdx;
 std::unique_ptr SectionMatcher;
 SectionEntries Entries;
 std::string SectionStr;
   };
 
   std::vector Sections;
+  unsigned currFileIdx;
 
   LLVM_ABI Expected addSection(StringRef SectionStr, unsigned 
LineNo,
   bool UseGlobs = true);
diff --git a/llvm/lib/Support/SpecialCaseList.cpp 
b/llvm/lib/Support/SpecialCaseList.cpp
index 56327b56282c2..5a66f163d7913 100644
--- a/llvm/lib/Support/SpecialCaseList.cpp
+++ b/llvm/lib/Support/SpecialCaseList.cpp
@@ -112,6 +112,7 @@ bool SpecialCaseList::createInternal(const 
std::vector &Paths,
   return false;
 }
 std::string ParseError;
+++currFileIdx;
 if (!parse(FileOrErr.get().get(), ParseError)) {
   Error = (Twine("error parsing file '") + Path + "': " + 
ParseError).str();
   return false;
@@ -122,6 +123,7 @@ bool SpecialCaseList::createInternal(const 
std::vector &Paths,
 
 bool SpecialCaseList::createInternal(const MemoryBuffer *MB,
  std::string &Error) {
+  ++currFileIdx;
   if (!parse(MB, Error))
 return false;
   return true;
@@ -133,6 +135,7 @@ SpecialCaseList::addSection(StringRef SectionStr, unsigned 
LineNo,
   Sections.emplace_back();
   auto &Section = Sections.back();
   Section.SectionStr = SectionStr;
+  Section.FileIdx = currFileIdx;
 
   if (auto Err = Section.SectionMatcher->insert(SectionStr, LineNo, UseGlobs)) 
{
 return createStringError(errc::invalid_argument,
@@ -207,20 +210,21 @@ SpecialCaseList::~SpecialCaseList() = default;
 
 bool SpecialCaseList::inSection(StringRef Section, StringRef Prefix,
 StringRef Query, StringRef Category) const {
-  return inSectionBlame(Section, Prefix, Query, Category);
+  auto [FileIdx, LineNo] = inSectionBlame(Section, Prefix, Query, Category);
+  return LineNo;
 }
 
-unsigned SpecialCaseList::inSectionBlame(StringRef Section, StringRef Prefix,
- StringRef Query,
- StringRef Category) const {
+std::pair
+SpecialCaseList::inSectionBlame(StringRef Section, StringRef Prefix,
+StringRef Query, StringRef Category) const {
   for (auto it = Sections.crbegin(); it != Sections.crend(); ++it) {
 if (it->SectionMatcher->match(Section)) {
   unsigned Blame = inSectionBlame(it->Entries, Prefix, Query, Category);
   if (Blame)
-return Bla

[llvm-branch-commits] Change inSectionBlame to return pair (FileIdx, LineNo). (PR #141540)

2025-05-26 Thread via llvm-branch-commits

github-actions[bot] wrote:




:warning: C/C++ code formatter, clang-format found issues in your code. 
:warning:



You can test this locally with the following command:


``bash
git-clang-format --diff HEAD~1 HEAD --extensions h,cpp -- 
llvm/include/llvm/Support/SpecialCaseList.h 
llvm/lib/Support/SpecialCaseList.cpp 
llvm/tools/llvm-cfi-verify/llvm-cfi-verify.cpp 
llvm/unittests/Support/SpecialCaseListTest.cpp
``





View the diff from clang-format here.


``diff
diff --git a/llvm/include/llvm/Support/SpecialCaseList.h 
b/llvm/include/llvm/Support/SpecialCaseList.h
index bce337f55..d54b242a9 100644
--- a/llvm/include/llvm/Support/SpecialCaseList.h
+++ b/llvm/include/llvm/Support/SpecialCaseList.h
@@ -17,8 +17,8 @@
 #include "llvm/Support/GlobPattern.h"
 #include "llvm/Support/Regex.h"
 #include 
-#include 
 #include 
+#include 
 #include 
 
 namespace llvm {

``




https://github.com/llvm/llvm-project/pull/141540
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] Add a GUIDLIST table to bitcode (PR #139497)

2025-05-26 Thread Owen Rodley via llvm-branch-commits

https://github.com/orodley updated 
https://github.com/llvm/llvm-project/pull/139497

>From 553845ef071219713cd6abe74310e33603c20ef1 Mon Sep 17 00:00:00 2001
From: Owen Rodley 
Date: Mon, 12 May 2025 15:50:22 +1000
Subject: [PATCH] Add a GUIDLIST table to bitcode

---
 llvm/include/llvm/Bitcode/LLVMBitCodes.h  |  3 +++
 llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 11 +++---
 llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 25 +++
 3 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h 
b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
index 92b6e68d9d0a7..8acba6477c4a1 100644
--- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -120,6 +120,9 @@ enum ModuleCodes {
 
   // IFUNC: [ifunc value type, addrspace, resolver val#, linkage, visibility]
   MODULE_CODE_IFUNC = 18,
+
+  // GUIDLIST: [n x i64]
+  MODULE_CODE_GUIDLIST = 19,
 };
 
 /// PARAMATTR blocks have code for defining a parameter attribute set.
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp 
b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 1d7aa189026a5..6d36b007956a0 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -980,6 +980,9 @@ class ModuleSummaryIndexBitcodeReader : public 
BitcodeReaderBase {
   /// the CallStackRadixTreeBuilder class in ProfileData/MemProf.h for format.
   std::vector RadixArray;
 
+  // A table which maps ValueID to the GUID for that value.
+  std::vector DefinedGUIDs;
+
 public:
   ModuleSummaryIndexBitcodeReader(
   BitstreamCursor Stream, StringRef Strtab, ModuleSummaryIndex &TheIndex,
@@ -7164,9 +7167,7 @@ 
ModuleSummaryIndexBitcodeReader::getValueInfoFromValueId(unsigned ValueId) {
 void ModuleSummaryIndexBitcodeReader::setValueGUID(
 uint64_t ValueID, StringRef ValueName, GlobalValue::LinkageTypes Linkage,
 StringRef SourceFileName) {
-  std::string GlobalId =
-  GlobalValue::getGlobalIdentifier(ValueName, Linkage, SourceFileName);
-  auto ValueGUID = GlobalValue::getGUIDAssumingExternalLinkage(GlobalId);
+  auto ValueGUID = DefinedGUIDs[ValueID];
   auto OriginalNameID = ValueGUID;
   if (GlobalValue::isLocalLinkage(Linkage))
 OriginalNameID = GlobalValue::getGUIDAssumingExternalLinkage(ValueName);
@@ -7389,6 +7390,10 @@ Error ModuleSummaryIndexBitcodeReader::parseModule() {
   // was historically always the start of the regular bitcode header.
   VSTOffset = Record[0] - 1;
   break;
+// MODULE_CODE_GUIDLIST: [i64 x N]
+case bitc::MODULE_CODE_GUIDLIST:
+  llvm::append_range(DefinedGUIDs, Record);
+  break;
 // v1 GLOBALVAR: [pointer type, isconst, initid,   linkage, 
...]
 // v1 FUNCTION:  [type, callingconv, isproto,  linkage, 
...]
 // v1 ALIAS: [alias type,   addrspace,   aliasee val#, linkage, 
...]
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp 
b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 73bed85c65b3d..3e19220d1bde7 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -227,6 +227,7 @@ class ModuleBitcodeWriterBase : public BitcodeWriterBase {
 
 protected:
   void writePerModuleGlobalValueSummary();
+  void writeGUIDList();
 
 private:
   void writePerModuleFunctionSummaryRecord(
@@ -1560,6 +1561,8 @@ void ModuleBitcodeWriter::writeModuleInfo() {
 Vals.clear();
   }
 
+  writeGUIDList();
+
   // Emit the global variable information.
   for (const GlobalVariable &GV : M.globals()) {
 unsigned AbbrevToUse = 0;
@@ -4755,6 +4758,26 @@ void 
ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
   Stream.ExitBlock();
 }
 
+void ModuleBitcodeWriterBase::writeGUIDList() {
+  std::vector GUIDs;
+  GUIDs.reserve(M.global_size() + M.size() + M.alias_size());
+
+  for (const GlobalValue &GV : M.global_objects()) {
+if (GV.isDeclaration()) {
+  GUIDs.push_back(
+  GlobalValue::getGUIDAssumingExternalLinkage(GV.getName()));
+} else {
+  GUIDs.push_back(GV.getGUID());
+}
+  }
+  for (const GlobalAlias &GA : M.aliases()) {
+// Equivalent to the above loop, as GlobalAliases are always definitions.
+GUIDs.push_back(GA.getGUID());
+  }
+
+  Stream.EmitRecord(bitc::MODULE_CODE_GUIDLIST, GUIDs);
+}
+
 /// Emit the combined summary section into the combined index file.
 void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
   Stream.EnterSubblock(bitc::GLOBALVAL_SUMMARY_BLOCK_ID, 4);
@@ -5538,6 +5561,8 @@ void ThinLinkBitcodeWriter::writeSimplifiedModuleInfo() {
 Vals.clear();
   }
 
+  writeGUIDList();
+
   // Emit the global variable information.
   for (const GlobalVariable &GV : M.globals()) {
 // GLOBALVAR: [strtab offset, strtab size, 0, 0, 0, linkage]

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] Add a GUIDLIST table to bitcode (PR #139497)

2025-05-26 Thread Owen Rodley via llvm-branch-commits

https://github.com/orodley updated 
https://github.com/llvm/llvm-project/pull/139497

>From 553845ef071219713cd6abe74310e33603c20ef1 Mon Sep 17 00:00:00 2001
From: Owen Rodley 
Date: Mon, 12 May 2025 15:50:22 +1000
Subject: [PATCH] Add a GUIDLIST table to bitcode

---
 llvm/include/llvm/Bitcode/LLVMBitCodes.h  |  3 +++
 llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 11 +++---
 llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 25 +++
 3 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h 
b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
index 92b6e68d9d0a7..8acba6477c4a1 100644
--- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -120,6 +120,9 @@ enum ModuleCodes {
 
   // IFUNC: [ifunc value type, addrspace, resolver val#, linkage, visibility]
   MODULE_CODE_IFUNC = 18,
+
+  // GUIDLIST: [n x i64]
+  MODULE_CODE_GUIDLIST = 19,
 };
 
 /// PARAMATTR blocks have code for defining a parameter attribute set.
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp 
b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 1d7aa189026a5..6d36b007956a0 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -980,6 +980,9 @@ class ModuleSummaryIndexBitcodeReader : public 
BitcodeReaderBase {
   /// the CallStackRadixTreeBuilder class in ProfileData/MemProf.h for format.
   std::vector RadixArray;
 
+  // A table which maps ValueID to the GUID for that value.
+  std::vector DefinedGUIDs;
+
 public:
   ModuleSummaryIndexBitcodeReader(
   BitstreamCursor Stream, StringRef Strtab, ModuleSummaryIndex &TheIndex,
@@ -7164,9 +7167,7 @@ 
ModuleSummaryIndexBitcodeReader::getValueInfoFromValueId(unsigned ValueId) {
 void ModuleSummaryIndexBitcodeReader::setValueGUID(
 uint64_t ValueID, StringRef ValueName, GlobalValue::LinkageTypes Linkage,
 StringRef SourceFileName) {
-  std::string GlobalId =
-  GlobalValue::getGlobalIdentifier(ValueName, Linkage, SourceFileName);
-  auto ValueGUID = GlobalValue::getGUIDAssumingExternalLinkage(GlobalId);
+  auto ValueGUID = DefinedGUIDs[ValueID];
   auto OriginalNameID = ValueGUID;
   if (GlobalValue::isLocalLinkage(Linkage))
 OriginalNameID = GlobalValue::getGUIDAssumingExternalLinkage(ValueName);
@@ -7389,6 +7390,10 @@ Error ModuleSummaryIndexBitcodeReader::parseModule() {
   // was historically always the start of the regular bitcode header.
   VSTOffset = Record[0] - 1;
   break;
+// MODULE_CODE_GUIDLIST: [i64 x N]
+case bitc::MODULE_CODE_GUIDLIST:
+  llvm::append_range(DefinedGUIDs, Record);
+  break;
 // v1 GLOBALVAR: [pointer type, isconst, initid,   linkage, 
...]
 // v1 FUNCTION:  [type, callingconv, isproto,  linkage, 
...]
 // v1 ALIAS: [alias type,   addrspace,   aliasee val#, linkage, 
...]
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp 
b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 73bed85c65b3d..3e19220d1bde7 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -227,6 +227,7 @@ class ModuleBitcodeWriterBase : public BitcodeWriterBase {
 
 protected:
   void writePerModuleGlobalValueSummary();
+  void writeGUIDList();
 
 private:
   void writePerModuleFunctionSummaryRecord(
@@ -1560,6 +1561,8 @@ void ModuleBitcodeWriter::writeModuleInfo() {
 Vals.clear();
   }
 
+  writeGUIDList();
+
   // Emit the global variable information.
   for (const GlobalVariable &GV : M.globals()) {
 unsigned AbbrevToUse = 0;
@@ -4755,6 +4758,26 @@ void 
ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
   Stream.ExitBlock();
 }
 
+void ModuleBitcodeWriterBase::writeGUIDList() {
+  std::vector GUIDs;
+  GUIDs.reserve(M.global_size() + M.size() + M.alias_size());
+
+  for (const GlobalValue &GV : M.global_objects()) {
+if (GV.isDeclaration()) {
+  GUIDs.push_back(
+  GlobalValue::getGUIDAssumingExternalLinkage(GV.getName()));
+} else {
+  GUIDs.push_back(GV.getGUID());
+}
+  }
+  for (const GlobalAlias &GA : M.aliases()) {
+// Equivalent to the above loop, as GlobalAliases are always definitions.
+GUIDs.push_back(GA.getGUID());
+  }
+
+  Stream.EmitRecord(bitc::MODULE_CODE_GUIDLIST, GUIDs);
+}
+
 /// Emit the combined summary section into the combined index file.
 void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
   Stream.EnterSubblock(bitc::GLOBALVAL_SUMMARY_BLOCK_ID, 4);
@@ -5538,6 +5561,8 @@ void ThinLinkBitcodeWriter::writeSimplifiedModuleInfo() {
 Vals.clear();
   }
 
+  writeGUIDList();
+
   // Emit the global variable information.
   for (const GlobalVariable &GV : M.globals()) {
 // GLOBALVAR: [strtab offset, strtab size, 0, 0, 0, linkage]

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] Change inSectionBlame to return pair (FileIdx, LineNo). (PR #141540)

2025-05-26 Thread Qinkun Bao via llvm-branch-commits

https://github.com/qinkunbao created 
https://github.com/llvm/llvm-project/pull/141540

According to the discussion in
https://github.com/llvm/llvm-project/pull/140529, the SSCL needs to support
being created from multiple ignore list files, so that
-fsanitize-ignorelist= can be repeated. This change is necessary to achieve
the feature described in https://github.com/llvm/llvm-project/issues/139128.



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang-tools-extra] [clang-doc] Track if a type is a template or builtin (PR #138067)

2025-05-26 Thread Petr Hosek via llvm-branch-commits

https://github.com/petrhosek approved this pull request.


https://github.com/llvm/llvm-project/pull/138067
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/20.x: [Driver] Fix _XOPEN_SOURCE definition on Solaris (#137141) (PR #140044)

2025-05-26 Thread Brad Smith via llvm-branch-commits

https://github.com/brad0 approved this pull request.


https://github.com/llvm/llvm-project/pull/140044
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang-tools-extra] [clang-doc] Update clang-doc tool to enable mustache templates (PR #138066)

2025-05-26 Thread Petr Hosek via llvm-branch-commits

https://github.com/petrhosek approved this pull request.


https://github.com/llvm/llvm-project/pull/138066
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] Change inSectionBlame to return pair (FileIdx, LineNo). (PR #141540)

2025-05-26 Thread Qinkun Bao via llvm-branch-commits

https://github.com/qinkunbao updated 
https://github.com/llvm/llvm-project/pull/141540

>From d5508cc217f413b3bbb7a301b2110cfc0c2c6cbc Mon Sep 17 00:00:00 2001
From: Qinkun Bao 
Date: Tue, 27 May 2025 03:24:26 +
Subject: [PATCH] Format SpecialCaseList.h

Created using spr 1.3.6
---
 llvm/include/llvm/Support/SpecialCaseList.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/include/llvm/Support/SpecialCaseList.h 
b/llvm/include/llvm/Support/SpecialCaseList.h
index bce337f553a93..d54b242a9c501 100644
--- a/llvm/include/llvm/Support/SpecialCaseList.h
+++ b/llvm/include/llvm/Support/SpecialCaseList.h
@@ -17,8 +17,8 @@
 #include "llvm/Support/GlobPattern.h"
 #include "llvm/Support/Regex.h"
 #include 
-#include 
 #include 
+#include 
 #include 
 
 namespace llvm {

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [HLSL] Diagnose overlapping resource bindings (PR #140982)

2025-05-26 Thread Finn Plummer via llvm-branch-commits


@@ -50,15 +51,55 @@ static void reportInvalidDirection(Module &M, 
DXILResourceMap &DRM) {
   }
 }
 
-} // namespace
+static void reportOverlappingError(Module &M, ResourceInfo R1,
+   ResourceInfo R2) {
+  SmallString<64> Message;
+  raw_svector_ostream OS(Message);
+  OS << "resource " << R1.getName() << " at register "
+ << R1.getBinding().LowerBound << " overlaps with resource " << 
R2.getName()
+ << " at register " << R2.getBinding().LowerBound << ", space "
+ << R2.getBinding().Space;
+  M.getContext().diagnose(DiagnosticInfoGeneric(Message));
+}
 
-PreservedAnalyses
-DXILPostOptimizationValidation::run(Module &M, ModuleAnalysisManager &MAM) {
-  DXILResourceMap &DRM = MAM.getResult(M);
+static void reportOverlappingBinding(Module &M, DXILResourceMap &DRM) {
+  if (DRM.empty())
+return;
 
+  for (auto ResList :
+   {DRM.srvs(), DRM.uavs(), DRM.cbuffers(), DRM.samplers()}) {
+if (ResList.empty())
+  continue;
+const ResourceInfo *PrevRI = &*ResList.begin();
+for (auto *I = ResList.begin() + 1; I != ResList.end(); ++I) {
+  const ResourceInfo *RI = &*I;
+  if (PrevRI->getBinding().overlapsWith(RI->getBinding())) {

inbelic wrote:

The `overlapsWith` check requires that `LHS.LowerBound < RHS.LowerBound`.
So I want to check that:

1. these are sorted
2. the `std::tie` works as expected in the `<` comparison. Since `RecordID` is
compared before `LowerBound`, could a binding have a lower `RecordID` but a
higher `LowerBound`?

https://github.com/llvm/llvm-project/pull/140982
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Implement validation of resource ranges for `RootDescriptors` (PR #140962)

2025-05-26 Thread Deric C. via llvm-branch-commits


@@ -951,6 +952,108 @@ void SemaHLSL::emitLogicalOperatorFixIt(Expr *LHS, Expr 
*RHS,
   << NewFnName << FixItHint::CreateReplacement(FullRange, OS.str());
 }
 
+namespace {
+
+// A resource range overlaps with another resource range if they have:
+// - equivalent ResourceClass (SRV, UAV, CBuffer, Sampler)
+// - equivalent resource space
+// - overlapping visibility
+class ResourceRanges {
+public:
+  // KeyT: 32-lsb denotes resource space, and 32-msb denotes resource type enum
+  using KeyT = uint64_t;
+
+  static const unsigned NumVisEnums =
+  (unsigned)llvm::hlsl::rootsig::ShaderVisibility::NumEnums;
+
+private:
+  llvm::hlsl::rootsig::ResourceRange::IMap::Allocator Allocator;
+
+  // Denotes a mapping of a unique combination of ResourceClass and register
+  // space to a ResourceRange
+  using MapT = llvm::SmallDenseMap;
+
+  // Denotes a mapping for each unique visibility
+  MapT RangeMaps[NumVisEnums];
+
+  constexpr static KeyT getKey(const llvm::hlsl::rootsig::RangeInfo &Info) {
+uint64_t SpacePacked = (uint64_t)Info.Space;
+uint64_t ClassPacked = (uint64_t)llvm::to_underlying(Info.Class);
+return (ClassPacked << 32) | SpacePacked;
+  }
+
+public:
+  // Returns std::nullopt if there was no collision. Otherwise, it will
+  // return the RangeInfo of the collision
+  std::optional
+  addRange(const llvm::hlsl::rootsig::RangeInfo &Info) {
+MapT &VisRangeMap = RangeMaps[llvm::to_underlying(Info.Vis)];
+auto [It, _] = VisRangeMap.insert(
+{getKey(Info), llvm::hlsl::rootsig::ResourceRange(Allocator)});
+auto Res = It->second.insert(Info);
+if (Res.has_value())
+  return Res;
+
+MutableArrayRef Maps =

Icohedron wrote:

Perhaps a comment here would be helpful to explain more clearly what this and 
the following loop are doing.

https://github.com/llvm/llvm-project/pull/140962
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Implement validation of resource ranges for `RootDescriptors` (PR #140962)

2025-05-26 Thread Deric C. via llvm-branch-commits


@@ -951,6 +952,108 @@ void SemaHLSL::emitLogicalOperatorFixIt(Expr *LHS, Expr 
*RHS,
   << NewFnName << FixItHint::CreateReplacement(FullRange, OS.str());
 }
 
+namespace {
+
+// A resource range overlaps with another resource range if they have:
+// - equivalent ResourceClass (SRV, UAV, CBuffer, Sampler)
+// - equivalent resource space
+// - overlapping visibility
+class ResourceRanges {
+public:
+  // KeyT: 32-lsb denotes resource space, and 32-msb denotes resource type enum
+  using KeyT = uint64_t;

Icohedron wrote:

Is there a reason to store the resource space and ResourceClass together as a 
64-bit integer as opposed to, say, a `std::pair`?

https://github.com/llvm/llvm-project/pull/140962
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Implement validation of resource ranges for `RootDescriptors` (PR #140962)

2025-05-26 Thread Deric C. via llvm-branch-commits


@@ -951,6 +952,108 @@ void SemaHLSL::emitLogicalOperatorFixIt(Expr *LHS, Expr 
*RHS,
   << NewFnName << FixItHint::CreateReplacement(FullRange, OS.str());
 }
 
+namespace {
+
+// A resource range overlaps with another resource range if they have:
+// - equivalent ResourceClass (SRV, UAV, CBuffer, Sampler)
+// - equivalent resource space
+// - overlapping visibility
+class ResourceRanges {
+public:
+  // KeyT: 32-lsb denotes resource space, and 32-msb denotes resource type enum

Icohedron wrote:

```suggestion
  // KeyT: 32-lsb denotes resource space, and 32-msb denotes ResourceClass enum
```

https://github.com/llvm/llvm-project/pull/140962
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Implement validation of resource ranges for `RootDescriptors` (PR #140962)

2025-05-26 Thread Deric C. via llvm-branch-commits


@@ -951,6 +952,108 @@ void SemaHLSL::emitLogicalOperatorFixIt(Expr *LHS, Expr 
*RHS,
   << NewFnName << FixItHint::CreateReplacement(FullRange, OS.str());
 }
 
+namespace {
+
+// A resource range overlaps with another resource range if they have:
+// - equivalent ResourceClass (SRV, UAV, CBuffer, Sampler)
+// - equivalent resource space
+// - overlapping visibility
+class ResourceRanges {
+public:
+  // KeyT: 32-lsb denotes resource space, and 32-msb denotes resource type enum
+  using KeyT = uint64_t;
+
+  static const unsigned NumVisEnums =

Icohedron wrote:

When should `unsigned` be used as opposed to `uint32_t`? I noticed there has
been some inconsistency over which type is used.

https://github.com/llvm/llvm-project/pull/140962
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [RISCV][Driver] Add riscv emulation mode to linker job of BareMetal toolchain (PR #134442)

2025-05-26 Thread Garvit Gupta via llvm-branch-commits

https://github.com/quic-garvgupt updated 
https://github.com/llvm/llvm-project/pull/134442

>From 1459a1e87a2c31b51ad3473b2735358ae39c0764 Mon Sep 17 00:00:00 2001
From: Garvit Gupta 
Date: Fri, 4 Apr 2025 12:51:19 -0700
Subject: [PATCH] [RISCV][Driver] Add riscv emulation mode to linker job of
 BareMetal toolchain

Change-Id: Ifce8a3a7f1df9c12561d35ca3c923595e3619428
---
 clang/lib/Driver/ToolChains/BareMetal.cpp  | 17 --
 clang/lib/Driver/ToolChains/CommonArgs.cpp | 70 ++
 clang/lib/Driver/ToolChains/CommonArgs.h   |  2 +
 clang/lib/Driver/ToolChains/Gnu.cpp| 70 --
 clang/test/Driver/baremetal.cpp| 44 +++---
 5 files changed, 106 insertions(+), 97 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/BareMetal.cpp 
b/clang/lib/Driver/ToolChains/BareMetal.cpp
index 4a4b94adefb44..feb1ebd34854a 100644
--- a/clang/lib/Driver/ToolChains/BareMetal.cpp
+++ b/clang/lib/Driver/ToolChains/BareMetal.cpp
@@ -551,8 +551,18 @@ void baremetal::Linker::ConstructJob(Compilation &C, const 
JobAction &JA,
 
   CmdArgs.push_back("-Bstatic");
 
-  if (TC.getTriple().isRISCV() && Args.hasArg(options::OPT_mno_relax))
-CmdArgs.push_back("--no-relax");
+  if (Triple.isRISCV()) {
+CmdArgs.push_back("-X");
+if (Args.hasArg(options::OPT_mno_relax))
+  CmdArgs.push_back("--no-relax");
+if (const char *LDMOption = getLDMOption(TC.getTriple(), Args)) {
+  CmdArgs.push_back("-m");
+  CmdArgs.push_back(LDMOption);
+} else {
+  D.Diag(diag::err_target_unknown_triple) << Triple.str();
+  return;
+}
+  }
 
   if (Triple.isARM() || Triple.isThumb()) {
 bool IsBigEndian = arm::isARMBigEndian(Triple, Args);
@@ -639,9 +649,6 @@ void baremetal::Linker::ConstructJob(Compilation &C, const 
JobAction &JA,
   NeedCRTs)
 CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath(CRTEnd)));
 
-  if (TC.getTriple().isRISCV())
-CmdArgs.push_back("-X");
-
   // The R_ARM_TARGET2 relocation must be treated as R_ARM_REL32 on arm*-*-elf
   // and arm*-*-eabi (the default is R_ARM_GOT_PREL, used on arm*-*-linux and
   // arm*-*-*bsd).
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp 
b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index ddeadff8f6dfb..292d52acdc002 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -535,6 +535,76 @@ void tools::AddLinkerInputs(const ToolChain &TC, const 
InputInfoList &Inputs,
   }
 }
 
+const char *tools::getLDMOption(const llvm::Triple &T, const ArgList &Args) {
+  switch (T.getArch()) {
+  case llvm::Triple::x86:
+if (T.isOSIAMCU())
+  return "elf_iamcu";
+return "elf_i386";
+  case llvm::Triple::aarch64:
+return "aarch64linux";
+  case llvm::Triple::aarch64_be:
+return "aarch64linuxb";
+  case llvm::Triple::arm:
+  case llvm::Triple::thumb:
+  case llvm::Triple::armeb:
+  case llvm::Triple::thumbeb:
+return tools::arm::isARMBigEndian(T, Args) ? "armelfb_linux_eabi"
+   : "armelf_linux_eabi";
+  case llvm::Triple::m68k:
+return "m68kelf";
+  case llvm::Triple::ppc:
+if (T.isOSLinux())
+  return "elf32ppclinux";
+return "elf32ppc";
+  case llvm::Triple::ppcle:
+if (T.isOSLinux())
+  return "elf32lppclinux";
+return "elf32lppc";
+  case llvm::Triple::ppc64:
+return "elf64ppc";
+  case llvm::Triple::ppc64le:
+return "elf64lppc";
+  case llvm::Triple::riscv32:
+return "elf32lriscv";
+  case llvm::Triple::riscv64:
+return "elf64lriscv";
+  case llvm::Triple::sparc:
+  case llvm::Triple::sparcel:
+return "elf32_sparc";
+  case llvm::Triple::sparcv9:
+return "elf64_sparc";
+  case llvm::Triple::loongarch32:
+return "elf32loongarch";
+  case llvm::Triple::loongarch64:
+return "elf64loongarch";
+  case llvm::Triple::mips:
+return "elf32btsmip";
+  case llvm::Triple::mipsel:
+return "elf32ltsmip";
+  case llvm::Triple::mips64:
+if (tools::mips::hasMipsAbiArg(Args, "n32") || T.isABIN32())
+  return "elf32btsmipn32";
+return "elf64btsmip";
+  case llvm::Triple::mips64el:
+if (tools::mips::hasMipsAbiArg(Args, "n32") || T.isABIN32())
+  return "elf32ltsmipn32";
+return "elf64ltsmip";
+  case llvm::Triple::systemz:
+return "elf64_s390";
+  case llvm::Triple::x86_64:
+if (T.isX32())
+  return "elf32_x86_64";
+return "elf_x86_64";
+  case llvm::Triple::ve:
+return "elf64ve";
+  case llvm::Triple::csky:
+return "cskyelf_linux";
+  default:
+return nullptr;
+  }
+}
+
 void tools::addLinkerCompressDebugSectionsOption(
 const ToolChain &TC, const llvm::opt::ArgList &Args,
 llvm::opt::ArgStringList &CmdArgs) {
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.h 
b/clang/lib/Driver/ToolChains/CommonArgs.h
index 96bc0619dcbc0..875354e969a2a 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.h
+++ b/clang/lib/Driver/ToolChains/CommonArgs.h
@@ -31,6 +31,8 @@ void Ad

[llvm-branch-commits] [clang] [RISCV] Integrate RISCV target in baremetal toolchain object and deprecate RISCVToolchain object (PR #121831)

2025-05-26 Thread Garvit Gupta via llvm-branch-commits

https://github.com/quic-garvgupt updated 
https://github.com/llvm/llvm-project/pull/121831

>From ac5f3b202b8c1c079e5365849b0e268e01fd4479 Mon Sep 17 00:00:00 2001
From: Garvit Gupta 
Date: Mon, 6 Jan 2025 10:05:08 -0800
Subject: [PATCH] [RISCV] Integrate RISCV target in baremetal toolchain object
 and deprecate RISCVToolchain object

This patch:
- Adds CXXStdlib, runtimelib and unwindlib defaults for riscv target to
  BareMetal toolchain object.
- Add riscv 32 and 64-bit emulation flags to linker job of BareMetal
  toolchain.
- Removes call to RISCVToolChain object from llvm.

This PR is the last patch in the series of patches of merging RISCVToolchain
object into BareMetal toolchain object.

RFC:
https://discourse.llvm.org/t/merging-riscvtoolchain-and-baremetal-toolchains/75524

Change-Id: I2877ac328f55a7638cc185d6034866cbd2ac4203
---
 clang/lib/Driver/CMakeLists.txt   |   1 -
 clang/lib/Driver/Driver.cpp   |  10 +-
 clang/lib/Driver/ToolChains/BareMetal.cpp |  20 ++
 clang/lib/Driver/ToolChains/BareMetal.h   |  11 +-
 .../lib/Driver/ToolChains/RISCVToolchain.cpp  | 232 --
 clang/lib/Driver/ToolChains/RISCVToolchain.h  |  67 -
 .../test/Driver/baremetal-undefined-symbols.c |  14 +-
 clang/test/Driver/riscv32-toolchain-extra.c   |   7 +-
 clang/test/Driver/riscv32-toolchain.c |  26 +-
 clang/test/Driver/riscv64-toolchain-extra.c   |   7 +-
 clang/test/Driver/riscv64-toolchain.c |  20 +-
 11 files changed, 61 insertions(+), 354 deletions(-)
 delete mode 100644 clang/lib/Driver/ToolChains/RISCVToolchain.cpp
 delete mode 100644 clang/lib/Driver/ToolChains/RISCVToolchain.h

diff --git a/clang/lib/Driver/CMakeLists.txt b/clang/lib/Driver/CMakeLists.txt
index 5bdb6614389cf..eee29af5d181a 100644
--- a/clang/lib/Driver/CMakeLists.txt
+++ b/clang/lib/Driver/CMakeLists.txt
@@ -74,7 +74,6 @@ add_clang_library(clangDriver
   ToolChains/OHOS.cpp
   ToolChains/OpenBSD.cpp
   ToolChains/PS4CPU.cpp
-  ToolChains/RISCVToolchain.cpp
   ToolChains/Solaris.cpp
   ToolChains/SPIRV.cpp
   ToolChains/SPIRVOpenMP.cpp
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 07e36ea2efba4..cfc0ba63d5749 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -41,7 +41,6 @@
 #include "ToolChains/PPCFreeBSD.h"
 #include "ToolChains/PPCLinux.h"
 #include "ToolChains/PS4CPU.h"
-#include "ToolChains/RISCVToolchain.h"
 #include "ToolChains/SPIRV.h"
 #include "ToolChains/SPIRVOpenMP.h"
 #include "ToolChains/SYCL.h"
@@ -6889,16 +6888,11 @@ const ToolChain &Driver::getToolChain(const ArgList 
&Args,
 TC = std::make_unique(*this, Target, Args);
 break;
   case llvm::Triple::msp430:
-TC =
-std::make_unique(*this, Target, Args);
+TC = std::make_unique(*this, Target, 
Args);
 break;
   case llvm::Triple::riscv32:
   case llvm::Triple::riscv64:
-if (toolchains::RISCVToolChain::hasGCCToolchain(*this, Args))
-  TC =
-  std::make_unique(*this, Target, 
Args);
-else
-  TC = std::make_unique(*this, Target, Args);
+TC = std::make_unique(*this, Target, Args);
 break;
   case llvm::Triple::ve:
 TC = std::make_unique(*this, Target, Args);
diff --git a/clang/lib/Driver/ToolChains/BareMetal.cpp 
b/clang/lib/Driver/ToolChains/BareMetal.cpp
index feb1ebd34854a..09fc00e1e0544 100644
--- a/clang/lib/Driver/ToolChains/BareMetal.cpp
+++ b/clang/lib/Driver/ToolChains/BareMetal.cpp
@@ -353,6 +353,26 @@ BareMetal::OrderedMultilibs 
BareMetal::getOrderedMultilibs() const {
   return llvm::reverse(Default);
 }
 
+ToolChain::CXXStdlibType BareMetal::GetDefaultCXXStdlibType() const {
+  if (getTriple().isRISCV() && GCCInstallation.isValid())
+return ToolChain::CST_Libstdcxx;
+  return ToolChain::CST_Libcxx;
+}
+
+ToolChain::RuntimeLibType BareMetal::GetDefaultRuntimeLibType() const {
+  if (getTriple().isRISCV() && GCCInstallation.isValid())
+return ToolChain::RLT_Libgcc;
+  return ToolChain::RLT_CompilerRT;
+}
+
+ToolChain::UnwindLibType
+BareMetal::GetUnwindLibType(const llvm::opt::ArgList &Args) const {
+  if (getTriple().isRISCV())
+return ToolChain::UNW_None;
+
+  return ToolChain::GetUnwindLibType(Args);
+}
+
 void BareMetal::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
   ArgStringList &CC1Args) const {
   if (DriverArgs.hasArg(options::OPT_nostdinc))
diff --git a/clang/lib/Driver/ToolChains/BareMetal.h 
b/clang/lib/Driver/ToolChains/BareMetal.h
index 87f173342def2..580f5c6903c1f 100644
--- a/clang/lib/Driver/ToolChains/BareMetal.h
+++ b/clang/lib/Driver/ToolChains/BareMetal.h
@@ -54,12 +54,11 @@ class LLVM_LIBRARY_VISIBILITY BareMetal : public 
Generic_ELF {
 return UnwindTableLevel::None;
   }
 
-  RuntimeLibType GetDefaultRuntimeLibType() const override {
-return ToolChain::RLT_CompilerRT;
-  }
-  CXXStdlibType GetDefaultCXXStdlibType() const overri

[llvm-branch-commits] [clang] [Driver] Forward sysroot from Driver to linker in BareMetal ToolChain Object (PR #132808)

2025-05-26 Thread Garvit Gupta via llvm-branch-commits

https://github.com/quic-garvgupt updated 
https://github.com/llvm/llvm-project/pull/132808

>From d5a080757da2510828b6d8957a83bd1839f25ec4 Mon Sep 17 00:00:00 2001
From: Garvit Gupta 
Date: Mon, 24 Mar 2025 07:04:59 -0700
Subject: [PATCH] [Driver] Forward sysroot from Driver to linker in BareMetal
 ToolChain Object

RISCVToolChain object passes `--sysroot` option from clang to gnuld. Adding
the support for the same in BareMetal toolchain object.

This is done as a part of the effort to merge RISCVToolchain object into
BareMetal toolchain object.

This is the 5th patch in the series of patches for merging RISCVToolchain object
into BareMetal toolchain object.

RFC:
https://discourse.llvm.org/t/merging-riscvtoolchain-and-baremetal-toolchains/75524

Change-Id: Ie830bf6d126fea46dc225e5ef97e14349765ba07
---
 clang/lib/Driver/ToolChains/BareMetal.cpp |  3 +
 clang/test/Driver/aarch64-toolchain.c |  5 +-
 clang/test/Driver/arm-toolchain.c |  3 +
 clang/test/Driver/baremetal.cpp   | 96 +--
 4 files changed, 82 insertions(+), 25 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/BareMetal.cpp 
b/clang/lib/Driver/ToolChains/BareMetal.cpp
index 18d4bba1b1fbe..4a4b94adefb44 100644
--- a/clang/lib/Driver/ToolChains/BareMetal.cpp
+++ b/clang/lib/Driver/ToolChains/BareMetal.cpp
@@ -546,6 +546,9 @@ void baremetal::Linker::ConstructJob(Compilation &C, const 
JobAction &JA,
   const llvm::Triple::ArchType Arch = TC.getArch();
   const llvm::Triple &Triple = getToolChain().getEffectiveTriple();
 
+  if (!D.SysRoot.empty())
+CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot));
+
   CmdArgs.push_back("-Bstatic");
 
   if (TC.getTriple().isRISCV() && Args.hasArg(options::OPT_mno_relax))
diff --git a/clang/test/Driver/aarch64-toolchain.c 
b/clang/test/Driver/aarch64-toolchain.c
index 49b12bbbeb716..f0e0ed5a6b7e6 100644
--- a/clang/test/Driver/aarch64-toolchain.c
+++ b/clang/test/Driver/aarch64-toolchain.c
@@ -29,6 +29,7 @@
 // C-AARCH64-BAREMETAL: "-isysroot" 
"{{.*}}Inputs/basic_aarch64_gcc_tree/aarch64-none-elf"
 // C-AARCH64-BAREMETAL: "-internal-isystem" 
"{{.*}}Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include"
 // C-AARCH64-BAREMETAL: 
"{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../bin/aarch64-none-elf-ld"
+// C-AARCH64-BAREMETAL: 
"--sysroot={{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf"
 // C-AARCH64-BAREMETAL: "-Bstatic" "-EL"
 // C-AARCH64-BAREMETAL: 
"{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/lib/crt0.o"
 // C-AARCH64-BAREMETAL: 
"{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/crtbegin.o"
@@ -65,6 +66,7 @@
 // CXX-AARCH64-BAREMETAL: "-internal-isystem" 
"{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include/c++/8.2.1"
 // CXX-AARCH64-BAREMETAL: "-internal-isystem" 
"{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include"
 // CXX-AARCH64-BAREMETAL: 
"{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../bin/aarch64-none-elf-ld"
+// CXX-AARCH64-BAREMETAL: 
"--sysroot={{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf"
 // CXX-AARCH64-BAREMETAL: "-Bstatic" "-EL"
 // CXX-AARCH64-BAREMETAL: 
"{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/lib/crt0.o"
 // CXX-AARCH64-BAREMETAL: 
"{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/crtbegin.o"
@@ -101,7 +103,8 @@
 // CXX-AARCH64-BAREMETAL-LIBCXX: "-isysroot" 
"{{.*}}Inputs/basic_aarch64_gcc_tree/aarch64-none-elf"
 // CXX-AARCH64-BAREMETAL-LIBCXX: "-internal-isystem" 
"{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include/c++/v1"
 // CXX-AARCH64-BAREMETAL-LIBCXX: "-internal-isystem" 
"{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include"
-// CXX-AARCH64-BAREMETAL-LIBCXX: 
"{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../bin/aarch64-none-elf-ld
+// CXX-AARCH64-BAREMETAL-LIBCXX: 
"{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../bin/aarch64-none-elf-ld"
+// CXX-AARCH64-BAREMETAL-LIBCXX: 
"--sysroot={{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf"
 // CXX-AARCH64-BAREMETAL-LIBCXX: "-Bstatic" "-EL"
 // CXX-AARCH64-BAREMETAL-LIBCXX: 
"{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/lib/crt0.o"
 // CXX-AARCH64-BAREMETAL-LIBCXX: 
"{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/crtbegin.o"
diff --git a/clang/test/Driver/arm-toolchain.c 
b/clang/test/Driver/arm-toolchain.c
index 54b590a569bbf..a57b5a09026b4 100644
--- a/clang/test/Driver/arm-toolchain.c
+++ b/clang/test/Driver/arm-toolchain.c
@@ -28,6 +28,7 @@
 // C-ARM-BAREMETAL: "-isysroot" 
"{{.*}}Inputs/basic_arm_gcc_tree/armv6m-none-eabi"
 // C-ARM-BAREMETAL: "-internal-isystem" 
"{{.*}}Inputs/basic_arm_gcc_tree/armv6m-none-eabi/include"
 // C-ARM-BAREMETAL: 
"{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/../../../../bin/armv6m-none-eabi-ld"
+// C-ARM-BAREMETAL: 
"--sysroot={{.*}}/Inputs/basic_arm_

[llvm-branch-commits] [clang] [Driver] Add option to force undefined symbols during linking in BareMetal toolchain object. (PR #132807)

2025-05-26 Thread Garvit Gupta via llvm-branch-commits

https://github.com/quic-garvgupt updated 
https://github.com/llvm/llvm-project/pull/132807

>From ff6d71e7bfc6a362479e893787ee6f8fcccd9bf5 Mon Sep 17 00:00:00 2001
From: Garvit Gupta 
Date: Mon, 24 Mar 2025 06:49:09 -0700
Subject: [PATCH] [Driver] Add option to force undefined symbols during linking
 in BareMetal toolchain object.

Add support for `-u` option to force undefined symbols. This option is supported
by both lld and gnuld.

This is done as a part of the effort to merge RISCVToolchain object into
BareMetal toolchain object.

This is the 4th patch in the series of patches for merging RISCVToolchain object
into BareMetal toolchain object.

RFC:
https://discourse.llvm.org/t/merging-riscvtoolchain-and-baremetal-toolchains/75524

Change-Id: Ia6597c756923a77fd9c7cb9a6ae8e52a56f5457d
---
 clang/lib/Driver/ToolChains/BareMetal.cpp   |  5 +++--
 clang/test/Driver/baremetal-undefined-symbols.c | 15 +++
 clang/test/Driver/riscv-args.c  |  6 --
 3 files changed, 18 insertions(+), 8 deletions(-)
 create mode 100644 clang/test/Driver/baremetal-undefined-symbols.c
 delete mode 100644 clang/test/Driver/riscv-args.c

diff --git a/clang/lib/Driver/ToolChains/BareMetal.cpp 
b/clang/lib/Driver/ToolChains/BareMetal.cpp
index 7a7aa8441aacb..18d4bba1b1fbe 100644
--- a/clang/lib/Driver/ToolChains/BareMetal.cpp
+++ b/clang/lib/Driver/ToolChains/BareMetal.cpp
@@ -587,8 +587,9 @@ void baremetal::Linker::ConstructJob(Compilation &C, const 
JobAction &JA,
 }
   }
 
-  Args.addAllArgs(CmdArgs, {options::OPT_L, options::OPT_T_Group,
-options::OPT_s, options::OPT_t, options::OPT_r});
+  Args.addAllArgs(CmdArgs,
+  {options::OPT_L, options::OPT_u, options::OPT_T_Group,
+   options::OPT_s, options::OPT_t, options::OPT_r});
 
   TC.AddFilePathLibArgs(Args, CmdArgs);
 
diff --git a/clang/test/Driver/baremetal-undefined-symbols.c 
b/clang/test/Driver/baremetal-undefined-symbols.c
new file mode 100644
index 0..0ce0db43bccad
--- /dev/null
+++ b/clang/test/Driver/baremetal-undefined-symbols.c
@@ -0,0 +1,15 @@
+// Check the arguments are correctly passed
+
+// Make sure -T is the last with gcc-toolchain option
+// RUN: %clang -### --target=riscv32 --gcc-toolchain= -Xlinker --defsym=FOO=10 
-T a.lds -u foo %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHECK-LD %s
+// CHECK-LD: {{.*}} "--defsym=FOO=10" {{.*}} "-u" "foo" {{.*}} "-T" "a.lds"
+
+// TODO: Merge this test with the above in the last patch when finally 
integrating riscv
+// Make sure -T is the last with gcc-toolchain option
+// RUN: %clang -### --target=aarch64-none-elf --gcc-toolchain= -Xlinker 
--defsym=FOO=10 -T a.lds -u foo %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHECK-ARM-LD %s
+// RUN: %clang -### --target=armv6m-none-eabi --gcc-toolchain= -Xlinker 
--defsym=FOO=10 -T a.lds -u foo %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHECK-ARM-LD %s
+// CHECK-ARM-LD: {{.*}} "-T" "a.lds" "-u" "foo" {{.*}} "--defsym=FOO=10"
+
diff --git a/clang/test/Driver/riscv-args.c b/clang/test/Driver/riscv-args.c
deleted file mode 100644
index cab08e5b0f811..0
--- a/clang/test/Driver/riscv-args.c
+++ /dev/null
@@ -1,6 +0,0 @@
-// Check the arguments are correctly passed
-
-// Make sure -T is the last with gcc-toolchain option
-// RUN: %clang -### --target=riscv32 --gcc-toolchain= -Xlinker --defsym=FOO=10 
-T a.lds -u foo %s 2>&1 \
-// RUN:   | FileCheck -check-prefix=CHECK-LD %s
-// CHECK-LD: {{.*}} "--defsym=FOO=10" {{.*}} "-u" "foo" {{.*}} "-T" "a.lds"

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [Driver] Add support for crtbegin.o, crtend.o and libgloss lib to BareMetal toolchain object (PR #121830)

2025-05-26 Thread Garvit Gupta via llvm-branch-commits

https://github.com/quic-garvgupt updated 
https://github.com/llvm/llvm-project/pull/121830

>From 74709e5c34d48ececa5d500bf7f450e3fafa821a Mon Sep 17 00:00:00 2001
From: Garvit Gupta 
Date: Mon, 24 Mar 2025 04:58:57 -0700
Subject: [PATCH] [Driver] Add support for crtbegin.o, crtend.o and libgloss
 lib to BareMetal toolchain object

This patch conditionalises the addition of crt{begin,end}.o object files along
with addition of -lgloss lib based on whether libc selected is newlib or llvm
libc. Since there is no way a user can specify which libc it wants to link
against, currently passing valid GCCInstallation to driver will select newlib
otherwise it will default to llvm libc.

Moreover, this patch makes gnuld the default linker for baremetal toolchain
object. Users need to pass `-fuse-ld=lld` explicitly to the driver to select lld.

This is the 2nd patch in the series of patches of merging RISCVToolchain into
BareMetal toolchain object.

RFC:
https://discourse.llvm.org/t/merging-riscvtoolchain-and-baremetal-toolchains/75524

Change-Id: Ie06dc976c306cf04ec2733bbb2d271c57d201f86
---
 clang/lib/Driver/ToolChains/BareMetal.cpp   | 38 +++-
 clang/lib/Driver/ToolChains/BareMetal.h |  3 +-
 clang/test/Driver/aarch64-toolchain-extra.c | 13 ++-
 clang/test/Driver/aarch64-toolchain.c   | 95 
 clang/test/Driver/arm-toolchain-extra.c |  7 ++
 clang/test/Driver/arm-toolchain.c   | 99 -
 clang/test/Driver/baremetal.cpp |  3 +-
 clang/test/Driver/sanitizer-ld.c|  2 +-
 8 files changed, 247 insertions(+), 13 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/BareMetal.cpp 
b/clang/lib/Driver/ToolChains/BareMetal.cpp
index a3bc7bdfc817e..8411b1c0bf866 100644
--- a/clang/lib/Driver/ToolChains/BareMetal.cpp
+++ b/clang/lib/Driver/ToolChains/BareMetal.cpp
@@ -562,9 +562,31 @@ void baremetal::Linker::ConstructJob(Compilation &C, const 
JobAction &JA,
 CmdArgs.push_back(Arch == llvm::Triple::aarch64_be ? "-EB" : "-EL");
   }
 
-  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles,
-   options::OPT_r)) {
-CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crt0.o")));
+  bool NeedCRTs =
+  !Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles);
+
+  const char *CRTBegin, *CRTEnd;
+  if (NeedCRTs) {
+if (!Args.hasArg(options::OPT_r))
+  CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crt0.o")));
+if (TC.hasValidGCCInstallation() || hasGCCToolChainAlongSideClang(D)) {
+  auto RuntimeLib = TC.GetRuntimeLibType(Args);
+  switch (RuntimeLib) {
+  case (ToolChain::RLT_Libgcc): {
+CRTBegin = "crtbegin.o";
+CRTEnd = "crtend.o";
+break;
+  }
+  case (ToolChain::RLT_CompilerRT): {
+CRTBegin =
+TC.getCompilerRTArgString(Args, "crtbegin", ToolChain::FT_Object);
+CRTEnd =
+TC.getCompilerRTArgString(Args, "crtend", ToolChain::FT_Object);
+break;
+  }
+  }
+  CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath(CRTBegin)));
+}
   }
 
   Args.addAllArgs(CmdArgs, {options::OPT_L, options::OPT_T_Group,
@@ -587,9 +609,12 @@ void baremetal::Linker::ConstructJob(Compilation &C, const 
JobAction &JA,
   }
 
   if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
+CmdArgs.push_back("--start-group");
 AddRunTimeLibs(TC, D, CmdArgs, Args);
-
 CmdArgs.push_back("-lc");
+if (TC.hasValidGCCInstallation() || hasGCCToolChainAlongSideClang(D))
+  CmdArgs.push_back("-lgloss");
+CmdArgs.push_back("--end-group");
   }
 
   if (D.isUsingLTO()) {
@@ -605,6 +630,11 @@ void baremetal::Linker::ConstructJob(Compilation &C, const 
JobAction &JA,
 addLTOOptions(TC, Args, CmdArgs, Output, *Input,
   D.getLTOMode() == LTOK_Thin);
   }
+
+  if ((TC.hasValidGCCInstallation() || hasGCCToolChainAlongSideClang(D)) &&
+  NeedCRTs)
+CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath(CRTEnd)));
+
   if (TC.getTriple().isRISCV())
 CmdArgs.push_back("-X");
 
diff --git a/clang/lib/Driver/ToolChains/BareMetal.h 
b/clang/lib/Driver/ToolChains/BareMetal.h
index 2a791e7672e5e..87f173342def2 100644
--- a/clang/lib/Driver/ToolChains/BareMetal.h
+++ b/clang/lib/Driver/ToolChains/BareMetal.h
@@ -36,6 +36,7 @@ class LLVM_LIBRARY_VISIBILITY BareMetal : public Generic_ELF {
   Tool *buildStaticLibTool() const override;
 
 public:
+  bool hasValidGCCInstallation() const { return GCCInstallation.isValid(); }
   bool isBareMetal() const override { return true; }
   bool isCrossCompiling() const override { return true; }
   bool HasNativeLLVMSupport() const override { return true; }
@@ -60,8 +61,6 @@ class LLVM_LIBRARY_VISIBILITY BareMetal : public Generic_ELF {
 return ToolChain::CST_Libcxx;
   }
 
-  const char *getDefaultLinker() const override { return "ld.lld"; }
-
   void
   AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
   

[llvm-branch-commits] [clang] [Driver] Add support for crtbegin.o, crtend.o and libgloss lib to BareMetal toolchain object (PR #121830)

2025-05-26 Thread Garvit Gupta via llvm-branch-commits


@@ -1,5 +1,12 @@
 // UNSUPPORTED: system-windows
 
+// Test interaction with -fuse-ld=lld

quic-garvgupt wrote:

Done

https://github.com/llvm/llvm-project/pull/121830
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [RISCV] Integrate RISCV target in baremetal toolchain object and deprecate RISCVToolchain object (PR #121831)

2025-05-26 Thread Garvit Gupta via llvm-branch-commits

quic-garvgupt wrote:

Gentle Ping!


https://github.com/llvm/llvm-project/pull/121831
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [Driver] Fix link order of BareMetal toolchain object (PR #132806)

2025-05-26 Thread Garvit Gupta via llvm-branch-commits

https://github.com/quic-garvgupt updated 
https://github.com/llvm/llvm-project/pull/132806

>From dcff9da5684bd245685cac5d17640072ce457e6e Mon Sep 17 00:00:00 2001
From: Garvit Gupta 
Date: Mon, 24 Mar 2025 06:17:42 -0700
Subject: [PATCH] [Driver] Fix link order of BareMetal toolchain object

The linker job in BareMetal toolchain object will be used by gnuld and lld both.
However, gnuld processes the arguments in the order in which they appear on the
command line, whereas there is no such restriction with lld.

The previous order was:
LibraryPaths -> Libraries -> LTOOptions -> LinkerInputs
The new order is:
LibraryPaths -> LTOOptions -> LinkerInputs -> Libraries

LTO options need to be added before adding any linker inputs because file format
after compile stage during LTO is bitcode which gnuld natively cannot process.
Hence, the appropriate plugins need to be passed before adding any bitcode file
on the command line.

Object files that are getting linked need to be passed before processing any
libraries so that gnuld can appropriately do symbol resolution for the symbols
for which no definition is provided through user code.

Similar link order is also followed by other linker jobs for gnuld such as in
gnutools::Linker in Gnu.cpp

This is the 3rd patch in the series of patches of merging RISCVToolchain into
BareMetal toolchain object.

RFC:
https://discourse.llvm.org/t/merging-riscvtoolchain-and-baremetal-toolchains/75524

Change-Id: I0e68e403c08b5687cc3346e833981f7b9f3819c4
---
 clang/lib/Driver/ToolChains/BareMetal.cpp   | 32 -
 clang/test/Driver/aarch64-toolchain-extra.c |  2 +-
 clang/test/Driver/aarch64-toolchain.c   | 24 +++
 clang/test/Driver/arm-toolchain-extra.c |  2 +-
 clang/test/Driver/arm-toolchain.c   | 24 +++
 clang/test/Driver/baremetal-multilib.yaml   |  3 +-
 clang/test/Driver/baremetal-sysroot.cpp |  8 ++-
 clang/test/Driver/baremetal.cpp | 79 +
 8 files changed, 98 insertions(+), 76 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/BareMetal.cpp 
b/clang/lib/Driver/ToolChains/BareMetal.cpp
index 8411b1c0bf866..7a7aa8441aacb 100644
--- a/clang/lib/Driver/ToolChains/BareMetal.cpp
+++ b/clang/lib/Driver/ToolChains/BareMetal.cpp
@@ -546,8 +546,6 @@ void baremetal::Linker::ConstructJob(Compilation &C, const 
JobAction &JA,
   const llvm::Triple::ArchType Arch = TC.getArch();
   const llvm::Triple &Triple = getToolChain().getEffectiveTriple();
 
-  AddLinkerInputs(TC, Inputs, Args, CmdArgs, JA);
-
   CmdArgs.push_back("-Bstatic");
 
   if (TC.getTriple().isRISCV() && Args.hasArg(options::OPT_mno_relax))
@@ -597,6 +595,22 @@ void baremetal::Linker::ConstructJob(Compilation &C, const 
JobAction &JA,
   for (const auto &LibPath : TC.getLibraryPaths())
 CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-L", LibPath)));
 
+  if (D.isUsingLTO()) {
+assert(!Inputs.empty() && "Must have at least one input.");
+// Find the first filename InputInfo object.
+auto Input = llvm::find_if(
+Inputs, [](const InputInfo &II) -> bool { return II.isFilename(); });
+if (Input == Inputs.end())
+  // For a very rare case, all of the inputs to the linker are
+  // InputArg. If that happens, just use the first InputInfo.
+  Input = Inputs.begin();
+
+addLTOOptions(TC, Args, CmdArgs, Output, *Input,
+  D.getLTOMode() == LTOK_Thin);
+  }
+
+  AddLinkerInputs(TC, Inputs, Args, CmdArgs, JA);
+
   if (TC.ShouldLinkCXXStdlib(Args)) {
 bool OnlyLibstdcxxStatic = Args.hasArg(options::OPT_static_libstdcxx) &&
!Args.hasArg(options::OPT_static);
@@ -617,20 +631,6 @@ void baremetal::Linker::ConstructJob(Compilation &C, const 
JobAction &JA,
 CmdArgs.push_back("--end-group");
   }
 
-  if (D.isUsingLTO()) {
-assert(!Inputs.empty() && "Must have at least one input.");
-// Find the first filename InputInfo object.
-auto Input = llvm::find_if(
-Inputs, [](const InputInfo &II) -> bool { return II.isFilename(); });
-if (Input == Inputs.end())
-  // For a very rare case, all of the inputs to the linker are
-  // InputArg. If that happens, just use the first InputInfo.
-  Input = Inputs.begin();
-
-addLTOOptions(TC, Args, CmdArgs, Output, *Input,
-  D.getLTOMode() == LTOK_Thin);
-  }
-
   if ((TC.hasValidGCCInstallation() || hasGCCToolChainAlongSideClang(D)) &&
   NeedCRTs)
 CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath(CRTEnd)));
diff --git a/clang/test/Driver/aarch64-toolchain-extra.c 
b/clang/test/Driver/aarch64-toolchain-extra.c
index 2a930e35acd45..a0b5f2902962f 100644
--- a/clang/test/Driver/aarch64-toolchain-extra.c
+++ b/clang/test/Driver/aarch64-toolchain-extra.c
@@ -31,5 +31,5 @@
 // C-AARCH64-BAREMETAL-NOGCC: 
"{{.*}}/aarch64-nogcc/bin/../aarch64-none-elf/lib/crt0.o"
 // C-AARCH64-BAREMETAL-NOGCC: 
"{{.*}}/aarch64-nogcc/{{.*}}/aarch64-none-elf/lib/crtbegin.o"
 // C-AARCH64-BAREMETAL-NOGCC: 

[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: add RegBankLegalize rules for extends and trunc (PR #132383)

2025-05-26 Thread Petar Avramovic via llvm-branch-commits

https://github.com/petar-avramovic updated 
https://github.com/llvm/llvm-project/pull/132383

>From 50839949c8622e52027d378405d2edc99d9df3a7 Mon Sep 17 00:00:00 2001
From: Petar Avramovic 
Date: Thu, 8 May 2025 12:03:28 +0200
Subject: [PATCH] AMDGPU/GlobalISel: add RegBankLegalize rules for extends and
 trunc

Uniform S1:
Truncs to uniform S1 and AnyExts from S1 are left as is as they are meant
to be combined away. Uniform S1 ZExt and SExt are lowered using select.
Divergent S1:
Trunc of VGPR to VCC is lowered as compare.
Extends of VCC are lowered using select.

For remaining types:
S32 to S64 ZExt and SExt are lowered using merge values, AnyExt and Trunc
are again left as is to be combined away.
Notably uniform S16 for SExt and Zext is not lowered to S32 and left as is
for instruction select to deal with them. This is because there are patterns
that check for S16 type.
---
 .../Target/AMDGPU/AMDGPURegBankLegalize.cpp   |   7 ++
 .../AMDGPU/AMDGPURegBankLegalizeHelper.cpp| 110 +-
 .../AMDGPU/AMDGPURegBankLegalizeHelper.h  |   1 +
 .../AMDGPU/AMDGPURegBankLegalizeRules.cpp |  47 +++-
 .../AMDGPU/AMDGPURegBankLegalizeRules.h   |   3 +
 .../GlobalISel/regbankselect-and-s1.mir   | 105 +
 .../GlobalISel/regbankselect-anyext.mir   |  59 +-
 .../AMDGPU/GlobalISel/regbankselect-sext.mir  | 100 ++--
 .../AMDGPU/GlobalISel/regbankselect-trunc.mir |  22 +++-
 .../AMDGPU/GlobalISel/regbankselect-zext.mir  |  89 +-
 10 files changed, 360 insertions(+), 183 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
index 4d8d3022b8080..4f9beeaacfaee 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
@@ -213,6 +213,13 @@ class AMDGPURegBankLegalizeCombiner {
   return;
 }
 
+if (DstTy == S64 && TruncSrcTy == S32) {
+  B.buildMergeLikeInstr(MI.getOperand(0).getReg(),
+{TruncSrc, B.buildUndef({SgprRB, S32})});
+  cleanUpAfterCombine(MI, Trunc);
+  return;
+}
+
 if (DstTy == S32 && TruncSrcTy == S16) {
   B.buildAnyExt(Dst, TruncSrc);
   cleanUpAfterCombine(MI, Trunc);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
index 050b6302a98c5..658982ceb68fe 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
@@ -133,6 +133,43 @@ void RegBankLegalizeHelper::widenLoad(MachineInstr &MI, 
LLT WideTy,
   MI.eraseFromParent();
 }
 
+void RegBankLegalizeHelper::lowerVccExtToSel(MachineInstr &MI) {
+  Register Dst = MI.getOperand(0).getReg();
+  LLT Ty = MRI.getType(Dst);
+  Register Src = MI.getOperand(1).getReg();
+  unsigned Opc = MI.getOpcode();
+  int TrueExtCst = Opc == G_SEXT ? -1 : 1;
+  if (Ty == S32 || Ty == S16) {
+auto True = B.buildConstant({VgprRB, Ty}, TrueExtCst);
+auto False = B.buildConstant({VgprRB, Ty}, 0);
+B.buildSelect(Dst, Src, True, False);
+  } else if (Ty == S64) {
+auto True = B.buildConstant({VgprRB_S32}, TrueExtCst);
+auto False = B.buildConstant({VgprRB_S32}, 0);
+auto Lo = B.buildSelect({VgprRB_S32}, Src, True, False);
+MachineInstrBuilder Hi;
+switch (Opc) {
+case G_SEXT:
+  Hi = Lo;
+  break;
+case G_ZEXT:
+  Hi = False;
+  break;
+case G_ANYEXT:
+  Hi = B.buildUndef({VgprRB_S32});
+  break;
+default:
+  llvm_unreachable("Opcode not supported");
+}
+
+B.buildMergeValues(Dst, {Lo.getReg(0), Hi.getReg(0)});
+  } else {
+llvm_unreachable("Type not supported");
+  }
+
+  MI.eraseFromParent();
+}
+
 static bool isSignedBFE(MachineInstr &MI) {
   if (GIntrinsic *GI = dyn_cast(&MI))
 return (GI->is(Intrinsic::amdgcn_sbfe));
@@ -256,26 +293,8 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI,
   switch (Mapping.LoweringMethod) {
   case DoNotLower:
 return;
-  case VccExtToSel: {
-LLT Ty = MRI.getType(MI.getOperand(0).getReg());
-Register Src = MI.getOperand(1).getReg();
-unsigned Opc = MI.getOpcode();
-if (Ty == S32 || Ty == S16) {
-  auto True = B.buildConstant({VgprRB, Ty}, Opc == G_SEXT ? -1 : 1);
-  auto False = B.buildConstant({VgprRB, Ty}, 0);
-  B.buildSelect(MI.getOperand(0).getReg(), Src, True, False);
-}
-if (Ty == S64) {
-  auto True = B.buildConstant({VgprRB, S32}, Opc == G_SEXT ? -1 : 1);
-  auto False = B.buildConstant({VgprRB, S32}, 0);
-  auto Sel = B.buildSelect({VgprRB, S32}, Src, True, False);
-  B.buildMergeValues(
-  MI.getOperand(0).getReg(),
-  {Sel.getReg(0), Opc == G_SEXT ? Sel.getReg(0) : False.getReg(0)});
-}
-MI.eraseFromParent();
-return;
-  }
+  case VccExtToSel:
+return lowerVccExtToSel(MI);
   case UniExtToSel: {
 LLT Ty = MRI.getType(MI.getOperand(0).g

[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: detect authentication oracles (PR #135663)

2025-05-26 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/135663

>From 453eeffd3f4488e6877026f1f9e9c86be4f82d21 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Sat, 5 Apr 2025 14:54:01 +0300
Subject: [PATCH 1/6] [BOLT] Gadget scanner: detect authentication oracles

Implement the detection of authentication instructions whose results can
be inspected by an attacker to know whether authentication succeeded.

As the properties of output registers of authentication instructions are
inspected, add a second set of analysis-related classes to iterate over
the instructions in reverse order.
---
 bolt/include/bolt/Passes/PAuthGadgetScanner.h |  12 +
 bolt/lib/Passes/PAuthGadgetScanner.cpp| 543 +
 .../AArch64/gs-pauth-authentication-oracles.s | 723 ++
 .../AArch64/gs-pauth-debug-output.s   |  78 ++
 4 files changed, 1356 insertions(+)
 create mode 100644 
bolt/test/binary-analysis/AArch64/gs-pauth-authentication-oracles.s

diff --git a/bolt/include/bolt/Passes/PAuthGadgetScanner.h 
b/bolt/include/bolt/Passes/PAuthGadgetScanner.h
index 98a49df862ebd..a3b39fcd5dc02 100644
--- a/bolt/include/bolt/Passes/PAuthGadgetScanner.h
+++ b/bolt/include/bolt/Passes/PAuthGadgetScanner.h
@@ -284,6 +284,15 @@ class ClobberingInfo : public ExtraInfo {
   void print(raw_ostream &OS, const MCInstReference Location) const override;
 };
 
+class LeakageInfo : public ExtraInfo {
+  SmallVector LeakingInstrs;
+
+public:
+  LeakageInfo(const ArrayRef Instrs) : LeakingInstrs(Instrs) 
{}
+
+  void print(raw_ostream &OS, const MCInstReference Location) const override;
+};
+
 /// A brief version of a report that can be further augmented with the details.
 ///
 /// A half-baked report produced on the first run of the analysis. An extra,
@@ -324,6 +333,9 @@ class FunctionAnalysisContext {
   void findUnsafeUses(SmallVector> &Reports);
   void augmentUnsafeUseReports(ArrayRef> Reports);
 
+  void findUnsafeDefs(SmallVector> &Reports);
+  void augmentUnsafeDefReports(ArrayRef> Reports);
+
   /// Process the reports which do not have to be augmented, and remove them
   /// from Reports.
   void handleSimpleReports(SmallVector> &Reports);
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp 
b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index 971ea5fdef420..b3f3d3a1aa639 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -717,6 +717,459 @@ SrcSafetyAnalysis::create(BinaryFunction &BF,
RegsToTrackInstsFor);
 }
 
+/// A state representing which registers are safe to be used as the destination
+/// operand of an authentication instruction.
+///
+/// Similar to SrcState, it is the analysis that should take register aliasing
+/// into account.
+///
+/// Depending on the implementation, it may be possible that an authentication
+/// instruction returns an invalid pointer on failure instead of terminating
+/// the program immediately (assuming the program will crash as soon as that
+/// pointer is dereferenced). To prevent brute-forcing the correct signature,
+/// it should be impossible for an attacker to test if a pointer is correctly
+/// signed - either the program should be terminated on authentication failure
+/// or it should be impossible to tell whether authentication succeeded or not.
+///
+/// For that reason, a restricted set of operations is allowed on any register
+/// containing a value derived from the result of an authentication instruction
+/// until that register is either wiped or checked not to contain a result of a
+/// failed authentication.
+///
+/// Specifically, the safety property for a register is computed by iterating
+/// the instructions in backward order: the source register Xn of an instruction
+/// Inst is safe if at least one of the following is true:
+/// * Inst checks if Xn contains the result of a successful authentication and
+///   terminates the program on failure. Note that Inst can either naturally
+///   dereference Xn (load, branch, return, etc. instructions) or be the first
+///   instruction of an explicit checking sequence.
+/// * Inst performs safe address arithmetic AND both source and result
+///   registers, as well as any temporary registers, must be safe after
+///   execution of Inst (temporaries are not used on AArch64 and thus not
+///   currently supported/allowed).
+///   See MCPlusBuilder::analyzeAddressArithmeticsForPtrAuth for the details.
+/// * Inst fully overwrites Xn with an unrelated value.
+struct DstState {
+  /// The set of registers whose values cannot be inspected by an attacker in
+  /// a way usable as an authentication oracle. The results of authentication
+  /// instructions should be written to such registers.
+  BitVector CannotEscapeUnchecked;
+
+  std::vector> FirstInstLeakingReg;
+
+  /// Construct an empty state.
+  DstState() {}
+
+  DstState(unsigned NumRegs, unsigned NumRegsToTrack)
+  : Cannot

[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: detect authentication oracles (PR #135663)

2025-05-26 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/135663

>From 453eeffd3f4488e6877026f1f9e9c86be4f82d21 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Sat, 5 Apr 2025 14:54:01 +0300
Subject: [PATCH 1/6] [BOLT] Gadget scanner: detect authentication oracles

Implement the detection of authentication instructions whose results can
be inspected by an attacker to know whether authentication succeeded.

As the properties of output registers of authentication instructions are
inspected, add a second set of analysis-related classes to iterate over
the instructions in reverse order.
---
 bolt/include/bolt/Passes/PAuthGadgetScanner.h |  12 +
 bolt/lib/Passes/PAuthGadgetScanner.cpp| 543 +
 .../AArch64/gs-pauth-authentication-oracles.s | 723 ++
 .../AArch64/gs-pauth-debug-output.s   |  78 ++
 4 files changed, 1356 insertions(+)
 create mode 100644 
bolt/test/binary-analysis/AArch64/gs-pauth-authentication-oracles.s

diff --git a/bolt/include/bolt/Passes/PAuthGadgetScanner.h 
b/bolt/include/bolt/Passes/PAuthGadgetScanner.h
index 98a49df862ebd..a3b39fcd5dc02 100644
--- a/bolt/include/bolt/Passes/PAuthGadgetScanner.h
+++ b/bolt/include/bolt/Passes/PAuthGadgetScanner.h
@@ -284,6 +284,15 @@ class ClobberingInfo : public ExtraInfo {
   void print(raw_ostream &OS, const MCInstReference Location) const override;
 };
 
+class LeakageInfo : public ExtraInfo {
+  SmallVector LeakingInstrs;
+
+public:
+  LeakageInfo(const ArrayRef Instrs) : LeakingInstrs(Instrs) 
{}
+
+  void print(raw_ostream &OS, const MCInstReference Location) const override;
+};
+
 /// A brief version of a report that can be further augmented with the details.
 ///
 /// A half-baked report produced on the first run of the analysis. An extra,
@@ -324,6 +333,9 @@ class FunctionAnalysisContext {
   void findUnsafeUses(SmallVector> &Reports);
   void augmentUnsafeUseReports(ArrayRef> Reports);
 
+  void findUnsafeDefs(SmallVector> &Reports);
+  void augmentUnsafeDefReports(ArrayRef> Reports);
+
   /// Process the reports which do not have to be augmented, and remove them
   /// from Reports.
   void handleSimpleReports(SmallVector> &Reports);
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp 
b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index 971ea5fdef420..b3f3d3a1aa639 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -717,6 +717,459 @@ SrcSafetyAnalysis::create(BinaryFunction &BF,
RegsToTrackInstsFor);
 }
 
+/// A state representing which registers are safe to be used as the destination
+/// operand of an authentication instruction.
+///
+/// Similar to SrcState, it is the analysis that should take register aliasing
+/// into account.
+///
+/// Depending on the implementation, it may be possible that an authentication
+/// instruction returns an invalid pointer on failure instead of terminating
+/// the program immediately (assuming the program will crash as soon as that
+/// pointer is dereferenced). To prevent brute-forcing the correct signature,
+/// it should be impossible for an attacker to test if a pointer is correctly
+/// signed - either the program should be terminated on authentication failure
+/// or it should be impossible to tell whether authentication succeeded or not.
+///
+/// For that reason, a restricted set of operations is allowed on any register
+/// containing a value derived from the result of an authentication instruction
+/// until that register is either wiped or checked not to contain a result of a
+/// failed authentication.
+///
+/// Specifically, the safety property for a register is computed by iterating
+/// the instructions in backward order: the source register Xn of an instruction
+/// Inst is safe if at least one of the following is true:
+/// * Inst checks if Xn contains the result of a successful authentication and
+///   terminates the program on failure. Note that Inst can either naturally
+///   dereference Xn (load, branch, return, etc. instructions) or be the first
+///   instruction of an explicit checking sequence.
+/// * Inst performs safe address arithmetic AND both source and result
+///   registers, as well as any temporary registers, must be safe after
+///   execution of Inst (temporaries are not used on AArch64 and thus not
+///   currently supported/allowed).
+///   See MCPlusBuilder::analyzeAddressArithmeticsForPtrAuth for the details.
+/// * Inst fully overwrites Xn with an unrelated value.
+struct DstState {
+  /// The set of registers whose values cannot be inspected by an attacker in
+  /// a way usable as an authentication oracle. The results of authentication
+  /// instructions should be written to such registers.
+  BitVector CannotEscapeUnchecked;
+
+  std::vector> FirstInstLeakingReg;
+
+  /// Construct an empty state.
+  DstState() {}
+
+  DstState(unsigned NumRegs, unsigned NumRegsToTrack)
+  : Cannot

[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: account for BRK when searching for auth oracles (PR #137975)

2025-05-26 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/137975

>From 20b95e7ee5adba6cf09be4667cbfe4d2341e1ba0 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Wed, 30 Apr 2025 16:08:10 +0300
Subject: [PATCH] [BOLT] Gadget scanner: account for BRK when searching for
 auth oracles

An authenticated pointer can be explicitly checked by the compiler via a
sequence of instructions that executes BRK on failure. It is important
to recognize such BRK instruction as checking every register (as it is
expected to immediately trigger an abnormal program termination) to
prevent false positive reports about authentication oracles:

autia   x2, x3
autia   x0, x1
; neither x0 nor x2 is checked at this point
eor x16, x0, x0, lsl #1
tbz x16, #62, on_success ; marks x0 as checked
; end of BB: for x2 to be checked here, it must be checked in both
; successor basic blocks
  on_failure:
brk 0xc470
  on_success:
; x2 is checked
ldr x1, [x2] ; marks x2 as checked
---
 bolt/include/bolt/Core/MCPlusBuilder.h| 14 ++
 bolt/lib/Passes/PAuthGadgetScanner.cpp| 13 +-
 .../Target/AArch64/AArch64MCPlusBuilder.cpp   | 24 --
 .../AArch64/gs-pauth-address-checks.s | 44 +--
 .../AArch64/gs-pauth-authentication-oracles.s |  9 ++--
 .../AArch64/gs-pauth-signing-oracles.s|  6 +--
 6 files changed, 75 insertions(+), 35 deletions(-)

diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h 
b/bolt/include/bolt/Core/MCPlusBuilder.h
index b233452985502..c8cbcaf33f4b5 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -707,6 +707,20 @@ class MCPlusBuilder {
 return false;
   }
 
+  /// Returns true if Inst is a trap instruction.
+  ///
+  /// Tests if Inst is an instruction that immediately causes an abnormal
+  /// program termination, for example when a security violation is detected
+  /// by a compiler-inserted check.
+  ///
+  /// @note An implementation of this method should likely return false for
+  /// calls to library functions like abort(), as it is possible that the
+  /// execution state is partially attacker-controlled at this point.
+  virtual bool isTrap(const MCInst &Inst) const {
+llvm_unreachable("not implemented");
+return false;
+  }
+
   virtual bool isBreakpoint(const MCInst &Inst) const {
 llvm_unreachable("not implemented");
 return false;
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp 
b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index 49087eab3ce9a..a9d46ed3f49d0 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -1050,6 +1050,15 @@ class DstSafetyAnalysis {
   dbgs() << ")\n";
 });
 
+// If this instruction terminates the program immediately, no
+// authentication oracles are possible past this point.
+if (BC.MIB->isTrap(Point)) {
+  LLVM_DEBUG({ traceInst(BC, "Trap instruction found", Point); });
+  DstState Next(NumRegs, RegsToTrackInstsFor.getNumTrackedRegisters());
+  Next.CannotEscapeUnchecked.set();
+  return Next;
+}
+
 // If this instruction is reachable by the analysis, a non-empty state will
 // be propagated to it sooner or later. Until then, skip computeNext().
 if (Cur.empty()) {
@@ -1155,8 +1164,8 @@ class DataflowDstSafetyAnalysis
 //
 // A basic block without any successors, on the other hand, can be
 // pessimistically initialized to everything-is-unsafe: this will naturally
-// handle both return and tail call instructions and is harmless for
-// internal indirect branch instructions (such as computed gotos).
+// handle return, trap and tail call instructions. At the same time, it is
+// harmless for internal indirect branch instructions, like computed gotos.
 if (BB.succ_empty())
   return createUnsafeState();
 
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp 
b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 9d5a578cfbdff..b669d32cc2032 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -386,10 +386,9 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
 // the list of successors of this basic block as appropriate.
 
 // Any of the above code sequences assume the fall-through basic block
-// is a dead-end BRK instruction (any immediate operand is accepted).
+// is a dead-end trap instruction.
 const BinaryBasicBlock *BreakBB = BB.getFallthrough();
-if (!BreakBB || BreakBB->empty() ||
-BreakBB->front().getOpcode() != AArch64::BRK)
+if (!BreakBB || BreakBB->empty() || !isTrap(BreakBB->front()))
   return std::nullopt;
 
 // Iterate over the instructions of BB in reverse order, matching opcodes
@@ -1751,6 +1750,25 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
 Inst.addOperand(MCOperand::createImm(0));
   }
 

[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: account for BRK when searching for auth oracles (PR #137975)

2025-05-26 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/137975

>From 20b95e7ee5adba6cf09be4667cbfe4d2341e1ba0 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Wed, 30 Apr 2025 16:08:10 +0300
Subject: [PATCH] [BOLT] Gadget scanner: account for BRK when searching for
 auth oracles

An authenticated pointer can be explicitly checked by the compiler via a
sequence of instructions that executes BRK on failure. It is important
to recognize such BRK instruction as checking every register (as it is
expected to immediately trigger an abnormal program termination) to
prevent false positive reports about authentication oracles:

autia   x2, x3
autia   x0, x1
; neither x0 nor x2 is checked at this point
eor x16, x0, x0, lsl #1
tbz x16, #62, on_success ; marks x0 as checked
; end of BB: for x2 to be checked here, it must be checked in both
; successor basic blocks
  on_failure:
brk 0xc470
  on_success:
; x2 is checked
ldr x1, [x2] ; marks x2 as checked
---
 bolt/include/bolt/Core/MCPlusBuilder.h| 14 ++
 bolt/lib/Passes/PAuthGadgetScanner.cpp| 13 +-
 .../Target/AArch64/AArch64MCPlusBuilder.cpp   | 24 --
 .../AArch64/gs-pauth-address-checks.s | 44 +--
 .../AArch64/gs-pauth-authentication-oracles.s |  9 ++--
 .../AArch64/gs-pauth-signing-oracles.s|  6 +--
 6 files changed, 75 insertions(+), 35 deletions(-)

diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h 
b/bolt/include/bolt/Core/MCPlusBuilder.h
index b233452985502..c8cbcaf33f4b5 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -707,6 +707,20 @@ class MCPlusBuilder {
 return false;
   }
 
+  /// Returns true if Inst is a trap instruction.
+  ///
+  /// Tests if Inst is an instruction that immediately causes an abnormal
+  /// program termination, for example when a security violation is detected
+  /// by a compiler-inserted check.
+  ///
+  /// @note An implementation of this method should likely return false for
+  /// calls to library functions like abort(), as it is possible that the
+  /// execution state is partially attacker-controlled at this point.
+  virtual bool isTrap(const MCInst &Inst) const {
+llvm_unreachable("not implemented");
+return false;
+  }
+
   virtual bool isBreakpoint(const MCInst &Inst) const {
 llvm_unreachable("not implemented");
 return false;
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp 
b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index 49087eab3ce9a..a9d46ed3f49d0 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -1050,6 +1050,15 @@ class DstSafetyAnalysis {
   dbgs() << ")\n";
 });
 
+// If this instruction terminates the program immediately, no
+// authentication oracles are possible past this point.
+if (BC.MIB->isTrap(Point)) {
+  LLVM_DEBUG({ traceInst(BC, "Trap instruction found", Point); });
+  DstState Next(NumRegs, RegsToTrackInstsFor.getNumTrackedRegisters());
+  Next.CannotEscapeUnchecked.set();
+  return Next;
+}
+
 // If this instruction is reachable by the analysis, a non-empty state will
 // be propagated to it sooner or later. Until then, skip computeNext().
 if (Cur.empty()) {
@@ -1155,8 +1164,8 @@ class DataflowDstSafetyAnalysis
 //
 // A basic block without any successors, on the other hand, can be
 // pessimistically initialized to everything-is-unsafe: this will naturally
-// handle both return and tail call instructions and is harmless for
-// internal indirect branch instructions (such as computed gotos).
+// handle return, trap and tail call instructions. At the same time, it is
+// harmless for internal indirect branch instructions, like computed gotos.
 if (BB.succ_empty())
   return createUnsafeState();
 
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp 
b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 9d5a578cfbdff..b669d32cc2032 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -386,10 +386,9 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
 // the list of successors of this basic block as appropriate.
 
 // Any of the above code sequences assume the fall-through basic block
-// is a dead-end BRK instruction (any immediate operand is accepted).
+// is a dead-end trap instruction.
 const BinaryBasicBlock *BreakBB = BB.getFallthrough();
-if (!BreakBB || BreakBB->empty() ||
-BreakBB->front().getOpcode() != AArch64::BRK)
+if (!BreakBB || BreakBB->empty() || !isTrap(BreakBB->front()))
   return std::nullopt;
 
 // Iterate over the instructions of BB in reverse order, matching opcodes
@@ -1751,6 +1750,25 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
 Inst.addOperand(MCOperand::createImm(0));
   }
 

[llvm-branch-commits] [llvm] [BOLT] Introduce helpers to match `MCInst`s one at a time (NFC) (PR #138883)

2025-05-26 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/138883

>From 7873996c4cd0e41cbae88060ef17f9168922476c Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Wed, 7 May 2025 16:42:00 +0300
Subject: [PATCH] [BOLT] Introduce helpers to match `MCInst`s one at a time
 (NFC)

Introduce matchInst helper function to capture and/or match the operands
of MCInst. Unlike the existing `MCPlusBuilder::MCInstMatcher` machinery,
matchInst is intended for the use cases when precise control over the
instruction order is required. For example, when validating PtrAuth
hardening, all registers are usually considered unsafe after a function
call, even though callee-saved registers should preserve their old
values *under normal operation*.
---
 bolt/include/bolt/Core/MCInstUtils.h  | 128 ++
 .../Target/AArch64/AArch64MCPlusBuilder.cpp   |  90 +---
 2 files changed, 162 insertions(+), 56 deletions(-)

diff --git a/bolt/include/bolt/Core/MCInstUtils.h 
b/bolt/include/bolt/Core/MCInstUtils.h
index 69bf5e6159b74..50b7d56470c99 100644
--- a/bolt/include/bolt/Core/MCInstUtils.h
+++ b/bolt/include/bolt/Core/MCInstUtils.h
@@ -162,6 +162,134 @@ static inline raw_ostream &operator<<(raw_ostream &OS,
   return Ref.print(OS);
 }
 
+/// Instruction-matching helpers operating on a single instruction at a time.
+///
+/// Unlike MCPlusBuilder::MCInstMatcher, this matchInst() function focuses on
+/// the cases where precise control over the instruction order is important:
+///
+/// // Bring the short names into the local scope:
+/// using namespace MCInstMatcher;
+/// // Declare the registers to capture:
+/// Reg Xn, Xm;
+/// // Capture the 0th and 1st operands, match the 2nd operand against the
+/// // just captured Xm register, match the 3rd operand against literal 0:
+/// if (!matchInst(MaybeAdd, AArch64::ADDXrs, Xm, Xn, Xm, Imm(0)))
+///   return AArch64::NoRegister;
+/// // Match the 0th operand against Xm:
+/// if (!matchInst(MaybeBr, AArch64::BR, Xm))
+///   return AArch64::NoRegister;
+/// // Return the matched register:
+/// return Xm.get();
+namespace MCInstMatcher {
+
+// The base class to match an operand of type T.
+//
+// The subclasses of OpMatcher are intended to be allocated on the stack and
+// to only be used by passing them to matchInst() and by calling their get()
+// function, thus the peculiar `mutable` specifiers: to make the calling code
+// compact and readable, the templated matchInst() function has to accept both
+// long-lived Imm/Reg wrappers declared as local variables (intended to capture
+// the first operand's value and match the subsequent operands, whether inside
+// a single instruction or across multiple instructions), as well as temporary
+// wrappers around literal values to match, f.e. Imm(42) or Reg(AArch64::XZR).
+template  class OpMatcher {
+  mutable std::optional Value;
+  mutable std::optional SavedValue;
+
+  // Remember/restore the last Value - to be called by matchInst.
+  void remember() const { SavedValue = Value; }
+  void restore() const { Value = SavedValue; }
+
+  template 
+  friend bool matchInst(const MCInst &, unsigned, const OpMatchers &...);
+
+protected:
+  OpMatcher(std::optional ValueToMatch) : Value(ValueToMatch) {}
+
+  bool matchValue(T OpValue) const {
+// Check that OpValue does not contradict the existing Value.
+bool MatchResult = !Value || *Value == OpValue;
+// If MatchResult is false, all matchers will be reset before returning from
+// matchInst, including this one, thus no need to assign conditionally.
+Value = OpValue;
+
+return MatchResult;
+  }
+
+public:
+  /// Returns the captured value.
+  T get() const {
+assert(Value.has_value());
+return *Value;
+  }
+};
+
+class Reg : public OpMatcher {
+  bool matches(const MCOperand &Op) const {
+if (!Op.isReg())
+  return false;
+
+return matchValue(Op.getReg());
+  }
+
+  template 
+  friend bool matchInst(const MCInst &, unsigned, const OpMatchers &...);
+
+public:
+  Reg(std::optional RegToMatch = std::nullopt)
+  : OpMatcher(RegToMatch) {}
+};
+
+class Imm : public OpMatcher {
+  bool matches(const MCOperand &Op) const {
+if (!Op.isImm())
+  return false;
+
+return matchValue(Op.getImm());
+  }
+
+  template 
+  friend bool matchInst(const MCInst &, unsigned, const OpMatchers &...);
+
+public:
+  Imm(std::optional ImmToMatch = std::nullopt)
+  : OpMatcher(ImmToMatch) {}
+};
+
+/// Tries to match Inst and updates Ops on success.
+///
+/// If Inst has the specified Opcode and its operand list prefix matches Ops,
+/// this function returns true and updates Ops, otherwise false is returned and
+/// values of Ops are kept as before matchInst was called.
+///
+/// Please note that while Ops are technically passed by a const reference to
+/// make invocations like `matchInst(MI, Opcode, Imm(42))` possible, all their
+/// fields are marked mut

[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: do not crash on debug-printing CFI instructions (PR #136151)

2025-05-26 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/136151

>From 1e97d81fb85bd2210ead550e46f703c979dec8f7 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Tue, 15 Apr 2025 21:47:18 +0300
Subject: [PATCH] [BOLT] Gadget scanner: do not crash on debug-printing CFI
 instructions

Some instruction-printing code used under LLVM_DEBUG does not handle CFI
instructions well. While CFI instructions seem to be harmless for the
correctness of the analysis results, they do not convey any useful
information to the analysis either, so skip them early.
---
 bolt/lib/Passes/PAuthGadgetScanner.cpp| 16 ++
 .../AArch64/gs-pauth-debug-output.s   | 32 +++
 2 files changed, 48 insertions(+)

diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp 
b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index 99d078931c9e9..adf6d57ac 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -429,6 +429,9 @@ class SrcSafetyAnalysis {
   }
 
   SrcState computeNext(const MCInst &Point, const SrcState &Cur) {
+if (BC.MIB->isCFI(Point))
+  return Cur;
+
 SrcStatePrinter P(BC);
 LLVM_DEBUG({
   dbgs() << "  SrcSafetyAnalysis::ComputeNext(";
@@ -703,6 +706,8 @@ class CFGUnawareSrcSafetyAnalysis : public 
SrcSafetyAnalysis,
 SrcState S = createEntryState();
 for (auto &I : BF.instrs()) {
   MCInst &Inst = I.second;
+  if (BC.MIB->isCFI(Inst))
+continue;
 
   // If there is a label before this instruction, it is possible that it
   // can be jumped-to, thus conservatively resetting S. As an exception,
@@ -982,6 +987,9 @@ class DstSafetyAnalysis {
   }
 
   DstState computeNext(const MCInst &Point, const DstState &Cur) {
+if (BC.MIB->isCFI(Point))
+  return Cur;
+
 DstStatePrinter P(BC);
 LLVM_DEBUG({
   dbgs() << "  DstSafetyAnalysis::ComputeNext(";
@@ -1151,6 +1159,8 @@ class CFGUnawareDstSafetyAnalysis : public 
DstSafetyAnalysis,
 DstState S = createUnsafeState();
 for (auto &I : llvm::reverse(BF.instrs())) {
   MCInst &Inst = I.second;
+  if (BC.MIB->isCFI(Inst))
+continue;
 
   // If Inst can change the control flow, we cannot be sure that the next
   // instruction (to be executed in analyzed program) is the one processed
@@ -1341,6 +1351,9 @@ void FunctionAnalysisContext::findUnsafeUses(
   });
 
   iterateOverInstrs(BF, [&](MCInstReference Inst) {
+if (BC.MIB->isCFI(Inst))
+  return;
+
 const SrcState &S = Analysis->getStateBefore(Inst);
 
 // If non-empty state was never propagated from the entry basic block
@@ -1404,6 +1417,9 @@ void FunctionAnalysisContext::findUnsafeDefs(
   });
 
   iterateOverInstrs(BF, [&](MCInstReference Inst) {
+if (BC.MIB->isCFI(Inst))
+  return;
+
 const DstState &S = Analysis->getStateAfter(Inst);
 
 if (auto Report = shouldReportAuthOracle(BC, Inst, S))
diff --git a/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s 
b/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s
index 61aa84377b88e..5aec945621987 100644
--- a/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s
+++ b/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s
@@ -329,6 +329,38 @@ auth_oracle:
 // PAUTH-EMPTY:
 // PAUTH-NEXT:   Attaching leakage info to: :  autia   x0, x1 
# DataflowDstSafetyAnalysis: dst-state
 
+// Gadget scanner should not crash on CFI instructions, including when debug-printing them.
+// Note that the particular debug output is not checked, but BOLT should be
+// compiled with assertions enabled to support -debug-only argument.
+
+.globl  cfi_inst_df
+.type   cfi_inst_df,@function
+cfi_inst_df:
+.cfi_startproc
+sub sp, sp, #16
+.cfi_def_cfa_offset 16
+add sp, sp, #16
+.cfi_def_cfa_offset 0
+ret
+.size   cfi_inst_df, .-cfi_inst_df
+.cfi_endproc
+
+.globl  cfi_inst_nocfg
+.type   cfi_inst_nocfg,@function
+cfi_inst_nocfg:
+.cfi_startproc
+sub sp, sp, #16
+.cfi_def_cfa_offset 16
+
+adr x0, 1f
+br  x0
+1:
+add sp, sp, #16
+.cfi_def_cfa_offset 0
+ret
+.size   cfi_inst_nocfg, .-cfi_inst_nocfg
+.cfi_endproc
+
 // CHECK-LABEL:Analyzing function main, AllocatorId = 1
 .globl  main
 .type   main,@function

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: optionally assume auth traps on failure (PR #139778)

2025-05-26 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/139778

>From 2c67c3f02d8474185ad6998142b05c1e7a0eeffa Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Tue, 13 May 2025 19:50:41 +0300
Subject: [PATCH] [BOLT] Gadget scanner: optionally assume auth traps on
 failure

On AArch64 it is possible for an auth instruction to either return an
invalid address value on failure (without FEAT_FPAC) or generate an
error (with FEAT_FPAC). It thus may be possible to never emit explicit
pointer checks, if the target CPU is known to support FEAT_FPAC.

This commit implements an --auth-traps-on-failure command line option,
which essentially makes "safe-to-dereference" and "trusted" register
properties identical and disables scanning for authentication oracles
completely.
---
 bolt/lib/Passes/PAuthGadgetScanner.cpp| 112 +++
 .../binary-analysis/AArch64/cmdline-args.test |   1 +
 .../AArch64/gs-pauth-authentication-oracles.s |   6 +-
 .../binary-analysis/AArch64/gs-pauth-calls.s  |   5 +-
 .../AArch64/gs-pauth-debug-output.s   | 177 ++---
 .../AArch64/gs-pauth-jump-table.s |   6 +-
 .../AArch64/gs-pauth-signing-oracles.s|  54 ++---
 .../AArch64/gs-pauth-tail-calls.s | 184 +-
 8 files changed, 318 insertions(+), 227 deletions(-)

diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp 
b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index f979b1480d0b1..119cb928337a9 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -14,6 +14,7 @@
 #include "bolt/Passes/PAuthGadgetScanner.h"
 #include "bolt/Core/ParallelUtilities.h"
 #include "bolt/Passes/DataflowAnalysis.h"
+#include "bolt/Utils/CommandLineOpts.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/MC/MCInst.h"
@@ -26,6 +27,11 @@ namespace llvm {
 namespace bolt {
 namespace PAuthGadgetScanner {
 
+static cl::opt AuthTrapsOnFailure(
+"auth-traps-on-failure",
+cl::desc("Assume authentication instructions always trap on failure"),
+cl::cat(opts::BinaryAnalysisCategory));
+
 [[maybe_unused]] static void traceInst(const BinaryContext &BC, StringRef 
Label,
const MCInst &MI) {
   dbgs() << "  " << Label << ": ";
@@ -363,6 +369,34 @@ class SrcSafetyAnalysis {
 return Clobbered;
   }
 
+  std::optional getRegMadeTrustedByChecking(const MCInst &Inst,
+   SrcState Cur) const {
+// This function cannot return multiple registers. This is never the case
+// on AArch64.
+std::optional RegCheckedByInst =
+BC.MIB->getAuthCheckedReg(Inst, /*MayOverwrite=*/false);
+if (RegCheckedByInst && Cur.SafeToDerefRegs[*RegCheckedByInst])
+  return *RegCheckedByInst;
+
+auto It = CheckerSequenceInfo.find(&Inst);
+if (It == CheckerSequenceInfo.end())
+  return std::nullopt;
+
+MCPhysReg RegCheckedBySequence = It->second.first;
+const MCInst *FirstCheckerInst = It->second.second;
+
+// FirstCheckerInst should belong to the same basic block (see the
+// assertion in DataflowSrcSafetyAnalysis::run()), meaning it was
+// deterministically processed a few steps before this instruction.
+const SrcState &StateBeforeChecker = getStateBefore(*FirstCheckerInst);
+
+// The sequence checks the register, but it should be authenticated before.
+if (!StateBeforeChecker.SafeToDerefRegs[RegCheckedBySequence])
+  return std::nullopt;
+
+return RegCheckedBySequence;
+  }
+
   // Returns all registers that can be treated as if they are written by an
   // authentication instruction.
   SmallVector getRegsMadeSafeToDeref(const MCInst &Point,
@@ -385,18 +419,38 @@ class SrcSafetyAnalysis {
 Regs.push_back(DstAndSrc->first);
 }
 
+// Make sure explicit checker sequence keeps register safe-to-dereference
+// when the register would be clobbered according to the regular rules:
+//
+//; LR is safe to dereference here
+//mov   x16, x30  ; start of the sequence, LR is s-t-d right before
+//xpaclri ; clobbers LR, LR is not safe anymore
+//cmp   x30, x16
+//b.eq  1f; end of the sequence: LR is marked as trusted
+//brk   0x1234
+//  1:
+//; at this point LR would be marked as trusted,
+//; but not safe-to-dereference
+//
+// or even just
+//
+//; X1 is safe to dereference here
+//ldr x0, [x1, #8]!
+//; X1 is trusted here, but it was clobbered due to address write-back
+if (auto CheckedReg = getRegMadeTrustedByChecking(Point, Cur))
+  Regs.push_back(*CheckedReg);
+
 return Regs;
   }
 
   // Returns all registers made trusted by this instruction.
   SmallVector getRegsMadeTrusted(const MCInst &Point,
 const SrcState &Cur) const {
+assert(!AuthTrapsOnFailure &&

[llvm-branch-commits] [llvm] [BOLT] Factor out MCInstReference from gadget scanner (NFC) (PR #138655)

2025-05-26 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/138655

>From a00612e35f986bf1eb383d3c7875adee2785a109 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Mon, 28 Apr 2025 18:35:48 +0300
Subject: [PATCH] [BOLT] Factor out MCInstReference from gadget scanner (NFC)

Move MCInstReference representing a constant reference to an instruction
inside a parent entity - either inside a basic block (which has a
reference to its parent function) or directly to the function (when CFG
information is not available).
---
 bolt/include/bolt/Core/MCInstUtils.h  | 168 +
 bolt/include/bolt/Passes/PAuthGadgetScanner.h | 178 +-
 bolt/lib/Core/CMakeLists.txt  |   1 +
 bolt/lib/Core/MCInstUtils.cpp |  57 ++
 bolt/lib/Passes/PAuthGadgetScanner.cpp| 102 +-
 5 files changed, 269 insertions(+), 237 deletions(-)
 create mode 100644 bolt/include/bolt/Core/MCInstUtils.h
 create mode 100644 bolt/lib/Core/MCInstUtils.cpp

diff --git a/bolt/include/bolt/Core/MCInstUtils.h 
b/bolt/include/bolt/Core/MCInstUtils.h
new file mode 100644
index 0..69bf5e6159b74
--- /dev/null
+++ b/bolt/include/bolt/Core/MCInstUtils.h
@@ -0,0 +1,168 @@
+//===- bolt/Core/MCInstUtils.h --*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef BOLT_CORE_MCINSTUTILS_H
+#define BOLT_CORE_MCINSTUTILS_H
+
+#include "bolt/Core/BinaryBasicBlock.h"
+
+#include 
+#include 
+#include 
+
+namespace llvm {
+namespace bolt {
+
+class BinaryFunction;
+
+/// MCInstReference represents a reference to a constant MCInst as stored either
+/// in a BinaryFunction (i.e. before a CFG is created), or in a BinaryBasicBlock
+/// (after a CFG is created).
+class MCInstReference {
+  using nocfg_const_iterator = std::map<uint32_t, MCInst>::const_iterator;
+
+  // Two cases are possible:
+  // * functions with CFG reconstructed - a function stores a collection of
+  //   basic blocks, each basic block stores a contiguous vector of MCInst
+  // * functions without CFG - there are no basic blocks created,
+  //   the instructions are directly stored in std::map in BinaryFunction
+  //
+  // In both cases, the direct parent of MCInst is stored together with an
+  // iterator pointing to the instruction.
+
+  // Helper struct: CFG is available, the direct parent is a basic block,
+  // iterator's type is `MCInst *`.
+  struct RefInBB {
+RefInBB(const BinaryBasicBlock *BB, const MCInst *Inst)
+: BB(BB), It(Inst) {}
+RefInBB(const RefInBB &Other) = default;
+RefInBB &operator=(const RefInBB &Other) = default;
+
+const BinaryBasicBlock *BB;
+BinaryBasicBlock::const_iterator It;
+
+bool operator<(const RefInBB &Other) const {
+  return std::tie(BB, It) < std::tie(Other.BB, Other.It);
+}
+
+bool operator==(const RefInBB &Other) const {
+  return BB == Other.BB && It == Other.It;
+}
+  };
+
+  // Helper struct: CFG is *not* available, the direct parent is a function,
+  // iterator's type is std::map<uint32_t, MCInst>::iterator (the mapped value
+  // is an instruction's offset).
+  struct RefInBF {
+RefInBF(const BinaryFunction *BF, nocfg_const_iterator It)
+: BF(BF), It(It) {}
+RefInBF(const RefInBF &Other) = default;
+RefInBF &operator=(const RefInBF &Other) = default;
+
+const BinaryFunction *BF;
+nocfg_const_iterator It;
+
+bool operator<(const RefInBF &Other) const {
+  return std::tie(BF, It->first) < std::tie(Other.BF, Other.It->first);
+}
+
+bool operator==(const RefInBF &Other) const {
+  return BF == Other.BF && It->first == Other.It->first;
+}
+  };
+
+  std::variant<RefInBB, RefInBF> Reference;
+
+  // Utility methods to be used like this:
+  //
+  // if (auto *Ref = tryGetRefInBB())
+  //   return Ref->doSomething(...);
+  // return getRefInBF().doSomethingElse(...);
+  const RefInBB *tryGetRefInBB() const {
+assert(std::get_if<RefInBB>(&Reference) ||
+   std::get_if<RefInBF>(&Reference));
+return std::get_if<RefInBB>(&Reference);
+  }
+  const RefInBF &getRefInBF() const {
+assert(std::get_if<RefInBF>(&Reference));
+return *std::get_if<RefInBF>(&Reference);
+  }
+
+public:
+  /// Constructs an empty reference.
+  MCInstReference() : Reference(RefInBB(nullptr, nullptr)) {}
+  /// Constructs a reference to the instruction inside the basic block.
+  MCInstReference(const BinaryBasicBlock *BB, const MCInst *Inst)
+  : Reference(RefInBB(BB, Inst)) {
+assert(BB && Inst && "Neither BB nor Inst should be nullptr");
+  }
+  /// Constructs a reference to the instruction inside the basic block.
+  MCInstReference(const BinaryBasicBlock *BB, unsigned Index)
+  : Reference(RefInBB(BB, &BB->getInstructionAtIndex(I

[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: detect untrusted LR before tail call (PR #137224)

2025-05-26 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/137224

>From 41914bd7fd1eefb7cba31c0c825cc119d01b35b6 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Tue, 22 Apr 2025 21:43:14 +0300
Subject: [PATCH 1/2] [BOLT] Gadget scanner: detect untrusted LR before tail
 call

Implement the detection of tail calls performed with untrusted link
register, which violates the assumption made on entry to every function.

Unlike other pauth gadgets, this one involves some amount of guessing
which branch instructions should be checked as tail calls.
---
 bolt/lib/Passes/PAuthGadgetScanner.cpp|  94 ++-
 .../AArch64/gs-pacret-autiasp.s   |  31 +-
 .../AArch64/gs-pauth-debug-output.s   |  30 +-
 .../AArch64/gs-pauth-tail-calls.s | 597 ++
 4 files changed, 706 insertions(+), 46 deletions(-)
 create mode 100644 bolt/test/binary-analysis/AArch64/gs-pauth-tail-calls.s

diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp 
b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index c34ca620bb50b..49087eab3ce9a 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -736,19 +736,14 @@ template  class CFGUnawareAnalysis {
 //
 // Then, a function can be split into a number of disjoint contiguous sequences
 // of instructions without labels in between. These sequences can be processed
-// the same way basic blocks are processed by data-flow analysis, assuming
-// pessimistically that all registers are unsafe at the start of each sequence.
+// the same way basic blocks are processed by data-flow analysis, with the same
+// pessimistic estimation of the initial state at the start of each sequence
+// (except the first instruction of the function).
 class CFGUnawareSrcSafetyAnalysis : public SrcSafetyAnalysis,
 public CFGUnawareAnalysis {
   using SrcSafetyAnalysis::BC;
   BinaryFunction &BF;
 
-  /// Creates a state with all registers marked unsafe (not to be confused
-  /// with empty state).
-  SrcState createUnsafeState() const {
-return SrcState(NumRegs, RegsToTrackInstsFor.getNumTrackedRegisters());
-  }
-
 public:
   CFGUnawareSrcSafetyAnalysis(BinaryFunction &BF,
   MCPlusBuilder::AllocatorIdTy AllocId,
@@ -758,6 +753,7 @@ class CFGUnawareSrcSafetyAnalysis : public 
SrcSafetyAnalysis,
   }
 
   void run() override {
+const SrcState DefaultState = computePessimisticState(BF);
 SrcState S = createEntryState();
 for (auto &I : BF.instrs()) {
   MCInst &Inst = I.second;
@@ -772,7 +768,7 @@ class CFGUnawareSrcSafetyAnalysis : public 
SrcSafetyAnalysis,
 LLVM_DEBUG({
   traceInst(BC, "Due to label, resetting the state before", Inst);
 });
-S = createUnsafeState();
+S = DefaultState;
   }
 
   // Attach the state *before* this instruction executes.
@@ -1297,6 +1293,83 @@ shouldReportReturnGadget(const BinaryContext &BC, const 
MCInstReference &Inst,
   return make_gadget_report(RetKind, Inst, *RetReg);
 }
 
+/// While BOLT already marks some of the branch instructions as tail calls,
+/// this function tries to improve the coverage by including less obvious cases
+/// when it is possible to do without introducing too many false positives.
+static bool shouldAnalyzeTailCallInst(const BinaryContext &BC,
+  const BinaryFunction &BF,
+  const MCInstReference &Inst) {
+  // Some BC.MIB->isXYZ(Inst) methods simply delegate to MCInstrDesc::isXYZ()
+  // (such as isBranch at the time of writing this comment), some don't (such
+  // as isCall). For that reason, call MCInstrDesc's methods explicitly when
+  // it is important.
+  const MCInstrDesc &Desc =
+  BC.MII->get(static_cast(Inst).getOpcode());
+  // Tail call should be a branch (but not necessarily an indirect one).
+  if (!Desc.isBranch())
+return false;
+
+  // Always analyze the branches already marked as tail calls by BOLT.
+  if (BC.MIB->isTailCall(Inst))
+return true;
+
+  // Try to also check the branches marked as "UNKNOWN CONTROL FLOW" - the
+  // below is a simplified condition from BinaryContext::printInstruction.
+  bool IsUnknownControlFlow =
+  BC.MIB->isIndirectBranch(Inst) && !BC.MIB->getJumpTable(Inst);
+
+  if (BF.hasCFG() && IsUnknownControlFlow)
+return true;
+
+  return false;
+}
+
+static std::optional>
+shouldReportUnsafeTailCall(const BinaryContext &BC, const BinaryFunction &BF,
+   const MCInstReference &Inst, const SrcState &S) {
+  static const GadgetKind UntrustedLRKind(
+  "untrusted link register found before tail call");
+
+  if (!shouldAnalyzeTailCallInst(BC, BF, Inst))
+return std::nullopt;
+
+  // Not only the set of registers returned by getTrustedLiveInRegs() can be
+  // seen as a reasonable target-independent _approximation_ of "the LR", these
+  // are *exactly* those regis

[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: prevent false positives due to jump tables (PR #138884)

2025-05-26 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/138884

>From e12f7b5b78cb96addbdb7c83d492fd491b68e773 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Tue, 6 May 2025 11:31:03 +0300
Subject: [PATCH] [BOLT] Gadget scanner: prevent false positives due to jump
 tables

As part of PAuth hardening, AArch64 LLVM backend can use a special
BR_JumpTable pseudo (enabled by -faarch64-jump-table-hardening
Clang option) which is expanded in the AsmPrinter into a contiguous
sequence without unsafe instructions in the middle.

This commit adds another target-specific callback to MCPlusBuilder
to make it possible to inhibit false positives for known-safe jump
table dispatch sequences. Without special handling, the branch
instruction is likely to be reported as a non-protected call (as its
destination is not produced by an auth instruction, PC-relative address
materialization, etc.) and possibly as a tail call being performed with
unsafe link register (as the detection whether the branch instruction
is a tail call is a heuristic).

For now, only the specific instruction sequence used by the AArch64
LLVM backend is matched.
---
 bolt/include/bolt/Core/MCInstUtils.h  |   9 +
 bolt/include/bolt/Core/MCPlusBuilder.h|  14 +
 bolt/lib/Core/MCInstUtils.cpp |  20 +
 bolt/lib/Passes/PAuthGadgetScanner.cpp|  10 +
 .../Target/AArch64/AArch64MCPlusBuilder.cpp   |  73 ++
 .../AArch64/gs-pauth-jump-table.s | 703 ++
 6 files changed, 829 insertions(+)
 create mode 100644 bolt/test/binary-analysis/AArch64/gs-pauth-jump-table.s

diff --git a/bolt/include/bolt/Core/MCInstUtils.h 
b/bolt/include/bolt/Core/MCInstUtils.h
index 50b7d56470c99..33d36cccbcfff 100644
--- a/bolt/include/bolt/Core/MCInstUtils.h
+++ b/bolt/include/bolt/Core/MCInstUtils.h
@@ -154,6 +154,15 @@ class MCInstReference {
 return nullptr;
   }
 
+  /// Returns the only preceding instruction, or std::nullopt if multiple or no
+  /// predecessors are possible.
+  ///
+  /// If CFG information is available, basic block boundary can be crossed,
+  /// provided there is exactly one predecessor. If CFG is not available, the
+  /// preceding instruction in the offset order is returned, unless this is the
+  /// first instruction of the function.
+  std::optional<MCInstReference> getSinglePredecessor();
+
   raw_ostream &print(raw_ostream &OS) const;
 };
 
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h 
b/bolt/include/bolt/Core/MCPlusBuilder.h
index c8cbcaf33f4b5..3abf4d18e94da 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -14,6 +14,7 @@
 #ifndef BOLT_CORE_MCPLUSBUILDER_H
 #define BOLT_CORE_MCPLUSBUILDER_H
 
+#include "bolt/Core/MCInstUtils.h"
 #include "bolt/Core/MCPlus.h"
 #include "bolt/Core/Relocation.h"
 #include "llvm/ADT/ArrayRef.h"
@@ -700,6 +701,19 @@ class MCPlusBuilder {
 return std::nullopt;
   }
 
+  /// Tests if BranchInst corresponds to an instruction sequence which is known
+  /// to be a safe dispatch via jump table.
+  ///
+  /// The target can decide which instruction sequences to consider "safe" from
+  /// the Pointer Authentication point of view, such as any jump table dispatch
+  /// sequence without function calls inside, any sequence which is contiguous,
+  /// or only some specific well-known sequences.
+  virtual bool
+  isSafeJumpTableBranchForPtrAuth(MCInstReference BranchInst) const {
+llvm_unreachable("not implemented");
+return false;
+  }
+
   virtual bool isTerminator(const MCInst &Inst) const;
 
   virtual bool isNoop(const MCInst &Inst) const {
diff --git a/bolt/lib/Core/MCInstUtils.cpp b/bolt/lib/Core/MCInstUtils.cpp
index 40f6edd59135c..b7c6d898988af 100644
--- a/bolt/lib/Core/MCInstUtils.cpp
+++ b/bolt/lib/Core/MCInstUtils.cpp
@@ -55,3 +55,23 @@ raw_ostream &MCInstReference::print(raw_ostream &OS) const {
   OS << ">";
   return OS;
 }
+
+std::optional<MCInstReference> MCInstReference::getSinglePredecessor() {
+  if (const RefInBB *Ref = tryGetRefInBB()) {
+if (Ref->It != Ref->BB->begin())
+  return MCInstReference(Ref->BB, &*std::prev(Ref->It));
+
+if (Ref->BB->pred_size() != 1)
+  return std::nullopt;
+
+BinaryBasicBlock *PredBB = *Ref->BB->pred_begin();
+assert(!PredBB->empty() && "Empty basic blocks are not supported yet");
+return MCInstReference(PredBB, &*PredBB->rbegin());
+  }
+
+  const RefInBF &Ref = getRefInBF();
+  if (Ref.It == Ref.BF->instrs().begin())
+return std::nullopt;
+
+  return MCInstReference(Ref.BF, std::prev(Ref.It));
+}
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp 
b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index 573db1856a134..f979b1480d0b1 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -1337,6 +1337,11 @@ shouldReportUnsafeTailCall(const BinaryContext &BC, 
const BinaryFunction &BF,
 return std::nullopt;
   }
 
+  if (BC.MIB->isSafeJumpTableBranchForPtrAuth(Inst)) {
+LL

[llvm-branch-commits] [llvm] [BOLT] Factor out MCInstReference from gadget scanner (NFC) (PR #138655)

2025-05-26 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/138655

>From a00612e35f986bf1eb383d3c7875adee2785a109 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Mon, 28 Apr 2025 18:35:48 +0300
Subject: [PATCH] [BOLT] Factor out MCInstReference from gadget scanner (NFC)

Move MCInstReference representing a constant reference to an instruction
inside a parent entity - either inside a basic block (which has a
reference to its parent function) or directly to the function (when CFG
information is not available).
---
 bolt/include/bolt/Core/MCInstUtils.h  | 168 +
 bolt/include/bolt/Passes/PAuthGadgetScanner.h | 178 +-
 bolt/lib/Core/CMakeLists.txt  |   1 +
 bolt/lib/Core/MCInstUtils.cpp |  57 ++
 bolt/lib/Passes/PAuthGadgetScanner.cpp| 102 +-
 5 files changed, 269 insertions(+), 237 deletions(-)
 create mode 100644 bolt/include/bolt/Core/MCInstUtils.h
 create mode 100644 bolt/lib/Core/MCInstUtils.cpp

diff --git a/bolt/include/bolt/Core/MCInstUtils.h 
b/bolt/include/bolt/Core/MCInstUtils.h
new file mode 100644
index 0..69bf5e6159b74
--- /dev/null
+++ b/bolt/include/bolt/Core/MCInstUtils.h
@@ -0,0 +1,168 @@
+//===- bolt/Core/MCInstUtils.h --*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef BOLT_CORE_MCINSTUTILS_H
+#define BOLT_CORE_MCINSTUTILS_H
+
+#include "bolt/Core/BinaryBasicBlock.h"
+
+#include 
+#include 
+#include 
+
+namespace llvm {
+namespace bolt {
+
+class BinaryFunction;
+
+/// MCInstReference represents a reference to a constant MCInst as stored either
+/// in a BinaryFunction (i.e. before a CFG is created), or in a BinaryBasicBlock
+/// (after a CFG is created).
+class MCInstReference {
+  using nocfg_const_iterator = std::map<uint32_t, MCInst>::const_iterator;
+
+  // Two cases are possible:
+  // * functions with CFG reconstructed - a function stores a collection of
+  //   basic blocks, each basic block stores a contiguous vector of MCInst
+  // * functions without CFG - there are no basic blocks created,
+  //   the instructions are directly stored in std::map in BinaryFunction
+  //
+  // In both cases, the direct parent of MCInst is stored together with an
+  // iterator pointing to the instruction.
+
+  // Helper struct: CFG is available, the direct parent is a basic block,
+  // iterator's type is `MCInst *`.
+  struct RefInBB {
+RefInBB(const BinaryBasicBlock *BB, const MCInst *Inst)
+: BB(BB), It(Inst) {}
+RefInBB(const RefInBB &Other) = default;
+RefInBB &operator=(const RefInBB &Other) = default;
+
+const BinaryBasicBlock *BB;
+BinaryBasicBlock::const_iterator It;
+
+bool operator<(const RefInBB &Other) const {
+  return std::tie(BB, It) < std::tie(Other.BB, Other.It);
+}
+
+bool operator==(const RefInBB &Other) const {
+  return BB == Other.BB && It == Other.It;
+}
+  };
+
+  // Helper struct: CFG is *not* available, the direct parent is a function,
+  // iterator's type is std::map<uint32_t, MCInst>::iterator (the mapped value
+  // is an instruction's offset).
+  struct RefInBF {
+RefInBF(const BinaryFunction *BF, nocfg_const_iterator It)
+: BF(BF), It(It) {}
+RefInBF(const RefInBF &Other) = default;
+RefInBF &operator=(const RefInBF &Other) = default;
+
+const BinaryFunction *BF;
+nocfg_const_iterator It;
+
+bool operator<(const RefInBF &Other) const {
+  return std::tie(BF, It->first) < std::tie(Other.BF, Other.It->first);
+}
+
+bool operator==(const RefInBF &Other) const {
+  return BF == Other.BF && It->first == Other.It->first;
+}
+  };
+
+  std::variant<RefInBB, RefInBF> Reference;
+
+  // Utility methods to be used like this:
+  //
+  // if (auto *Ref = tryGetRefInBB())
+  //   return Ref->doSomething(...);
+  // return getRefInBF().doSomethingElse(...);
+  const RefInBB *tryGetRefInBB() const {
+assert(std::get_if<RefInBB>(&Reference) ||
+   std::get_if<RefInBF>(&Reference));
+return std::get_if<RefInBB>(&Reference);
+  }
+  const RefInBF &getRefInBF() const {
+assert(std::get_if<RefInBF>(&Reference));
+return *std::get_if<RefInBF>(&Reference);
+  }
+
+public:
+  /// Constructs an empty reference.
+  MCInstReference() : Reference(RefInBB(nullptr, nullptr)) {}
+  /// Constructs a reference to the instruction inside the basic block.
+  MCInstReference(const BinaryBasicBlock *BB, const MCInst *Inst)
+  : Reference(RefInBB(BB, Inst)) {
+assert(BB && Inst && "Neither BB nor Inst should be nullptr");
+  }
+  /// Constructs a reference to the instruction inside the basic block.
+  MCInstReference(const BinaryBasicBlock *BB, unsigned Index)
+  : Reference(RefInBB(BB, &BB->getInstructionAtIndex(I

[llvm-branch-commits] [llvm] [BOLT] Introduce helpers to match `MCInst`s one at a time (NFC) (PR #138883)

2025-05-26 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/138883

>From 7873996c4cd0e41cbae88060ef17f9168922476c Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Wed, 7 May 2025 16:42:00 +0300
Subject: [PATCH] [BOLT] Introduce helpers to match `MCInst`s one at a time
 (NFC)

Introduce matchInst helper function to capture and/or match the operands
of MCInst. Unlike the existing `MCPlusBuilder::MCInstMatcher` machinery,
matchInst is intended for the use cases when precise control over the
instruction order is required. For example, when validating PtrAuth
hardening, all registers are usually considered unsafe after a function
call, even though callee-saved registers should preserve their old
values *under normal operation*.
---
 bolt/include/bolt/Core/MCInstUtils.h  | 128 ++
 .../Target/AArch64/AArch64MCPlusBuilder.cpp   |  90 +---
 2 files changed, 162 insertions(+), 56 deletions(-)

diff --git a/bolt/include/bolt/Core/MCInstUtils.h 
b/bolt/include/bolt/Core/MCInstUtils.h
index 69bf5e6159b74..50b7d56470c99 100644
--- a/bolt/include/bolt/Core/MCInstUtils.h
+++ b/bolt/include/bolt/Core/MCInstUtils.h
@@ -162,6 +162,134 @@ static inline raw_ostream &operator<<(raw_ostream &OS,
   return Ref.print(OS);
 }
 
+/// Instruction-matching helpers operating on a single instruction at a time.
+///
+/// Unlike MCPlusBuilder::MCInstMatcher, this matchInst() function focuses on
+/// the cases where a precise control over the instruction order is important:
+///
+/// // Bring the short names into the local scope:
+/// using namespace MCInstMatcher;
+/// // Declare the registers to capture:
+/// Reg Xn, Xm;
+/// // Capture the 0th and 1st operands, match the 2nd operand against the
+/// // just captured Xm register, match the 3rd operand against literal 0:
+/// if (!matchInst(MaybeAdd, AArch64::ADDXrs, Xm, Xn, Xm, Imm(0))
+///   return AArch64::NoRegister;
+/// // Match the 0th operand against Xm:
+/// if (!matchInst(MaybeBr, AArch64::BR, Xm))
+///   return AArch64::NoRegister;
+/// // Return the matched register:
+/// return Xm.get();
+namespace MCInstMatcher {
+
+// The base class to match an operand of type T.
+//
+// The subclasses of OpMatcher are intended to be allocated on the stack and
+// to only be used by passing them to matchInst() and by calling their get()
+// function, thus the peculiar `mutable` specifiers: to make the calling code
+// compact and readable, the templated matchInst() function has to accept both
+// long-lived Imm/Reg wrappers declared as local variables (intended to capture
+// the first operand's value and match the subsequent operands, whether inside
+// a single instruction or across multiple instructions), as well as temporary
+// wrappers around literal values to match, e.g. Imm(42) or Reg(AArch64::XZR).
+template <typename T> class OpMatcher {
+  mutable std::optional<T> Value;
+  mutable std::optional<T> SavedValue;
+
+  // Remember/restore the last Value - to be called by matchInst.
+  void remember() const { SavedValue = Value; }
+  void restore() const { Value = SavedValue; }
+
+  template 
+  friend bool matchInst(const MCInst &, unsigned, const OpMatchers &...);
+
+protected:
+  OpMatcher(std::optional<T> ValueToMatch) : Value(ValueToMatch) {}
+
+  bool matchValue(T OpValue) const {
+// Check that OpValue does not contradict the existing Value.
+bool MatchResult = !Value || *Value == OpValue;
+// If MatchResult is false, all matchers will be reset before returning from
+// matchInst, including this one, thus no need to assign conditionally.
+Value = OpValue;
+
+return MatchResult;
+  }
+
+public:
+  /// Returns the captured value.
+  T get() const {
+assert(Value.has_value());
+return *Value;
+  }
+};
+
+class Reg : public OpMatcher {
+  bool matches(const MCOperand &Op) const {
+if (!Op.isReg())
+  return false;
+
+return matchValue(Op.getReg());
+  }
+
+  template 
+  friend bool matchInst(const MCInst &, unsigned, const OpMatchers &...);
+
+public:
+  Reg(std::optional RegToMatch = std::nullopt)
+  : OpMatcher(RegToMatch) {}
+};
+
+class Imm : public OpMatcher {
+  bool matches(const MCOperand &Op) const {
+if (!Op.isImm())
+  return false;
+
+return matchValue(Op.getImm());
+  }
+
+  template 
+  friend bool matchInst(const MCInst &, unsigned, const OpMatchers &...);
+
+public:
+  Imm(std::optional ImmToMatch = std::nullopt)
+  : OpMatcher(ImmToMatch) {}
+};
+
+/// Tries to match Inst and updates Ops on success.
+///
+/// If Inst has the specified Opcode and its operand list prefix matches Ops,
+/// this function returns true and updates Ops, otherwise false is returned and
+/// values of Ops are kept as before matchInst was called.
+///
+/// Please note that while Ops are technically passed by a const reference to
+/// make invocations like `matchInst(MI, Opcode, Imm(42))` possible, all their
+/// fields are marked mut

[llvm-branch-commits] [clang] release/20.x: [clang-format] Handle Java text blocks (#141334) (PR #141433)

2025-05-26 Thread Björn Schäpers via llvm-branch-commits

https://github.com/HazardyKnusperkeks approved this pull request.


https://github.com/llvm/llvm-project/pull/141433
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: improve handling of unreachable basic blocks (PR #136183)

2025-05-26 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/136183

>From 8e8047416670662a22a98777429eb37511546d33 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Thu, 17 Apr 2025 20:51:16 +0300
Subject: [PATCH 1/3] [BOLT] Gadget scanner: improve handling of unreachable
 basic blocks

Instead of refusing to analyze an instruction completely, when it is
unreachable according to the CFG reconstructed by BOLT, pessimistically
assume all registers to be unsafe at the start of basic blocks without
any predecessors. Nevertheless, unreachable basic blocks found in
optimized code likely mean imprecise CFG reconstruction, thus report a
warning once per basic block without predecessors.
---
 bolt/lib/Passes/PAuthGadgetScanner.cpp| 46 ++-
 .../AArch64/gs-pacret-autiasp.s   |  7 ++-
 .../binary-analysis/AArch64/gs-pauth-calls.s  | 57 +++
 3 files changed, 95 insertions(+), 15 deletions(-)

diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp 
b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index adf6d57ac..c0e01bb7aa845 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -341,6 +341,12 @@ class SrcSafetyAnalysis {
 return S;
   }
 
+  /// Creates a state with all registers marked unsafe (not to be confused
+  /// with empty state).
+  SrcState createUnsafeState() const {
+return SrcState(NumRegs, RegsToTrackInstsFor.getNumTrackedRegisters());
+  }
+
   BitVector getClobberedRegs(const MCInst &Point) const {
 BitVector Clobbered(NumRegs);
 // Assume a call can clobber all registers, including callee-saved
@@ -584,6 +590,13 @@ class DataflowSrcSafetyAnalysis
 if (BB.isEntryPoint())
   return createEntryState();
 
+// If a basic block without any predecessors is found in optimized code,
+// this likely means that some CFG edges were not detected. Pessimistically
+// assume all registers to be unsafe before this basic block and warn about
+// this fact in FunctionAnalysis::findUnsafeUses().
+if (BB.pred_empty())
+  return createUnsafeState();
+
 return SrcState();
   }
 
@@ -688,12 +701,6 @@ class CFGUnawareSrcSafetyAnalysis : public 
SrcSafetyAnalysis,
   using SrcSafetyAnalysis::BC;
   BinaryFunction &BF;
 
-  /// Creates a state with all registers marked unsafe (not to be confused
-  /// with empty state).
-  SrcState createUnsafeState() const {
-return SrcState(NumRegs, RegsToTrackInstsFor.getNumTrackedRegisters());
-  }
-
 public:
   CFGUnawareSrcSafetyAnalysis(BinaryFunction &BF,
   MCPlusBuilder::AllocatorIdTy AllocId,
@@ -1350,19 +1357,30 @@ void FunctionAnalysisContext::findUnsafeUses(
 BF.dump();
   });
 
+  if (BF.hasCFG()) {
+// Warn on basic blocks being unreachable according to BOLT, as this
+// likely means CFG is imprecise.
+for (BinaryBasicBlock &BB : BF) {
+  if (!BB.pred_empty() || BB.isEntryPoint())
+continue;
+  // Arbitrarily attach the report to the first instruction of BB.
+  MCInst *InstToReport = BB.getFirstNonPseudoInstr();
+  if (!InstToReport)
+continue; // BB has no real instructions
+
+  Reports.push_back(
+  make_generic_report(MCInstReference::get(InstToReport, BF),
+  "Warning: no predecessor basic blocks detected "
+  "(possibly incomplete CFG)"));
+}
+  }
+
   iterateOverInstrs(BF, [&](MCInstReference Inst) {
 if (BC.MIB->isCFI(Inst))
   return;
 
 const SrcState &S = Analysis->getStateBefore(Inst);
-
-// If non-empty state was never propagated from the entry basic block
-// to Inst, assume it to be unreachable and report a warning.
-if (S.empty()) {
-  Reports.push_back(
-  make_generic_report(Inst, "Warning: unreachable instruction found"));
-  return;
-}
+assert(!S.empty() && "Instruction has no associated state");
 
 if (auto Report = shouldReportReturnGadget(BC, Inst, S))
   Reports.push_back(*Report);
diff --git a/bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s 
b/bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s
index 284f0bea607a5..6559ba336e8de 100644
--- a/bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s
+++ b/bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s
@@ -215,12 +215,17 @@ f_callclobbered_calleesaved:
 .globl  f_unreachable_instruction
 .type   f_unreachable_instruction,@function
 f_unreachable_instruction:
-// CHECK-LABEL: GS-PAUTH: Warning: unreachable instruction found in function 
f_unreachable_instruction, basic block {{[0-9a-zA-Z.]+}}, at address
+// CHECK-LABEL: GS-PAUTH: Warning: no predecessor basic blocks detected 
(possibly incomplete CFG) in function f_unreachable_instruction, basic block 
{{[0-9a-zA-Z.]+}}, at address
 // CHECK-NEXT:The instruction is {{[0-9a-f]+}}:   add x0, x1, 
x2
 // CHECK-NOT:   instructions that write t

[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: do not crash on debug-printing CFI instructions (PR #136151)

2025-05-26 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/136151

>From 1e97d81fb85bd2210ead550e46f703c979dec8f7 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Tue, 15 Apr 2025 21:47:18 +0300
Subject: [PATCH] [BOLT] Gadget scanner: do not crash on debug-printing CFI
 instructions

Some instruction-printing code used under LLVM_DEBUG does not handle CFI
instructions well. While CFI instructions seem to be harmless for the
correctness of the analysis results, they do not convey any useful
information to the analysis either, so skip them early.
---
 bolt/lib/Passes/PAuthGadgetScanner.cpp| 16 ++
 .../AArch64/gs-pauth-debug-output.s   | 32 +++
 2 files changed, 48 insertions(+)

diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp 
b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index 99d078931c9e9..adf6d57ac 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -429,6 +429,9 @@ class SrcSafetyAnalysis {
   }
 
   SrcState computeNext(const MCInst &Point, const SrcState &Cur) {
+if (BC.MIB->isCFI(Point))
+  return Cur;
+
 SrcStatePrinter P(BC);
 LLVM_DEBUG({
   dbgs() << "  SrcSafetyAnalysis::ComputeNext(";
@@ -703,6 +706,8 @@ class CFGUnawareSrcSafetyAnalysis : public 
SrcSafetyAnalysis,
 SrcState S = createEntryState();
 for (auto &I : BF.instrs()) {
   MCInst &Inst = I.second;
+  if (BC.MIB->isCFI(Inst))
+continue;
 
   // If there is a label before this instruction, it is possible that it
   // can be jumped-to, thus conservatively resetting S. As an exception,
@@ -982,6 +987,9 @@ class DstSafetyAnalysis {
   }
 
   DstState computeNext(const MCInst &Point, const DstState &Cur) {
+if (BC.MIB->isCFI(Point))
+  return Cur;
+
 DstStatePrinter P(BC);
 LLVM_DEBUG({
   dbgs() << "  DstSafetyAnalysis::ComputeNext(";
@@ -1151,6 +1159,8 @@ class CFGUnawareDstSafetyAnalysis : public 
DstSafetyAnalysis,
 DstState S = createUnsafeState();
 for (auto &I : llvm::reverse(BF.instrs())) {
   MCInst &Inst = I.second;
+  if (BC.MIB->isCFI(Inst))
+continue;
 
   // If Inst can change the control flow, we cannot be sure that the next
   // instruction (to be executed in analyzed program) is the one processed
@@ -1341,6 +1351,9 @@ void FunctionAnalysisContext::findUnsafeUses(
   });
 
   iterateOverInstrs(BF, [&](MCInstReference Inst) {
+if (BC.MIB->isCFI(Inst))
+  return;
+
 const SrcState &S = Analysis->getStateBefore(Inst);
 
 // If non-empty state was never propagated from the entry basic block
@@ -1404,6 +1417,9 @@ void FunctionAnalysisContext::findUnsafeDefs(
   });
 
   iterateOverInstrs(BF, [&](MCInstReference Inst) {
+if (BC.MIB->isCFI(Inst))
+  return;
+
 const DstState &S = Analysis->getStateAfter(Inst);
 
 if (auto Report = shouldReportAuthOracle(BC, Inst, S))
diff --git a/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s 
b/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s
index 61aa84377b88e..5aec945621987 100644
--- a/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s
+++ b/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s
@@ -329,6 +329,38 @@ auth_oracle:
 // PAUTH-EMPTY:
 // PAUTH-NEXT:   Attaching leakage info to: :  autia   x0, x1 
# DataflowDstSafetyAnalysis: dst-state
 
+// Gadget scanner should not crash on CFI instructions, including when debug-printing them.
+// Note that the particular debug output is not checked, but BOLT should be
+// compiled with assertions enabled to support -debug-only argument.
+
+.globl  cfi_inst_df
+.type   cfi_inst_df,@function
+cfi_inst_df:
+.cfi_startproc
+sub sp, sp, #16
+.cfi_def_cfa_offset 16
+add sp, sp, #16
+.cfi_def_cfa_offset 0
+ret
+.size   cfi_inst_df, .-cfi_inst_df
+.cfi_endproc
+
+.globl  cfi_inst_nocfg
+.type   cfi_inst_nocfg,@function
+cfi_inst_nocfg:
+.cfi_startproc
+sub sp, sp, #16
+.cfi_def_cfa_offset 16
+
+adr x0, 1f
+br  x0
+1:
+add sp, sp, #16
+.cfi_def_cfa_offset 0
+ret
+.size   cfi_inst_nocfg, .-cfi_inst_nocfg
+.cfi_endproc
+
 // CHECK-LABEL:Analyzing function main, AllocatorId = 1
 .globl  main
 .type   main,@function

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: detect untrusted LR before tail call (PR #137224)

2025-05-26 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/137224

>From 41914bd7fd1eefb7cba31c0c825cc119d01b35b6 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Tue, 22 Apr 2025 21:43:14 +0300
Subject: [PATCH 1/2] [BOLT] Gadget scanner: detect untrusted LR before tail
 call

Implement the detection of tail calls performed with untrusted link
register, which violates the assumption made on entry to every function.

Unlike other pauth gadgets, this one involves some amount of guessing
which branch instructions should be checked as tail calls.
---
 bolt/lib/Passes/PAuthGadgetScanner.cpp|  94 ++-
 .../AArch64/gs-pacret-autiasp.s   |  31 +-
 .../AArch64/gs-pauth-debug-output.s   |  30 +-
 .../AArch64/gs-pauth-tail-calls.s | 597 ++
 4 files changed, 706 insertions(+), 46 deletions(-)
 create mode 100644 bolt/test/binary-analysis/AArch64/gs-pauth-tail-calls.s

diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp 
b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index c34ca620bb50b..49087eab3ce9a 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -736,19 +736,14 @@ template  class CFGUnawareAnalysis {
 //
 // Then, a function can be split into a number of disjoint contiguous sequences
 // of instructions without labels in between. These sequences can be processed
-// the same way basic blocks are processed by data-flow analysis, assuming
-// pessimistically that all registers are unsafe at the start of each sequence.
+// the same way basic blocks are processed by data-flow analysis, with the same
+// pessimistic estimation of the initial state at the start of each sequence
+// (except the first instruction of the function).
 class CFGUnawareSrcSafetyAnalysis : public SrcSafetyAnalysis,
 public CFGUnawareAnalysis {
   using SrcSafetyAnalysis::BC;
   BinaryFunction &BF;
 
-  /// Creates a state with all registers marked unsafe (not to be confused
-  /// with empty state).
-  SrcState createUnsafeState() const {
-return SrcState(NumRegs, RegsToTrackInstsFor.getNumTrackedRegisters());
-  }
-
 public:
   CFGUnawareSrcSafetyAnalysis(BinaryFunction &BF,
   MCPlusBuilder::AllocatorIdTy AllocId,
@@ -758,6 +753,7 @@ class CFGUnawareSrcSafetyAnalysis : public 
SrcSafetyAnalysis,
   }
 
   void run() override {
+const SrcState DefaultState = computePessimisticState(BF);
 SrcState S = createEntryState();
 for (auto &I : BF.instrs()) {
   MCInst &Inst = I.second;
@@ -772,7 +768,7 @@ class CFGUnawareSrcSafetyAnalysis : public 
SrcSafetyAnalysis,
 LLVM_DEBUG({
   traceInst(BC, "Due to label, resetting the state before", Inst);
 });
-S = createUnsafeState();
+S = DefaultState;
   }
 
   // Attach the state *before* this instruction executes.
@@ -1297,6 +1293,83 @@ shouldReportReturnGadget(const BinaryContext &BC, const 
MCInstReference &Inst,
   return make_gadget_report(RetKind, Inst, *RetReg);
 }
 
+/// While BOLT already marks some of the branch instructions as tail calls,
+/// this function tries to improve the coverage by including less obvious cases
+/// when it is possible to do without introducing too many false positives.
+static bool shouldAnalyzeTailCallInst(const BinaryContext &BC,
+  const BinaryFunction &BF,
+  const MCInstReference &Inst) {
+  // Some BC.MIB->isXYZ(Inst) methods simply delegate to MCInstrDesc::isXYZ()
+  // (such as isBranch at the time of writing this comment), some don't (such
+  // as isCall). For that reason, call MCInstrDesc's methods explicitly when
+  // it is important.
+  const MCInstrDesc &Desc =
+  BC.MII->get(static_cast(Inst).getOpcode());
+  // Tail call should be a branch (but not necessarily an indirect one).
+  if (!Desc.isBranch())
+return false;
+
+  // Always analyze the branches already marked as tail calls by BOLT.
+  if (BC.MIB->isTailCall(Inst))
+return true;
+
+  // Try to also check the branches marked as "UNKNOWN CONTROL FLOW" - the
+  // below is a simplified condition from BinaryContext::printInstruction.
+  bool IsUnknownControlFlow =
+  BC.MIB->isIndirectBranch(Inst) && !BC.MIB->getJumpTable(Inst);
+
+  if (BF.hasCFG() && IsUnknownControlFlow)
+return true;
+
+  return false;
+}
+
+static std::optional>
+shouldReportUnsafeTailCall(const BinaryContext &BC, const BinaryFunction &BF,
+   const MCInstReference &Inst, const SrcState &S) {
+  static const GadgetKind UntrustedLRKind(
+  "untrusted link register found before tail call");
+
+  if (!shouldAnalyzeTailCallInst(BC, BF, Inst))
+return std::nullopt;
+
+  // Not only the set of registers returned by getTrustedLiveInRegs() can be
+  // seen as a reasonable target-independent _approximation_ of "the LR", these
+  // are *exactly* those regis

[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: add RegBankLegalize rules for extends and trunc (PR #132383)

2025-05-26 Thread Petar Avramovic via llvm-branch-commits

https://github.com/petar-avramovic updated 
https://github.com/llvm/llvm-project/pull/132383

>From 97161545fc7d53077ec52f9b6abdcc5caf78fff9 Mon Sep 17 00:00:00 2001
From: Petar Avramovic 
Date: Thu, 8 May 2025 12:03:28 +0200
Subject: [PATCH] AMDGPU/GlobalISel: add RegBankLegalize rules for extends and
 trunc

Uniform S1:
Truncs to uniform S1 and AnyExts from S1 are left as is as they are meant
to be combined away. Uniform S1 ZExt and SExt are lowered using select.
Divergent S1:
Trunc of VGPR to VCC is lowered as compare.
Extends of VCC are lowered using select.

For remaining types:
S32 to S64 ZExt and SExt are lowered using merge values, AnyExt and Trunc
are again left as is to be combined away.
Notably, uniform S16 SExt and ZExt are not lowered to S32 and are left as is
for instruction selection to deal with. This is because there are patterns
that check for the S16 type.
---
 .../Target/AMDGPU/AMDGPURegBankLegalize.cpp   |   7 ++
 .../AMDGPU/AMDGPURegBankLegalizeHelper.cpp| 110 +-
 .../AMDGPU/AMDGPURegBankLegalizeHelper.h  |   1 +
 .../AMDGPU/AMDGPURegBankLegalizeRules.cpp |  47 +++-
 .../AMDGPU/AMDGPURegBankLegalizeRules.h   |   3 +
 .../GlobalISel/regbankselect-and-s1.mir   | 105 +
 .../GlobalISel/regbankselect-anyext.mir   |  59 +-
 .../AMDGPU/GlobalISel/regbankselect-sext.mir  | 100 ++--
 .../AMDGPU/GlobalISel/regbankselect-trunc.mir |  22 +++-
 .../AMDGPU/GlobalISel/regbankselect-zext.mir  |  89 +-
 10 files changed, 360 insertions(+), 183 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
index 4d8d3022b8080..4f9beeaacfaee 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
@@ -213,6 +213,13 @@ class AMDGPURegBankLegalizeCombiner {
   return;
 }
 
+if (DstTy == S64 && TruncSrcTy == S32) {
+  B.buildMergeLikeInstr(MI.getOperand(0).getReg(),
+{TruncSrc, B.buildUndef({SgprRB, S32})});
+  cleanUpAfterCombine(MI, Trunc);
+  return;
+}
+
 if (DstTy == S32 && TruncSrcTy == S16) {
   B.buildAnyExt(Dst, TruncSrc);
   cleanUpAfterCombine(MI, Trunc);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
index 050b6302a98c5..658982ceb68fe 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
@@ -133,6 +133,43 @@ void RegBankLegalizeHelper::widenLoad(MachineInstr &MI, 
LLT WideTy,
   MI.eraseFromParent();
 }
 
+void RegBankLegalizeHelper::lowerVccExtToSel(MachineInstr &MI) {
+  Register Dst = MI.getOperand(0).getReg();
+  LLT Ty = MRI.getType(Dst);
+  Register Src = MI.getOperand(1).getReg();
+  unsigned Opc = MI.getOpcode();
+  int TrueExtCst = Opc == G_SEXT ? -1 : 1;
+  if (Ty == S32 || Ty == S16) {
+auto True = B.buildConstant({VgprRB, Ty}, TrueExtCst);
+auto False = B.buildConstant({VgprRB, Ty}, 0);
+B.buildSelect(Dst, Src, True, False);
+  } else if (Ty == S64) {
+auto True = B.buildConstant({VgprRB_S32}, TrueExtCst);
+auto False = B.buildConstant({VgprRB_S32}, 0);
+auto Lo = B.buildSelect({VgprRB_S32}, Src, True, False);
+MachineInstrBuilder Hi;
+switch (Opc) {
+case G_SEXT:
+  Hi = Lo;
+  break;
+case G_ZEXT:
+  Hi = False;
+  break;
+case G_ANYEXT:
+  Hi = B.buildUndef({VgprRB_S32});
+  break;
+default:
+  llvm_unreachable("Opcode not supported");
+}
+
+B.buildMergeValues(Dst, {Lo.getReg(0), Hi.getReg(0)});
+  } else {
+llvm_unreachable("Type not supported");
+  }
+
+  MI.eraseFromParent();
+}
+
 static bool isSignedBFE(MachineInstr &MI) {
   if (GIntrinsic *GI = dyn_cast(&MI))
 return (GI->is(Intrinsic::amdgcn_sbfe));
@@ -256,26 +293,8 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI,
   switch (Mapping.LoweringMethod) {
   case DoNotLower:
 return;
-  case VccExtToSel: {
-LLT Ty = MRI.getType(MI.getOperand(0).getReg());
-Register Src = MI.getOperand(1).getReg();
-unsigned Opc = MI.getOpcode();
-if (Ty == S32 || Ty == S16) {
-  auto True = B.buildConstant({VgprRB, Ty}, Opc == G_SEXT ? -1 : 1);
-  auto False = B.buildConstant({VgprRB, Ty}, 0);
-  B.buildSelect(MI.getOperand(0).getReg(), Src, True, False);
-}
-if (Ty == S64) {
-  auto True = B.buildConstant({VgprRB, S32}, Opc == G_SEXT ? -1 : 1);
-  auto False = B.buildConstant({VgprRB, S32}, 0);
-  auto Sel = B.buildSelect({VgprRB, S32}, Src, True, False);
-  B.buildMergeValues(
-  MI.getOperand(0).getReg(),
-  {Sel.getReg(0), Opc == G_SEXT ? Sel.getReg(0) : False.getReg(0)});
-}
-MI.eraseFromParent();
-return;
-  }
+  case VccExtToSel:
+return lowerVccExtToSel(MI);
   case UniExtToSel: {
 LLT Ty = MRI.getType(MI.getOperand(0).g

[llvm-branch-commits] [Github][CI] Make the new premerge system fail jobs on errors (PR #139359)

2025-05-26 Thread Nathan Gauër via llvm-branch-commits

https://github.com/Keenuts approved this pull request.


https://github.com/llvm/llvm-project/pull/139359
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [compiler-rt] compiler-rt: Introduce runtime functions for emulated PAC. (PR #133530)

2025-05-26 Thread Anatoly Trosinenko via llvm-branch-commits

atrosinenko wrote:

Since this PR enables another language (CXX in addition to C and ASM) in the
`builtins` library, it is probably worth having it reviewed by someone
familiar with the CMake build descriptions used in LLVM.

https://github.com/llvm/llvm-project/pull/133530
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: prevent false positives due to jump tables (PR #138884)

2025-05-26 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/138884

>From c2a64ee8c1f0446ce98defb3a0df8230521f66aa Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Tue, 6 May 2025 11:31:03 +0300
Subject: [PATCH] [BOLT] Gadget scanner: prevent false positives due to jump
 tables

As part of PAuth hardening, AArch64 LLVM backend can use a special
BR_JumpTable pseudo (enabled by -faarch64-jump-table-hardening
Clang option) which is expanded in the AsmPrinter into a contiguous
sequence without unsafe instructions in the middle.

This commit adds another target-specific callback to MCPlusBuilder
to make it possible to inhibit false positives for known-safe jump
table dispatch sequences. Without special handling, the branch
instruction is likely to be reported as a non-protected call (as its
destination is not produced by an auth instruction, PC-relative address
materialization, etc.) and possibly as a tail call being performed with
an unsafe link register (as detecting whether the branch instruction
is a tail call is a heuristic).

For now, only the specific instruction sequence used by the AArch64
LLVM backend is matched.
---
 bolt/include/bolt/Core/MCInstUtils.h  |   9 +
 bolt/include/bolt/Core/MCPlusBuilder.h|  14 +
 bolt/lib/Core/MCInstUtils.cpp |  20 +
 bolt/lib/Passes/PAuthGadgetScanner.cpp|  10 +
 .../Target/AArch64/AArch64MCPlusBuilder.cpp   |  73 ++
 .../AArch64/gs-pauth-jump-table.s | 703 ++
 6 files changed, 829 insertions(+)
 create mode 100644 bolt/test/binary-analysis/AArch64/gs-pauth-jump-table.s

diff --git a/bolt/include/bolt/Core/MCInstUtils.h 
b/bolt/include/bolt/Core/MCInstUtils.h
index 50b7d56470c99..33d36cccbcfff 100644
--- a/bolt/include/bolt/Core/MCInstUtils.h
+++ b/bolt/include/bolt/Core/MCInstUtils.h
@@ -154,6 +154,15 @@ class MCInstReference {
 return nullptr;
   }
 
+  /// Returns the only preceding instruction, or std::nullopt if multiple or no
+  /// predecessors are possible.
+  ///
+  /// If CFG information is available, basic block boundary can be crossed,
+  /// provided there is exactly one predecessor. If CFG is not available, the
+  /// preceding instruction in the offset order is returned, unless this is the
+  /// first instruction of the function.
+  std::optional getSinglePredecessor();
+
   raw_ostream &print(raw_ostream &OS) const;
 };
 
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h 
b/bolt/include/bolt/Core/MCPlusBuilder.h
index c8cbcaf33f4b5..3abf4d18e94da 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -14,6 +14,7 @@
 #ifndef BOLT_CORE_MCPLUSBUILDER_H
 #define BOLT_CORE_MCPLUSBUILDER_H
 
+#include "bolt/Core/MCInstUtils.h"
 #include "bolt/Core/MCPlus.h"
 #include "bolt/Core/Relocation.h"
 #include "llvm/ADT/ArrayRef.h"
@@ -700,6 +701,19 @@ class MCPlusBuilder {
 return std::nullopt;
   }
 
+  /// Tests if BranchInst corresponds to an instruction sequence which is known
+  /// to be a safe dispatch via jump table.
+  ///
+  /// The target can decide which instruction sequences to consider "safe" from
+  /// the Pointer Authentication point of view, such as any jump table dispatch
+  /// sequence without function calls inside, any sequence which is contiguous,
+  /// or only some specific well-known sequences.
+  virtual bool
+  isSafeJumpTableBranchForPtrAuth(MCInstReference BranchInst) const {
+llvm_unreachable("not implemented");
+return false;
+  }
+
   virtual bool isTerminator(const MCInst &Inst) const;
 
   virtual bool isNoop(const MCInst &Inst) const {
diff --git a/bolt/lib/Core/MCInstUtils.cpp b/bolt/lib/Core/MCInstUtils.cpp
index 40f6edd59135c..b7c6d898988af 100644
--- a/bolt/lib/Core/MCInstUtils.cpp
+++ b/bolt/lib/Core/MCInstUtils.cpp
@@ -55,3 +55,23 @@ raw_ostream &MCInstReference::print(raw_ostream &OS) const {
   OS << ">";
   return OS;
 }
+
+std::optional MCInstReference::getSinglePredecessor() {
+  if (const RefInBB *Ref = tryGetRefInBB()) {
+if (Ref->It != Ref->BB->begin())
+  return MCInstReference(Ref->BB, &*std::prev(Ref->It));
+
+if (Ref->BB->pred_size() != 1)
+  return std::nullopt;
+
+BinaryBasicBlock *PredBB = *Ref->BB->pred_begin();
+assert(!PredBB->empty() && "Empty basic blocks are not supported yet");
+return MCInstReference(PredBB, &*PredBB->rbegin());
+  }
+
+  const RefInBF &Ref = getRefInBF();
+  if (Ref.It == Ref.BF->instrs().begin())
+return std::nullopt;
+
+  return MCInstReference(Ref.BF, std::prev(Ref.It));
+}
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp 
b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index 573db1856a134..f979b1480d0b1 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -1337,6 +1337,11 @@ shouldReportUnsafeTailCall(const BinaryContext &BC, 
const BinaryFunction &BF,
 return std::nullopt;
   }
 
+  if (BC.MIB->isSafeJumpTableBranchForPtrAuth(Inst)) {
+LL

[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: do not crash on debug-printing CFI instructions (PR #136151)

2025-05-26 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/136151

>From 36837ce02c564ee5b27a1b3bad164b1e77d93bb7 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Tue, 15 Apr 2025 21:47:18 +0300
Subject: [PATCH] [BOLT] Gadget scanner: do not crash on debug-printing CFI
 instructions

Some instruction-printing code used under LLVM_DEBUG does not handle CFI
instructions well. While CFI instructions seem to be harmless for the
correctness of the analysis results, they do not convey any useful
information to the analysis either, so skip them early.
---
 bolt/lib/Passes/PAuthGadgetScanner.cpp| 16 ++
 .../AArch64/gs-pauth-debug-output.s   | 32 +++
 2 files changed, 48 insertions(+)

diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp 
b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index 99d078931c9e9..adf6d57ac 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -429,6 +429,9 @@ class SrcSafetyAnalysis {
   }
 
   SrcState computeNext(const MCInst &Point, const SrcState &Cur) {
+if (BC.MIB->isCFI(Point))
+  return Cur;
+
 SrcStatePrinter P(BC);
 LLVM_DEBUG({
   dbgs() << "  SrcSafetyAnalysis::ComputeNext(";
@@ -703,6 +706,8 @@ class CFGUnawareSrcSafetyAnalysis : public 
SrcSafetyAnalysis,
 SrcState S = createEntryState();
 for (auto &I : BF.instrs()) {
   MCInst &Inst = I.second;
+  if (BC.MIB->isCFI(Inst))
+continue;
 
   // If there is a label before this instruction, it is possible that it
   // can be jumped-to, thus conservatively resetting S. As an exception,
@@ -982,6 +987,9 @@ class DstSafetyAnalysis {
   }
 
   DstState computeNext(const MCInst &Point, const DstState &Cur) {
+if (BC.MIB->isCFI(Point))
+  return Cur;
+
 DstStatePrinter P(BC);
 LLVM_DEBUG({
   dbgs() << "  DstSafetyAnalysis::ComputeNext(";
@@ -1151,6 +1159,8 @@ class CFGUnawareDstSafetyAnalysis : public 
DstSafetyAnalysis,
 DstState S = createUnsafeState();
 for (auto &I : llvm::reverse(BF.instrs())) {
   MCInst &Inst = I.second;
+  if (BC.MIB->isCFI(Inst))
+continue;
 
   // If Inst can change the control flow, we cannot be sure that the next
   // instruction (to be executed in analyzed program) is the one processed
@@ -1341,6 +1351,9 @@ void FunctionAnalysisContext::findUnsafeUses(
   });
 
   iterateOverInstrs(BF, [&](MCInstReference Inst) {
+if (BC.MIB->isCFI(Inst))
+  return;
+
 const SrcState &S = Analysis->getStateBefore(Inst);
 
 // If non-empty state was never propagated from the entry basic block
@@ -1404,6 +1417,9 @@ void FunctionAnalysisContext::findUnsafeDefs(
   });
 
   iterateOverInstrs(BF, [&](MCInstReference Inst) {
+if (BC.MIB->isCFI(Inst))
+  return;
+
 const DstState &S = Analysis->getStateAfter(Inst);
 
 if (auto Report = shouldReportAuthOracle(BC, Inst, S))
diff --git a/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s 
b/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s
index 61aa84377b88e..5aec945621987 100644
--- a/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s
+++ b/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s
@@ -329,6 +329,38 @@ auth_oracle:
 // PAUTH-EMPTY:
 // PAUTH-NEXT:   Attaching leakage info to: :  autia   x0, x1 
# DataflowDstSafetyAnalysis: dst-state
 
+// Gadget scanner should not crash on CFI instructions, including when debug-printing them.
+// Note that the particular debug output is not checked, but BOLT should be
+// compiled with assertions enabled to support -debug-only argument.
+
+.globl  cfi_inst_df
+.type   cfi_inst_df,@function
+cfi_inst_df:
+.cfi_startproc
+sub sp, sp, #16
+.cfi_def_cfa_offset 16
+add sp, sp, #16
+.cfi_def_cfa_offset 0
+ret
+.size   cfi_inst_df, .-cfi_inst_df
+.cfi_endproc
+
+.globl  cfi_inst_nocfg
+.type   cfi_inst_nocfg,@function
+cfi_inst_nocfg:
+.cfi_startproc
+sub sp, sp, #16
+.cfi_def_cfa_offset 16
+
+adr x0, 1f
+br  x0
+1:
+add sp, sp, #16
+.cfi_def_cfa_offset 0
+ret
+.size   cfi_inst_nocfg, .-cfi_inst_nocfg
+.cfi_endproc
+
 // CHECK-LABEL:Analyzing function main, AllocatorId = 1
 .globl  main
 .type   main,@function

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Factor out MCInstReference from gadget scanner (NFC) (PR #138655)

2025-05-26 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/138655

>From b98ae87ae7ce7b735c85896db7f24468fdad056a Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Mon, 28 Apr 2025 18:35:48 +0300
Subject: [PATCH] [BOLT] Factor out MCInstReference from gadget scanner (NFC)

Move MCInstReference out of the gadget scanner into separate files
(bolt/Core/MCInstUtils.h and MCInstUtils.cpp). MCInstReference
represents a constant reference to an instruction inside a parent
entity - either inside a basic block (which has a reference to its
parent function) or directly inside the function (when CFG information
is not available).
---
 bolt/include/bolt/Core/MCInstUtils.h  | 168 +
 bolt/include/bolt/Passes/PAuthGadgetScanner.h | 178 +-
 bolt/lib/Core/CMakeLists.txt  |   1 +
 bolt/lib/Core/MCInstUtils.cpp |  57 ++
 bolt/lib/Passes/PAuthGadgetScanner.cpp| 102 +-
 5 files changed, 269 insertions(+), 237 deletions(-)
 create mode 100644 bolt/include/bolt/Core/MCInstUtils.h
 create mode 100644 bolt/lib/Core/MCInstUtils.cpp

diff --git a/bolt/include/bolt/Core/MCInstUtils.h 
b/bolt/include/bolt/Core/MCInstUtils.h
new file mode 100644
index 0..69bf5e6159b74
--- /dev/null
+++ b/bolt/include/bolt/Core/MCInstUtils.h
@@ -0,0 +1,168 @@
+//===- bolt/Core/MCInstUtils.h --*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef BOLT_CORE_MCINSTUTILS_H
+#define BOLT_CORE_MCINSTUTILS_H
+
+#include "bolt/Core/BinaryBasicBlock.h"
+
+#include 
+#include 
+#include 
+
+namespace llvm {
+namespace bolt {
+
+class BinaryFunction;
+
+/// MCInstReference represents a reference to a constant MCInst as stored either
+/// in a BinaryFunction (i.e. before a CFG is created), or in a BinaryBasicBlock
+/// (after a CFG is created).
+class MCInstReference {
+  using nocfg_const_iterator = std::map::const_iterator;
+
+  // Two cases are possible:
+  // * functions with CFG reconstructed - a function stores a collection of
+  //   basic blocks, each basic block stores a contiguous vector of MCInst
+  // * functions without CFG - there are no basic blocks created,
+  //   the instructions are directly stored in std::map in BinaryFunction
+  //
+  // In both cases, the direct parent of MCInst is stored together with an
+  // iterator pointing to the instruction.
+
+  // Helper struct: CFG is available, the direct parent is a basic block,
+  // iterator's type is `MCInst *`.
+  struct RefInBB {
+RefInBB(const BinaryBasicBlock *BB, const MCInst *Inst)
+: BB(BB), It(Inst) {}
+RefInBB(const RefInBB &Other) = default;
+RefInBB &operator=(const RefInBB &Other) = default;
+
+const BinaryBasicBlock *BB;
+BinaryBasicBlock::const_iterator It;
+
+bool operator<(const RefInBB &Other) const {
+  return std::tie(BB, It) < std::tie(Other.BB, Other.It);
+}
+
+bool operator==(const RefInBB &Other) const {
+  return BB == Other.BB && It == Other.It;
+}
+  };
+
+  // Helper struct: CFG is *not* available, the direct parent is a function,
+  // iterator's type is std::map::iterator (the mapped value
+  // is an instruction's offset).
+  struct RefInBF {
+RefInBF(const BinaryFunction *BF, nocfg_const_iterator It)
+: BF(BF), It(It) {}
+RefInBF(const RefInBF &Other) = default;
+RefInBF &operator=(const RefInBF &Other) = default;
+
+const BinaryFunction *BF;
+nocfg_const_iterator It;
+
+bool operator<(const RefInBF &Other) const {
+  return std::tie(BF, It->first) < std::tie(Other.BF, Other.It->first);
+}
+
+bool operator==(const RefInBF &Other) const {
+  return BF == Other.BF && It->first == Other.It->first;
+}
+  };
+
+  std::variant Reference;
+
+  // Utility methods to be used like this:
+  //
+  // if (auto *Ref = tryGetRefInBB())
+  //   return Ref->doSomething(...);
+  // return getRefInBF().doSomethingElse(...);
+  const RefInBB *tryGetRefInBB() const {
+assert(std::get_if(&Reference) ||
+   std::get_if(&Reference));
+return std::get_if(&Reference);
+  }
+  const RefInBF &getRefInBF() const {
+assert(std::get_if(&Reference));
+return *std::get_if(&Reference);
+  }
+
+public:
+  /// Constructs an empty reference.
+  MCInstReference() : Reference(RefInBB(nullptr, nullptr)) {}
+  /// Constructs a reference to the instruction inside the basic block.
+  MCInstReference(const BinaryBasicBlock *BB, const MCInst *Inst)
+  : Reference(RefInBB(BB, Inst)) {
+assert(BB && Inst && "Neither BB nor Inst should be nullptr");
+  }
+  /// Constructs a reference to the instruction inside the basic block.
+  MCInstReference(const BinaryBasicBlock *BB, unsigned Index)
+  : Reference(RefInBB(BB, &BB->getInstructionAtIndex(I

[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: optionally assume auth traps on failure (PR #139778)

2025-05-26 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/139778

>From 639d7046baca3f37d4ec17daf1116c28bac604b1 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Tue, 13 May 2025 19:50:41 +0300
Subject: [PATCH] [BOLT] Gadget scanner: optionally assume auth traps on
 failure

On AArch64 it is possible for an auth instruction to either return an
invalid address value on failure (without FEAT_FPAC) or generate an
error (with FEAT_FPAC). It thus may be possible to never emit explicit
pointer checks, if the target CPU is known to support FEAT_FPAC.

This commit implements an --auth-traps-on-failure command line option,
which essentially makes "safe-to-dereference" and "trusted" register
properties identical and disables scanning for authentication oracles
completely.
---
 bolt/lib/Passes/PAuthGadgetScanner.cpp| 112 +++
 .../binary-analysis/AArch64/cmdline-args.test |   1 +
 .../AArch64/gs-pauth-authentication-oracles.s |   6 +-
 .../binary-analysis/AArch64/gs-pauth-calls.s  |   5 +-
 .../AArch64/gs-pauth-debug-output.s   | 177 ++---
 .../AArch64/gs-pauth-jump-table.s |   6 +-
 .../AArch64/gs-pauth-signing-oracles.s|  54 ++---
 .../AArch64/gs-pauth-tail-calls.s | 184 +-
 8 files changed, 318 insertions(+), 227 deletions(-)

diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp 
b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index f979b1480d0b1..119cb928337a9 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -14,6 +14,7 @@
 #include "bolt/Passes/PAuthGadgetScanner.h"
 #include "bolt/Core/ParallelUtilities.h"
 #include "bolt/Passes/DataflowAnalysis.h"
+#include "bolt/Utils/CommandLineOpts.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/MC/MCInst.h"
@@ -26,6 +27,11 @@ namespace llvm {
 namespace bolt {
 namespace PAuthGadgetScanner {
 
+static cl::opt AuthTrapsOnFailure(
+"auth-traps-on-failure",
+cl::desc("Assume authentication instructions always trap on failure"),
+cl::cat(opts::BinaryAnalysisCategory));
+
 [[maybe_unused]] static void traceInst(const BinaryContext &BC, StringRef 
Label,
const MCInst &MI) {
   dbgs() << "  " << Label << ": ";
@@ -363,6 +369,34 @@ class SrcSafetyAnalysis {
 return Clobbered;
   }
 
+  std::optional getRegMadeTrustedByChecking(const MCInst &Inst,
+   SrcState Cur) const {
+// This function cannot return multiple registers. This is never the case
+// on AArch64.
+std::optional RegCheckedByInst =
+BC.MIB->getAuthCheckedReg(Inst, /*MayOverwrite=*/false);
+if (RegCheckedByInst && Cur.SafeToDerefRegs[*RegCheckedByInst])
+  return *RegCheckedByInst;
+
+auto It = CheckerSequenceInfo.find(&Inst);
+if (It == CheckerSequenceInfo.end())
+  return std::nullopt;
+
+MCPhysReg RegCheckedBySequence = It->second.first;
+const MCInst *FirstCheckerInst = It->second.second;
+
+// FirstCheckerInst should belong to the same basic block (see the
+// assertion in DataflowSrcSafetyAnalysis::run()), meaning it was
+// deterministically processed a few steps before this instruction.
+const SrcState &StateBeforeChecker = getStateBefore(*FirstCheckerInst);
+
+// The sequence checks the register, but it should be authenticated before.
+if (!StateBeforeChecker.SafeToDerefRegs[RegCheckedBySequence])
+  return std::nullopt;
+
+return RegCheckedBySequence;
+  }
+
   // Returns all registers that can be treated as if they are written by an
   // authentication instruction.
   SmallVector getRegsMadeSafeToDeref(const MCInst &Point,
@@ -385,18 +419,38 @@ class SrcSafetyAnalysis {
 Regs.push_back(DstAndSrc->first);
 }
 
+// Make sure explicit checker sequence keeps register safe-to-dereference
+// when the register would be clobbered according to the regular rules:
+//
+//; LR is safe to dereference here
+//mov   x16, x30  ; start of the sequence, LR is s-t-d right before
+//xpaclri ; clobbers LR, LR is not safe anymore
+//cmp   x30, x16
+//b.eq  1f; end of the sequence: LR is marked as trusted
+//brk   0x1234
+//  1:
+//; at this point LR would be marked as trusted,
+//; but not safe-to-dereference
+//
+// or even just
+//
+//; X1 is safe to dereference here
+//ldr x0, [x1, #8]!
+//; X1 is trusted here, but it was clobbered due to address write-back
+if (auto CheckedReg = getRegMadeTrustedByChecking(Point, Cur))
+  Regs.push_back(*CheckedReg);
+
 return Regs;
   }
 
   // Returns all registers made trusted by this instruction.
   SmallVector getRegsMadeTrusted(const MCInst &Point,
 const SrcState &Cur) const {
+assert(!AuthTrapsOnFailure &&

[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: prevent false positives due to jump tables (PR #138884)

2025-05-26 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/138884

>From c2a64ee8c1f0446ce98defb3a0df8230521f66aa Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Tue, 6 May 2025 11:31:03 +0300
Subject: [PATCH] [BOLT] Gadget scanner: prevent false positives due to jump
 tables

As part of PAuth hardening, AArch64 LLVM backend can use a special
BR_JumpTable pseudo (enabled by -faarch64-jump-table-hardening
Clang option) which is expanded in the AsmPrinter into a contiguous
sequence without unsafe instructions in the middle.

This commit adds another target-specific callback to MCPlusBuilder
to make it possible to inhibit false positives for known-safe jump
table dispatch sequences. Without special handling, the branch
instruction is likely to be reported as a non-protected call (as its
destination is not produced by an auth instruction, PC-relative address
materialization, etc.) and possibly as a tail call being performed with
an unsafe link register (as detecting whether the branch instruction
is a tail call is a heuristic).

For now, only the specific instruction sequence used by the AArch64
LLVM backend is matched.
---
 bolt/include/bolt/Core/MCInstUtils.h  |   9 +
 bolt/include/bolt/Core/MCPlusBuilder.h|  14 +
 bolt/lib/Core/MCInstUtils.cpp |  20 +
 bolt/lib/Passes/PAuthGadgetScanner.cpp|  10 +
 .../Target/AArch64/AArch64MCPlusBuilder.cpp   |  73 ++
 .../AArch64/gs-pauth-jump-table.s | 703 ++
 6 files changed, 829 insertions(+)
 create mode 100644 bolt/test/binary-analysis/AArch64/gs-pauth-jump-table.s

diff --git a/bolt/include/bolt/Core/MCInstUtils.h 
b/bolt/include/bolt/Core/MCInstUtils.h
index 50b7d56470c99..33d36cccbcfff 100644
--- a/bolt/include/bolt/Core/MCInstUtils.h
+++ b/bolt/include/bolt/Core/MCInstUtils.h
@@ -154,6 +154,15 @@ class MCInstReference {
 return nullptr;
   }
 
+  /// Returns the only preceding instruction, or std::nullopt if multiple or no
+  /// predecessors are possible.
+  ///
+  /// If CFG information is available, basic block boundary can be crossed,
+  /// provided there is exactly one predecessor. If CFG is not available, the
+  /// preceding instruction in the offset order is returned, unless this is the
+  /// first instruction of the function.
+  std::optional<MCInstReference> getSinglePredecessor();
+
   raw_ostream &print(raw_ostream &OS) const;
 };
 
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h 
b/bolt/include/bolt/Core/MCPlusBuilder.h
index c8cbcaf33f4b5..3abf4d18e94da 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -14,6 +14,7 @@
 #ifndef BOLT_CORE_MCPLUSBUILDER_H
 #define BOLT_CORE_MCPLUSBUILDER_H
 
+#include "bolt/Core/MCInstUtils.h"
 #include "bolt/Core/MCPlus.h"
 #include "bolt/Core/Relocation.h"
 #include "llvm/ADT/ArrayRef.h"
@@ -700,6 +701,19 @@ class MCPlusBuilder {
 return std::nullopt;
   }
 
+  /// Tests if BranchInst corresponds to an instruction sequence which is known
+  /// to be a safe dispatch via jump table.
+  ///
+  /// The target can decide which instruction sequences to consider "safe" from
+  /// the Pointer Authentication point of view, such as any jump table dispatch
+  /// sequence without function calls inside, any sequence which is contiguous,
+  /// or only some specific well-known sequences.
+  virtual bool
+  isSafeJumpTableBranchForPtrAuth(MCInstReference BranchInst) const {
+llvm_unreachable("not implemented");
+return false;
+  }
+
   virtual bool isTerminator(const MCInst &Inst) const;
 
   virtual bool isNoop(const MCInst &Inst) const {
diff --git a/bolt/lib/Core/MCInstUtils.cpp b/bolt/lib/Core/MCInstUtils.cpp
index 40f6edd59135c..b7c6d898988af 100644
--- a/bolt/lib/Core/MCInstUtils.cpp
+++ b/bolt/lib/Core/MCInstUtils.cpp
@@ -55,3 +55,23 @@ raw_ostream &MCInstReference::print(raw_ostream &OS) const {
   OS << ">";
   return OS;
 }
+
+std::optional<MCInstReference> MCInstReference::getSinglePredecessor() {
+  if (const RefInBB *Ref = tryGetRefInBB()) {
+if (Ref->It != Ref->BB->begin())
+  return MCInstReference(Ref->BB, &*std::prev(Ref->It));
+
+if (Ref->BB->pred_size() != 1)
+  return std::nullopt;
+
+BinaryBasicBlock *PredBB = *Ref->BB->pred_begin();
+assert(!PredBB->empty() && "Empty basic blocks are not supported yet");
+return MCInstReference(PredBB, &*PredBB->rbegin());
+  }
+
+  const RefInBF &Ref = getRefInBF();
+  if (Ref.It == Ref.BF->instrs().begin())
+return std::nullopt;
+
+  return MCInstReference(Ref.BF, std::prev(Ref.It));
+}
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp 
b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index 573db1856a134..f979b1480d0b1 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -1337,6 +1337,11 @@ shouldReportUnsafeTailCall(const BinaryContext &BC, 
const BinaryFunction &BF,
 return std::nullopt;
   }
 
+  if (BC.MIB->isSafeJumpTableBranchForPtrAuth(Inst)) {
+LL

[llvm-branch-commits] [llvm] [BOLT] Factor out MCInstReference from gadget scanner (NFC) (PR #138655)

2025-05-26 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/138655

>From b98ae87ae7ce7b735c85896db7f24468fdad056a Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Mon, 28 Apr 2025 18:35:48 +0300
Subject: [PATCH] [BOLT] Factor out MCInstReference from gadget scanner (NFC)

Move MCInstReference out of the gadget scanner into separate files
(bolt/Core/MCInstUtils.h and bolt/lib/Core/MCInstUtils.cpp). It represents
a constant reference to an instruction inside a parent entity - either
inside a basic block (which has a reference to its parent function) or
directly inside the function (when CFG information is not available).
---
 bolt/include/bolt/Core/MCInstUtils.h  | 168 +
 bolt/include/bolt/Passes/PAuthGadgetScanner.h | 178 +-
 bolt/lib/Core/CMakeLists.txt  |   1 +
 bolt/lib/Core/MCInstUtils.cpp |  57 ++
 bolt/lib/Passes/PAuthGadgetScanner.cpp| 102 +-
 5 files changed, 269 insertions(+), 237 deletions(-)
 create mode 100644 bolt/include/bolt/Core/MCInstUtils.h
 create mode 100644 bolt/lib/Core/MCInstUtils.cpp

diff --git a/bolt/include/bolt/Core/MCInstUtils.h 
b/bolt/include/bolt/Core/MCInstUtils.h
new file mode 100644
index 0..69bf5e6159b74
--- /dev/null
+++ b/bolt/include/bolt/Core/MCInstUtils.h
@@ -0,0 +1,168 @@
+//===- bolt/Core/MCInstUtils.h --*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef BOLT_CORE_MCINSTUTILS_H
+#define BOLT_CORE_MCINSTUTILS_H
+
+#include "bolt/Core/BinaryBasicBlock.h"
+
+#include <map>
+#include <tuple>
+#include <variant>
+
+namespace llvm {
+namespace bolt {
+
+class BinaryFunction;
+
+/// MCInstReference represents a reference to a constant MCInst as stored 
either
+/// in a BinaryFunction (i.e. before a CFG is created), or in a 
BinaryBasicBlock
+/// (after a CFG is created).
+class MCInstReference {
+  using nocfg_const_iterator = std::map<uint32_t, MCInst>::const_iterator;
+
+  // Two cases are possible:
+  // * functions with CFG reconstructed - a function stores a collection of
+  //   basic blocks, each basic block stores a contiguous vector of MCInst
+  // * functions without CFG - there are no basic blocks created,
+  //   the instructions are directly stored in std::map in BinaryFunction
+  //
+  // In both cases, the direct parent of MCInst is stored together with an
+  // iterator pointing to the instruction.
+
+  // Helper struct: CFG is available, the direct parent is a basic block,
+  // iterator's type is `MCInst *`.
+  struct RefInBB {
+RefInBB(const BinaryBasicBlock *BB, const MCInst *Inst)
+: BB(BB), It(Inst) {}
+RefInBB(const RefInBB &Other) = default;
+RefInBB &operator=(const RefInBB &Other) = default;
+
+const BinaryBasicBlock *BB;
+BinaryBasicBlock::const_iterator It;
+
+bool operator<(const RefInBB &Other) const {
+  return std::tie(BB, It) < std::tie(Other.BB, Other.It);
+}
+
+bool operator==(const RefInBB &Other) const {
+  return BB == Other.BB && It == Other.It;
+}
+  };
+
+  // Helper struct: CFG is *not* available, the direct parent is a function,
+  // iterator's type is std::map<uint32_t, MCInst>::iterator (the mapped value
+  // is an instruction's offset).
+  struct RefInBF {
+RefInBF(const BinaryFunction *BF, nocfg_const_iterator It)
+: BF(BF), It(It) {}
+RefInBF(const RefInBF &Other) = default;
+RefInBF &operator=(const RefInBF &Other) = default;
+
+const BinaryFunction *BF;
+nocfg_const_iterator It;
+
+bool operator<(const RefInBF &Other) const {
+  return std::tie(BF, It->first) < std::tie(Other.BF, Other.It->first);
+}
+
+bool operator==(const RefInBF &Other) const {
+  return BF == Other.BF && It->first == Other.It->first;
+}
+  };
+
+  std::variant<RefInBB, RefInBF> Reference;
+
+  // Utility methods to be used like this:
+  //
+  // if (auto *Ref = tryGetRefInBB())
+  //   return Ref->doSomething(...);
+  // return getRefInBF().doSomethingElse(...);
+  const RefInBB *tryGetRefInBB() const {
+assert(std::get_if<RefInBB>(&Reference) ||
+   std::get_if<RefInBF>(&Reference));
+return std::get_if<RefInBB>(&Reference);
+  }
+  const RefInBF &getRefInBF() const {
+assert(std::get_if<RefInBF>(&Reference));
+return *std::get_if<RefInBF>(&Reference);
+  }
+
+public:
+  /// Constructs an empty reference.
+  MCInstReference() : Reference(RefInBB(nullptr, nullptr)) {}
+  /// Constructs a reference to the instruction inside the basic block.
+  MCInstReference(const BinaryBasicBlock *BB, const MCInst *Inst)
+  : Reference(RefInBB(BB, Inst)) {
+assert(BB && Inst && "Neither BB nor Inst should be nullptr");
+  }
+  /// Constructs a reference to the instruction inside the basic block.
+  MCInstReference(const BinaryBasicBlock *BB, unsigned Index)
+  : Reference(RefInBB(BB, &BB->getInstructionAtIndex(I

[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: improve handling of unreachable basic blocks (PR #136183)

2025-05-26 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/136183

>From 3c131bb85c51e5d532ac2e70f006901703a5546e Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Thu, 17 Apr 2025 20:51:16 +0300
Subject: [PATCH 1/3] [BOLT] Gadget scanner: improve handling of unreachable
 basic blocks

Instead of refusing to analyze an instruction completely, when it is
unreachable according to the CFG reconstructed by BOLT, pessimistically
assume all registers to be unsafe at the start of basic blocks without
any predecessors. Nevertheless, unreachable basic blocks found in
optimized code likely indicate imprecise CFG reconstruction, so report a
warning once per basic block without predecessors.
---
 bolt/lib/Passes/PAuthGadgetScanner.cpp| 46 ++-
 .../AArch64/gs-pacret-autiasp.s   |  7 ++-
 .../binary-analysis/AArch64/gs-pauth-calls.s  | 57 +++
 3 files changed, 95 insertions(+), 15 deletions(-)

diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp 
b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index adf6d57ac..c0e01bb7aa845 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -341,6 +341,12 @@ class SrcSafetyAnalysis {
 return S;
   }
 
+  /// Creates a state with all registers marked unsafe (not to be confused
+  /// with empty state).
+  SrcState createUnsafeState() const {
+return SrcState(NumRegs, RegsToTrackInstsFor.getNumTrackedRegisters());
+  }
+
   BitVector getClobberedRegs(const MCInst &Point) const {
 BitVector Clobbered(NumRegs);
 // Assume a call can clobber all registers, including callee-saved
@@ -584,6 +590,13 @@ class DataflowSrcSafetyAnalysis
 if (BB.isEntryPoint())
   return createEntryState();
 
+// If a basic block without any predecessors is found in an optimized code,
+// this likely means that some CFG edges were not detected. Pessimistically
+// assume all registers to be unsafe before this basic block and warn about
+// this fact in FunctionAnalysis::findUnsafeUses().
+if (BB.pred_empty())
+  return createUnsafeState();
+
 return SrcState();
   }
 
@@ -688,12 +701,6 @@ class CFGUnawareSrcSafetyAnalysis : public 
SrcSafetyAnalysis,
   using SrcSafetyAnalysis::BC;
   BinaryFunction &BF;
 
-  /// Creates a state with all registers marked unsafe (not to be confused
-  /// with empty state).
-  SrcState createUnsafeState() const {
-return SrcState(NumRegs, RegsToTrackInstsFor.getNumTrackedRegisters());
-  }
-
 public:
   CFGUnawareSrcSafetyAnalysis(BinaryFunction &BF,
   MCPlusBuilder::AllocatorIdTy AllocId,
@@ -1350,19 +1357,30 @@ void FunctionAnalysisContext::findUnsafeUses(
 BF.dump();
   });
 
+  if (BF.hasCFG()) {
+// Warn on basic blocks being unreachable according to BOLT, as this
+// likely means CFG is imprecise.
+for (BinaryBasicBlock &BB : BF) {
+  if (!BB.pred_empty() || BB.isEntryPoint())
+continue;
+  // Arbitrarily attach the report to the first instruction of BB.
+  MCInst *InstToReport = BB.getFirstNonPseudoInstr();
+  if (!InstToReport)
+continue; // BB has no real instructions
+
+  Reports.push_back(
+  make_generic_report(MCInstReference::get(InstToReport, BF),
+  "Warning: no predecessor basic blocks detected "
+  "(possibly incomplete CFG)"));
+}
+  }
+
   iterateOverInstrs(BF, [&](MCInstReference Inst) {
 if (BC.MIB->isCFI(Inst))
   return;
 
 const SrcState &S = Analysis->getStateBefore(Inst);
-
-// If non-empty state was never propagated from the entry basic block
-// to Inst, assume it to be unreachable and report a warning.
-if (S.empty()) {
-  Reports.push_back(
-  make_generic_report(Inst, "Warning: unreachable instruction found"));
-  return;
-}
+assert(!S.empty() && "Instruction has no associated state");
 
 if (auto Report = shouldReportReturnGadget(BC, Inst, S))
   Reports.push_back(*Report);
diff --git a/bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s 
b/bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s
index 284f0bea607a5..6559ba336e8de 100644
--- a/bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s
+++ b/bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s
@@ -215,12 +215,17 @@ f_callclobbered_calleesaved:
 .globl  f_unreachable_instruction
 .type   f_unreachable_instruction,@function
 f_unreachable_instruction:
-// CHECK-LABEL: GS-PAUTH: Warning: unreachable instruction found in function 
f_unreachable_instruction, basic block {{[0-9a-zA-Z.]+}}, at address
+// CHECK-LABEL: GS-PAUTH: Warning: no predecessor basic blocks detected 
(possibly incomplete CFG) in function f_unreachable_instruction, basic block 
{{[0-9a-zA-Z.]+}}, at address
 // CHECK-NEXT:The instruction is {{[0-9a-f]+}}:   add x0, x1, 
x2
 // CHECK-NOT:   instructions that write t

[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: detect untrusted LR before tail call (PR #137224)

2025-05-26 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/137224

>From cfc7f4dac44c66f125c8d514c3d0a26d36dc0779 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Tue, 22 Apr 2025 21:43:14 +0300
Subject: [PATCH 1/2] [BOLT] Gadget scanner: detect untrusted LR before tail
 call

Implement the detection of tail calls performed with untrusted link
register, which violates the assumption made on entry to every function.

Unlike other pauth gadgets, this one involves some amount of guessing
which branch instructions should be checked as tail calls.
---
 bolt/lib/Passes/PAuthGadgetScanner.cpp|  94 ++-
 .../AArch64/gs-pacret-autiasp.s   |  31 +-
 .../AArch64/gs-pauth-debug-output.s   |  30 +-
 .../AArch64/gs-pauth-tail-calls.s | 597 ++
 4 files changed, 706 insertions(+), 46 deletions(-)
 create mode 100644 bolt/test/binary-analysis/AArch64/gs-pauth-tail-calls.s

diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp 
b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index c34ca620bb50b..49087eab3ce9a 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -736,19 +736,14 @@ template  class CFGUnawareAnalysis {
 //
 // Then, a function can be split into a number of disjoint contiguous sequences
 // of instructions without labels in between. These sequences can be processed
-// the same way basic blocks are processed by data-flow analysis, assuming
-// pessimistically that all registers are unsafe at the start of each sequence.
+// the same way basic blocks are processed by data-flow analysis, with the same
+// pessimistic estimation of the initial state at the start of each sequence
+// (except the first instruction of the function).
 class CFGUnawareSrcSafetyAnalysis : public SrcSafetyAnalysis,
 public CFGUnawareAnalysis {
   using SrcSafetyAnalysis::BC;
   BinaryFunction &BF;
 
-  /// Creates a state with all registers marked unsafe (not to be confused
-  /// with empty state).
-  SrcState createUnsafeState() const {
-return SrcState(NumRegs, RegsToTrackInstsFor.getNumTrackedRegisters());
-  }
-
 public:
   CFGUnawareSrcSafetyAnalysis(BinaryFunction &BF,
   MCPlusBuilder::AllocatorIdTy AllocId,
@@ -758,6 +753,7 @@ class CFGUnawareSrcSafetyAnalysis : public 
SrcSafetyAnalysis,
   }
 
   void run() override {
+const SrcState DefaultState = computePessimisticState(BF);
 SrcState S = createEntryState();
 for (auto &I : BF.instrs()) {
   MCInst &Inst = I.second;
@@ -772,7 +768,7 @@ class CFGUnawareSrcSafetyAnalysis : public 
SrcSafetyAnalysis,
 LLVM_DEBUG({
   traceInst(BC, "Due to label, resetting the state before", Inst);
 });
-S = createUnsafeState();
+S = DefaultState;
   }
 
   // Attach the state *before* this instruction executes.
@@ -1297,6 +1293,83 @@ shouldReportReturnGadget(const BinaryContext &BC, const 
MCInstReference &Inst,
   return make_gadget_report(RetKind, Inst, *RetReg);
 }
 
+/// While BOLT already marks some of the branch instructions as tail calls,
+/// this function tries to improve the coverage by including less obvious cases
+/// when it is possible to do without introducing too many false positives.
+static bool shouldAnalyzeTailCallInst(const BinaryContext &BC,
+  const BinaryFunction &BF,
+  const MCInstReference &Inst) {
+  // Some BC.MIB->isXYZ(Inst) methods simply delegate to MCInstrDesc::isXYZ()
+  // (such as isBranch at the time of writing this comment), some don't (such
+  // as isCall). For that reason, call MCInstrDesc's methods explicitly when
+  // it is important.
+  const MCInstrDesc &Desc =
+  BC.MII->get(static_cast(Inst).getOpcode());
+  // Tail call should be a branch (but not necessarily an indirect one).
+  if (!Desc.isBranch())
+return false;
+
+  // Always analyze the branches already marked as tail calls by BOLT.
+  if (BC.MIB->isTailCall(Inst))
+return true;
+
+  // Try to also check the branches marked as "UNKNOWN CONTROL FLOW" - the
+  // below is a simplified condition from BinaryContext::printInstruction.
+  bool IsUnknownControlFlow =
+  BC.MIB->isIndirectBranch(Inst) && !BC.MIB->getJumpTable(Inst);
+
+  if (BF.hasCFG() && IsUnknownControlFlow)
+return true;
+
+  return false;
+}
+
+static std::optional>
+shouldReportUnsafeTailCall(const BinaryContext &BC, const BinaryFunction &BF,
+   const MCInstReference &Inst, const SrcState &S) {
+  static const GadgetKind UntrustedLRKind(
+  "untrusted link register found before tail call");
+
+  if (!shouldAnalyzeTailCallInst(BC, BF, Inst))
+return std::nullopt;
+
+  // Not only the set of registers returned by getTrustedLiveInRegs() can be
+  // seen as a reasonable target-independent _approximation_ of "the LR", these
+  // are *exactly* those regis

[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: detect untrusted LR before tail call (PR #137224)

2025-05-26 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/137224

>From cfc7f4dac44c66f125c8d514c3d0a26d36dc0779 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Tue, 22 Apr 2025 21:43:14 +0300
Subject: [PATCH 1/2] [BOLT] Gadget scanner: detect untrusted LR before tail
 call

Implement the detection of tail calls performed with untrusted link
register, which violates the assumption made on entry to every function.

Unlike other pauth gadgets, this one involves some amount of guessing
which branch instructions should be checked as tail calls.
---
 bolt/lib/Passes/PAuthGadgetScanner.cpp|  94 ++-
 .../AArch64/gs-pacret-autiasp.s   |  31 +-
 .../AArch64/gs-pauth-debug-output.s   |  30 +-
 .../AArch64/gs-pauth-tail-calls.s | 597 ++
 4 files changed, 706 insertions(+), 46 deletions(-)
 create mode 100644 bolt/test/binary-analysis/AArch64/gs-pauth-tail-calls.s

diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp 
b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index c34ca620bb50b..49087eab3ce9a 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -736,19 +736,14 @@ template  class CFGUnawareAnalysis {
 //
 // Then, a function can be split into a number of disjoint contiguous sequences
 // of instructions without labels in between. These sequences can be processed
-// the same way basic blocks are processed by data-flow analysis, assuming
-// pessimistically that all registers are unsafe at the start of each sequence.
+// the same way basic blocks are processed by data-flow analysis, with the same
+// pessimistic estimation of the initial state at the start of each sequence
+// (except the first instruction of the function).
 class CFGUnawareSrcSafetyAnalysis : public SrcSafetyAnalysis,
 public CFGUnawareAnalysis {
   using SrcSafetyAnalysis::BC;
   BinaryFunction &BF;
 
-  /// Creates a state with all registers marked unsafe (not to be confused
-  /// with empty state).
-  SrcState createUnsafeState() const {
-return SrcState(NumRegs, RegsToTrackInstsFor.getNumTrackedRegisters());
-  }
-
 public:
   CFGUnawareSrcSafetyAnalysis(BinaryFunction &BF,
   MCPlusBuilder::AllocatorIdTy AllocId,
@@ -758,6 +753,7 @@ class CFGUnawareSrcSafetyAnalysis : public 
SrcSafetyAnalysis,
   }
 
   void run() override {
+const SrcState DefaultState = computePessimisticState(BF);
 SrcState S = createEntryState();
 for (auto &I : BF.instrs()) {
   MCInst &Inst = I.second;
@@ -772,7 +768,7 @@ class CFGUnawareSrcSafetyAnalysis : public 
SrcSafetyAnalysis,
 LLVM_DEBUG({
   traceInst(BC, "Due to label, resetting the state before", Inst);
 });
-S = createUnsafeState();
+S = DefaultState;
   }
 
   // Attach the state *before* this instruction executes.
@@ -1297,6 +1293,83 @@ shouldReportReturnGadget(const BinaryContext &BC, const 
MCInstReference &Inst,
   return make_gadget_report(RetKind, Inst, *RetReg);
 }
 
+/// While BOLT already marks some of the branch instructions as tail calls,
+/// this function tries to improve the coverage by including less obvious cases
+/// when it is possible to do without introducing too many false positives.
+static bool shouldAnalyzeTailCallInst(const BinaryContext &BC,
+  const BinaryFunction &BF,
+  const MCInstReference &Inst) {
+  // Some BC.MIB->isXYZ(Inst) methods simply delegate to MCInstrDesc::isXYZ()
+  // (such as isBranch at the time of writing this comment), some don't (such
+  // as isCall). For that reason, call MCInstrDesc's methods explicitly when
+  // it is important.
+  const MCInstrDesc &Desc =
+  BC.MII->get(static_cast(Inst).getOpcode());
+  // Tail call should be a branch (but not necessarily an indirect one).
+  if (!Desc.isBranch())
+return false;
+
+  // Always analyze the branches already marked as tail calls by BOLT.
+  if (BC.MIB->isTailCall(Inst))
+return true;
+
+  // Try to also check the branches marked as "UNKNOWN CONTROL FLOW" - the
+  // below is a simplified condition from BinaryContext::printInstruction.
+  bool IsUnknownControlFlow =
+  BC.MIB->isIndirectBranch(Inst) && !BC.MIB->getJumpTable(Inst);
+
+  if (BF.hasCFG() && IsUnknownControlFlow)
+return true;
+
+  return false;
+}
+
+static std::optional>
+shouldReportUnsafeTailCall(const BinaryContext &BC, const BinaryFunction &BF,
+   const MCInstReference &Inst, const SrcState &S) {
+  static const GadgetKind UntrustedLRKind(
+  "untrusted link register found before tail call");
+
+  if (!shouldAnalyzeTailCallInst(BC, BF, Inst))
+return std::nullopt;
+
+  // Not only the set of registers returned by getTrustedLiveInRegs() can be
+  // seen as a reasonable target-independent _approximation_ of "the LR", these
+  // are *exactly* those regis

[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: account for BRK when searching for auth oracles (PR #137975)

2025-05-26 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/137975

>From 97a6d12945bd00ac2234854fca1886b58596b72f Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Wed, 30 Apr 2025 16:08:10 +0300
Subject: [PATCH] [BOLT] Gadget scanner: account for BRK when searching for
 auth oracles

An authenticated pointer can be explicitly checked by the compiler via a
sequence of instructions that executes BRK on failure. It is important
to recognize such BRK instruction as checking every register (as it is
expected to immediately trigger an abnormal program termination) to
prevent false positive reports about authentication oracles:

autia   x2, x3
autia   x0, x1
; neither x0 nor x2 is checked at this point
eor x16, x0, x0, lsl #1
tbz x16, #62, on_success ; marks x0 as checked
; end of BB: for x2 to be checked here, it must be checked in both
; successor basic blocks
  on_failure:
brk 0xc470
  on_success:
; x2 is checked
ldr x1, [x2] ; marks x2 as checked
---
 bolt/include/bolt/Core/MCPlusBuilder.h| 14 ++
 bolt/lib/Passes/PAuthGadgetScanner.cpp| 13 +-
 .../Target/AArch64/AArch64MCPlusBuilder.cpp   | 24 --
 .../AArch64/gs-pauth-address-checks.s | 44 +--
 .../AArch64/gs-pauth-authentication-oracles.s |  9 ++--
 .../AArch64/gs-pauth-signing-oracles.s|  6 +--
 6 files changed, 75 insertions(+), 35 deletions(-)

diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h 
b/bolt/include/bolt/Core/MCPlusBuilder.h
index b233452985502..c8cbcaf33f4b5 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -707,6 +707,20 @@ class MCPlusBuilder {
 return false;
   }
 
+  /// Returns true if Inst is a trap instruction.
+  ///
+  /// Tests if Inst is an instruction that immediately causes an abnormal
+  /// program termination, for example when a security violation is detected
+  /// by a compiler-inserted check.
+  ///
+  /// @note An implementation of this method should likely return false for
+  /// calls to library functions like abort(), as it is possible that the
+  /// execution state is partially attacker-controlled at this point.
+  virtual bool isTrap(const MCInst &Inst) const {
+llvm_unreachable("not implemented");
+return false;
+  }
+
   virtual bool isBreakpoint(const MCInst &Inst) const {
 llvm_unreachable("not implemented");
 return false;
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp 
b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index 49087eab3ce9a..a9d46ed3f49d0 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -1050,6 +1050,15 @@ class DstSafetyAnalysis {
   dbgs() << ")\n";
 });
 
+// If this instruction terminates the program immediately, no
+// authentication oracles are possible past this point.
+if (BC.MIB->isTrap(Point)) {
+  LLVM_DEBUG({ traceInst(BC, "Trap instruction found", Point); });
+  DstState Next(NumRegs, RegsToTrackInstsFor.getNumTrackedRegisters());
+  Next.CannotEscapeUnchecked.set();
+  return Next;
+}
+
 // If this instruction is reachable by the analysis, a non-empty state will
 // be propagated to it sooner or later. Until then, skip computeNext().
 if (Cur.empty()) {
@@ -1155,8 +1164,8 @@ class DataflowDstSafetyAnalysis
 //
 // A basic block without any successors, on the other hand, can be
 // pessimistically initialized to everything-is-unsafe: this will naturally
-// handle both return and tail call instructions and is harmless for
-// internal indirect branch instructions (such as computed gotos).
+// handle return, trap and tail call instructions. At the same time, it is
+// harmless for internal indirect branch instructions, like computed gotos.
 if (BB.succ_empty())
   return createUnsafeState();
 
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp 
b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 9d5a578cfbdff..b669d32cc2032 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -386,10 +386,9 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
 // the list of successors of this basic block as appropriate.
 
 // Any of the above code sequences assume the fall-through basic block
-// is a dead-end BRK instruction (any immediate operand is accepted).
+// is a dead-end trap instruction.
 const BinaryBasicBlock *BreakBB = BB.getFallthrough();
-if (!BreakBB || BreakBB->empty() ||
-BreakBB->front().getOpcode() != AArch64::BRK)
+if (!BreakBB || BreakBB->empty() || !isTrap(BreakBB->front()))
   return std::nullopt;
 
 // Iterate over the instructions of BB in reverse order, matching opcodes
@@ -1751,6 +1750,25 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
 Inst.addOperand(MCOperand::createImm(0));
   }
 

[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: do not crash on debug-printing CFI instructions (PR #136151)

2025-05-26 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/136151

>From 36837ce02c564ee5b27a1b3bad164b1e77d93bb7 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Tue, 15 Apr 2025 21:47:18 +0300
Subject: [PATCH] [BOLT] Gadget scanner: do not crash on debug-printing CFI
 instructions

Some instruction-printing code used under LLVM_DEBUG does not handle CFI
instructions well. While CFI instructions seem to be harmless for the
correctness of the analysis results, they do not convey any useful
information to the analysis either, so skip them early.
---
 bolt/lib/Passes/PAuthGadgetScanner.cpp| 16 ++
 .../AArch64/gs-pauth-debug-output.s   | 32 +++
 2 files changed, 48 insertions(+)

diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp 
b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index 99d078931c9e9..adf6d57ac 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -429,6 +429,9 @@ class SrcSafetyAnalysis {
   }
 
   SrcState computeNext(const MCInst &Point, const SrcState &Cur) {
+if (BC.MIB->isCFI(Point))
+  return Cur;
+
 SrcStatePrinter P(BC);
 LLVM_DEBUG({
   dbgs() << "  SrcSafetyAnalysis::ComputeNext(";
@@ -703,6 +706,8 @@ class CFGUnawareSrcSafetyAnalysis : public 
SrcSafetyAnalysis,
 SrcState S = createEntryState();
 for (auto &I : BF.instrs()) {
   MCInst &Inst = I.second;
+  if (BC.MIB->isCFI(Inst))
+continue;
 
   // If there is a label before this instruction, it is possible that it
   // can be jumped-to, thus conservatively resetting S. As an exception,
@@ -982,6 +987,9 @@ class DstSafetyAnalysis {
   }
 
   DstState computeNext(const MCInst &Point, const DstState &Cur) {
+if (BC.MIB->isCFI(Point))
+  return Cur;
+
 DstStatePrinter P(BC);
 LLVM_DEBUG({
   dbgs() << "  DstSafetyAnalysis::ComputeNext(";
@@ -1151,6 +1159,8 @@ class CFGUnawareDstSafetyAnalysis : public 
DstSafetyAnalysis,
 DstState S = createUnsafeState();
 for (auto &I : llvm::reverse(BF.instrs())) {
   MCInst &Inst = I.second;
+  if (BC.MIB->isCFI(Inst))
+continue;
 
   // If Inst can change the control flow, we cannot be sure that the next
   // instruction (to be executed in analyzed program) is the one processed
@@ -1341,6 +1351,9 @@ void FunctionAnalysisContext::findUnsafeUses(
   });
 
   iterateOverInstrs(BF, [&](MCInstReference Inst) {
+if (BC.MIB->isCFI(Inst))
+  return;
+
 const SrcState &S = Analysis->getStateBefore(Inst);
 
 // If non-empty state was never propagated from the entry basic block
@@ -1404,6 +1417,9 @@ void FunctionAnalysisContext::findUnsafeDefs(
   });
 
   iterateOverInstrs(BF, [&](MCInstReference Inst) {
+if (BC.MIB->isCFI(Inst))
+  return;
+
 const DstState &S = Analysis->getStateAfter(Inst);
 
 if (auto Report = shouldReportAuthOracle(BC, Inst, S))
diff --git a/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s 
b/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s
index 61aa84377b88e..5aec945621987 100644
--- a/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s
+++ b/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s
@@ -329,6 +329,38 @@ auth_oracle:
 // PAUTH-EMPTY:
 // PAUTH-NEXT:   Attaching leakage info to: :  autia   x0, x1 
# DataflowDstSafetyAnalysis: dst-state
 
+// Gadget scanner should not crash on CFI instructions, including when 
debug-printing them.
+// Note that the particular debug output is not checked, but BOLT should be
+// compiled with assertions enabled to support -debug-only argument.
+
+.globl  cfi_inst_df
+.type   cfi_inst_df,@function
+cfi_inst_df:
+.cfi_startproc
+sub sp, sp, #16
+.cfi_def_cfa_offset 16
+add sp, sp, #16
+.cfi_def_cfa_offset 0
+ret
+.size   cfi_inst_df, .-cfi_inst_df
+.cfi_endproc
+
+.globl  cfi_inst_nocfg
+.type   cfi_inst_nocfg,@function
+cfi_inst_nocfg:
+.cfi_startproc
+sub sp, sp, #16
+.cfi_def_cfa_offset 16
+
+adr x0, 1f
+br  x0
+1:
+add sp, sp, #16
+.cfi_def_cfa_offset 0
+ret
+.size   cfi_inst_nocfg, .-cfi_inst_nocfg
+.cfi_endproc
+
 // CHECK-LABEL:Analyzing function main, AllocatorId = 1
 .globl  main
 .type   main,@function

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: account for BRK when searching for auth oracles (PR #137975)

2025-05-26 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/137975

>From 97a6d12945bd00ac2234854fca1886b58596b72f Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Wed, 30 Apr 2025 16:08:10 +0300
Subject: [PATCH] [BOLT] Gadget scanner: account for BRK when searching for
 auth oracles

An authenticated pointer can be explicitly checked by the compiler via a
sequence of instructions that executes BRK on failure. It is important
to recognize such BRK instruction as checking every register (as it is
expected to immediately trigger an abnormal program termination) to
prevent false positive reports about authentication oracles:

autia   x2, x3
autia   x0, x1
; neither x0 nor x2 is checked at this point
eor x16, x0, x0, lsl #1
tbz x16, #62, on_success ; marks x0 as checked
; end of BB: for x2 to be checked here, it must be checked in both
; successor basic blocks
  on_failure:
brk 0xc470
  on_success:
; x2 is checked
ldr x1, [x2] ; marks x2 as checked
---
 bolt/include/bolt/Core/MCPlusBuilder.h| 14 ++
 bolt/lib/Passes/PAuthGadgetScanner.cpp| 13 +-
 .../Target/AArch64/AArch64MCPlusBuilder.cpp   | 24 --
 .../AArch64/gs-pauth-address-checks.s | 44 +--
 .../AArch64/gs-pauth-authentication-oracles.s |  9 ++--
 .../AArch64/gs-pauth-signing-oracles.s|  6 +--
 6 files changed, 75 insertions(+), 35 deletions(-)

diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h 
b/bolt/include/bolt/Core/MCPlusBuilder.h
index b233452985502..c8cbcaf33f4b5 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -707,6 +707,20 @@ class MCPlusBuilder {
 return false;
   }
 
+  /// Returns true if Inst is a trap instruction.
+  ///
+  /// Tests if Inst is an instruction that immediately causes an abnormal
+  /// program termination, for example when a security violation is detected
+  /// by a compiler-inserted check.
+  ///
+  /// @note An implementation of this method should likely return false for
+  /// calls to library functions like abort(), as it is possible that the
+  /// execution state is partially attacker-controlled at this point.
+  virtual bool isTrap(const MCInst &Inst) const {
+llvm_unreachable("not implemented");
+return false;
+  }
+
   virtual bool isBreakpoint(const MCInst &Inst) const {
 llvm_unreachable("not implemented");
 return false;
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp 
b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index 49087eab3ce9a..a9d46ed3f49d0 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -1050,6 +1050,15 @@ class DstSafetyAnalysis {
   dbgs() << ")\n";
 });
 
+// If this instruction terminates the program immediately, no
+// authentication oracles are possible past this point.
+if (BC.MIB->isTrap(Point)) {
+  LLVM_DEBUG({ traceInst(BC, "Trap instruction found", Point); });
+  DstState Next(NumRegs, RegsToTrackInstsFor.getNumTrackedRegisters());
+  Next.CannotEscapeUnchecked.set();
+  return Next;
+}
+
 // If this instruction is reachable by the analysis, a non-empty state will
 // be propagated to it sooner or later. Until then, skip computeNext().
 if (Cur.empty()) {
@@ -1155,8 +1164,8 @@ class DataflowDstSafetyAnalysis
 //
 // A basic block without any successors, on the other hand, can be
 // pessimistically initialized to everything-is-unsafe: this will naturally
-// handle both return and tail call instructions and is harmless for
-// internal indirect branch instructions (such as computed gotos).
+// handle return, trap and tail call instructions. At the same time, it is
+// harmless for internal indirect branch instructions, like computed gotos.
 if (BB.succ_empty())
   return createUnsafeState();
 
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp 
b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 9d5a578cfbdff..b669d32cc2032 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -386,10 +386,9 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
 // the list of successors of this basic block as appropriate.
 
 // Any of the above code sequences assume the fall-through basic block
-// is a dead-end BRK instruction (any immediate operand is accepted).
+// is a dead-end trap instruction.
 const BinaryBasicBlock *BreakBB = BB.getFallthrough();
-if (!BreakBB || BreakBB->empty() ||
-BreakBB->front().getOpcode() != AArch64::BRK)
+if (!BreakBB || BreakBB->empty() || !isTrap(BreakBB->front()))
   return std::nullopt;
 
 // Iterate over the instructions of BB in reverse order, matching opcodes
@@ -1751,6 +1750,25 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
 Inst.addOperand(MCOperand::createImm(0));
   }
 

[llvm-branch-commits] [llvm] [BOLT] Introduce helpers to match `MCInst`s one at a time (NFC) (PR #138883)

2025-05-26 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/138883

>From 53fe8585ad9af1f924b87abeeef166529f5b6ca5 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Wed, 7 May 2025 16:42:00 +0300
Subject: [PATCH] [BOLT] Introduce helpers to match `MCInst`s one at a time
 (NFC)

Introduce matchInst helper function to capture and/or match the operands
of MCInst. Unlike the existing `MCPlusBuilder::MCInstMatcher` machinery,
matchInst is intended for the use cases when precise control over the
instruction order is required. For example, when validating PtrAuth
hardening, all registers are usually considered unsafe after a function
call, even though callee-saved registers should preserve their old
values *under normal operation*.
---
 bolt/include/bolt/Core/MCInstUtils.h  | 128 ++
 .../Target/AArch64/AArch64MCPlusBuilder.cpp   |  90 +---
 2 files changed, 162 insertions(+), 56 deletions(-)

diff --git a/bolt/include/bolt/Core/MCInstUtils.h 
b/bolt/include/bolt/Core/MCInstUtils.h
index 69bf5e6159b74..50b7d56470c99 100644
--- a/bolt/include/bolt/Core/MCInstUtils.h
+++ b/bolt/include/bolt/Core/MCInstUtils.h
@@ -162,6 +162,134 @@ static inline raw_ostream &operator<<(raw_ostream &OS,
   return Ref.print(OS);
 }
 
+/// Instruction-matching helpers operating on a single instruction at a time.
+///
+/// Unlike MCPlusBuilder::MCInstMatcher, this matchInst() function focuses on
+/// the cases where a precise control over the instruction order is important:
+///
+/// // Bring the short names into the local scope:
+/// using namespace MCInstMatcher;
+/// // Declare the registers to capture:
+/// Reg Xn, Xm;
+/// // Capture the 0th and 1st operands, match the 2nd operand against the
+/// // just captured Xm register, match the 3rd operand against literal 0:
+/// if (!matchInst(MaybeAdd, AArch64::ADDXrs, Xm, Xn, Xm, Imm(0)))
+///   return AArch64::NoRegister;
+/// // Match the 0th operand against Xm:
+/// if (!matchInst(MaybeBr, AArch64::BR, Xm))
+///   return AArch64::NoRegister;
+/// // Return the matched register:
+/// return Xm.get();
+namespace MCInstMatcher {
+
+// The base class to match an operand of type T.
+//
+// The subclasses of OpMatcher are intended to be allocated on the stack and
+// to only be used by passing them to matchInst() and by calling their get()
+// function, thus the peculiar `mutable` specifiers: to make the calling code
+// compact and readable, the templated matchInst() function has to accept both
+// long-lived Imm/Reg wrappers declared as local variables (intended to capture
+// the first operand's value and match the subsequent operands, whether inside
+// a single instruction or across multiple instructions), as well as temporary
+// wrappers around literal values to match, f.e. Imm(42) or Reg(AArch64::XZR).
+template  class OpMatcher {
+  mutable std::optional Value;
+  mutable std::optional SavedValue;
+
+  // Remember/restore the last Value - to be called by matchInst.
+  void remember() const { SavedValue = Value; }
+  void restore() const { Value = SavedValue; }
+
+  template 
+  friend bool matchInst(const MCInst &, unsigned, const OpMatchers &...);
+
+protected:
+  OpMatcher(std::optional ValueToMatch) : Value(ValueToMatch) {}
+
+  bool matchValue(T OpValue) const {
+// Check that OpValue does not contradict the existing Value.
+bool MatchResult = !Value || *Value == OpValue;
+// If MatchResult is false, all matchers will be reset before returning from
+// matchInst, including this one, thus no need to assign conditionally.
+Value = OpValue;
+
+return MatchResult;
+  }
+
+public:
+  /// Returns the captured value.
+  T get() const {
+assert(Value.has_value());
+return *Value;
+  }
+};
+
+class Reg : public OpMatcher {
+  bool matches(const MCOperand &Op) const {
+if (!Op.isReg())
+  return false;
+
+return matchValue(Op.getReg());
+  }
+
+  template 
+  friend bool matchInst(const MCInst &, unsigned, const OpMatchers &...);
+
+public:
+  Reg(std::optional RegToMatch = std::nullopt)
+  : OpMatcher(RegToMatch) {}
+};
+
+class Imm : public OpMatcher {
+  bool matches(const MCOperand &Op) const {
+if (!Op.isImm())
+  return false;
+
+return matchValue(Op.getImm());
+  }
+
+  template 
+  friend bool matchInst(const MCInst &, unsigned, const OpMatchers &...);
+
+public:
+  Imm(std::optional ImmToMatch = std::nullopt)
+  : OpMatcher(ImmToMatch) {}
+};
+
+/// Tries to match Inst and updates Ops on success.
+///
+/// If Inst has the specified Opcode and its operand list prefix matches Ops,
+/// this function returns true and updates Ops, otherwise false is returned and
+/// values of Ops are kept as before matchInst was called.
+///
+/// Please note that while Ops are technically passed by a const reference to
+/// make invocations like `matchInst(MI, Opcode, Imm(42))` possible, all their
+/// fields are marked mut

[llvm-branch-commits] [llvm] [BOLT] Introduce helpers to match `MCInst`s one at a time (NFC) (PR #138883)

2025-05-26 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/138883

>From 53fe8585ad9af1f924b87abeeef166529f5b6ca5 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Wed, 7 May 2025 16:42:00 +0300
Subject: [PATCH] [BOLT] Introduce helpers to match `MCInst`s one at a time
 (NFC)

Introduce matchInst helper function to capture and/or match the operands
of MCInst. Unlike the existing `MCPlusBuilder::MCInstMatcher` machinery,
matchInst is intended for the use cases when precise control over the
instruction order is required. For example, when validating PtrAuth
hardening, all registers are usually considered unsafe after a function
call, even though callee-saved registers should preserve their old
values *under normal operation*.
---
 bolt/include/bolt/Core/MCInstUtils.h  | 128 ++
 .../Target/AArch64/AArch64MCPlusBuilder.cpp   |  90 +---
 2 files changed, 162 insertions(+), 56 deletions(-)

diff --git a/bolt/include/bolt/Core/MCInstUtils.h 
b/bolt/include/bolt/Core/MCInstUtils.h
index 69bf5e6159b74..50b7d56470c99 100644
--- a/bolt/include/bolt/Core/MCInstUtils.h
+++ b/bolt/include/bolt/Core/MCInstUtils.h
@@ -162,6 +162,134 @@ static inline raw_ostream &operator<<(raw_ostream &OS,
   return Ref.print(OS);
 }
 
+/// Instruction-matching helpers operating on a single instruction at a time.
+///
+/// Unlike MCPlusBuilder::MCInstMatcher, this matchInst() function focuses on
+/// the cases where a precise control over the instruction order is important:
+///
+/// // Bring the short names into the local scope:
+/// using namespace MCInstMatcher;
+/// // Declare the registers to capture:
+/// Reg Xn, Xm;
+/// // Capture the 0th and 1st operands, match the 2nd operand against the
+/// // just captured Xm register, match the 3rd operand against literal 0:
+/// if (!matchInst(MaybeAdd, AArch64::ADDXrs, Xm, Xn, Xm, Imm(0)))
+///   return AArch64::NoRegister;
+/// // Match the 0th operand against Xm:
+/// if (!matchInst(MaybeBr, AArch64::BR, Xm))
+///   return AArch64::NoRegister;
+/// // Return the matched register:
+/// return Xm.get();
+namespace MCInstMatcher {
+
+// The base class to match an operand of type T.
+//
+// The subclasses of OpMatcher are intended to be allocated on the stack and
+// to only be used by passing them to matchInst() and by calling their get()
+// function, thus the peculiar `mutable` specifiers: to make the calling code
+// compact and readable, the templated matchInst() function has to accept both
+// long-lived Imm/Reg wrappers declared as local variables (intended to capture
+// the first operand's value and match the subsequent operands, whether inside
+// a single instruction or across multiple instructions), as well as temporary
+// wrappers around literal values to match, f.e. Imm(42) or Reg(AArch64::XZR).
+template  class OpMatcher {
+  mutable std::optional Value;
+  mutable std::optional SavedValue;
+
+  // Remember/restore the last Value - to be called by matchInst.
+  void remember() const { SavedValue = Value; }
+  void restore() const { Value = SavedValue; }
+
+  template 
+  friend bool matchInst(const MCInst &, unsigned, const OpMatchers &...);
+
+protected:
+  OpMatcher(std::optional ValueToMatch) : Value(ValueToMatch) {}
+
+  bool matchValue(T OpValue) const {
+// Check that OpValue does not contradict the existing Value.
+bool MatchResult = !Value || *Value == OpValue;
+// If MatchResult is false, all matchers will be reset before returning from
+// matchInst, including this one, thus no need to assign conditionally.
+Value = OpValue;
+
+return MatchResult;
+  }
+
+public:
+  /// Returns the captured value.
+  T get() const {
+assert(Value.has_value());
+return *Value;
+  }
+};
+
+class Reg : public OpMatcher {
+  bool matches(const MCOperand &Op) const {
+if (!Op.isReg())
+  return false;
+
+return matchValue(Op.getReg());
+  }
+
+  template 
+  friend bool matchInst(const MCInst &, unsigned, const OpMatchers &...);
+
+public:
+  Reg(std::optional RegToMatch = std::nullopt)
+  : OpMatcher(RegToMatch) {}
+};
+
+class Imm : public OpMatcher {
+  bool matches(const MCOperand &Op) const {
+if (!Op.isImm())
+  return false;
+
+return matchValue(Op.getImm());
+  }
+
+  template 
+  friend bool matchInst(const MCInst &, unsigned, const OpMatchers &...);
+
+public:
+  Imm(std::optional ImmToMatch = std::nullopt)
+  : OpMatcher(ImmToMatch) {}
+};
+
+/// Tries to match Inst and updates Ops on success.
+///
+/// If Inst has the specified Opcode and its operand list prefix matches Ops,
+/// this function returns true and updates Ops, otherwise false is returned and
+/// values of Ops are kept as before matchInst was called.
+///
+/// Please note that while Ops are technically passed by a const reference to
+/// make invocations like `matchInst(MI, Opcode, Imm(42))` possible, all their
+/// fields are marked mut

[llvm-branch-commits] [clang] [llvm] [HLSL][RootSiganture] Add parsing of new number params in StaticSampler (PR #140291)

2025-05-26 Thread via llvm-branch-commits

https://github.com/joaosaffran approved this pull request.


https://github.com/llvm/llvm-project/pull/140291
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [compiler-rt] compiler-rt: Introduce runtime functions for emulated PAC. (PR #133530)

2025-05-26 Thread Anatoly Trosinenko via llvm-branch-commits


@@ -0,0 +1,133 @@
+//===--- emupac.cpp - Emulated PAC implementation 
-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+//  This file implements Emulated PAC using SipHash_1_3 as the IMPDEF hashing
+//  scheme.
+//
+//===--===//
+
+#include 
+
+#include "siphash/SipHash.h"
+
+// EmuPAC implements runtime emulation of PAC instructions. If the current
+// CPU supports PAC, EmuPAC uses real PAC instructions. Otherwise, it uses the
+// emulation, which is effectively an implementation of PAC with an IMPDEF
+// hashing scheme based on SipHash_1_3.
+//
+// The purpose of the emulation is to allow programs to be built to be portable
+// to machines without PAC support, with some performance loss and increased
+// probability of false positives (due to not being able to portably determine
+// the VA size), while being functionally almost equivalent to running on a
+// machine with PAC support. One example of a use case is if PAC is used in
+// production as a security mitigation, but the testing environment is
+// heterogeneous (i.e. some machines lack PAC support). In this case we would
+// like the testing machines to be able to detect issues resulting
+// from the use of PAC instructions that would affect production by running
+// tests. This can be achieved by building test binaries with EmuPAC and
+// production binaries with real PAC.
+//
+// The emulation assumes that the VA size is at most 48 bits. The architecture
+// as of ARMv8.2, which was the last architecture version in which PAC was not
+// mandatory, permitted VA size up to 52 bits via ARMv8.2-LVA, but we are
+// unaware of an ARMv8.2 CPU that implemented ARMv8.2-LVA.
+
+const uint64_t kMaxVASize = 48;
+const uint64_t kPACMask = ((1ULL << 55) - 1) & ~((1ULL << kMaxVASize) - 1);
+const uint64_t kTTBR1Mask = 1ULL << 55;

atrosinenko wrote:

[nit] According to the LLVM naming convention, these should probably be simply 
`MaxVASize`, `PACMask`, and `TTBR1Mask`.

https://github.com/llvm/llvm-project/pull/133530
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [HLSL][RootSiganture] Add parsing of new number params in StaticSampler (PR #140291)

2025-05-26 Thread via llvm-branch-commits


@@ -161,6 +161,9 @@ struct DescriptorTableClause {
 struct StaticSampler {
   Register Reg;
   float MipLODBias = 0.f;
+  uint32_t MaxAnisotropy = 16;
+  float MinLOD = 0.f;
+  float MaxLOD = 3.402823466e+38f; // FLT_MAX

joaosaffran wrote:

```suggestion
  float MaxLOD = std::numeric_limits::max();
```

https://github.com/llvm/llvm-project/pull/140291
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [HLSL][RootSiganture] Add parsing of new number params in StaticSampler (PR #140291)

2025-05-26 Thread via llvm-branch-commits


@@ -241,13 +245,27 @@ TEST_F(ParseHLSLRootSignatureTest, 
ValidParseStaticSamplerTest) {
 
   ASSERT_FALSE(Parser.parse());
 
-  ASSERT_EQ(Elements.size(), 1u);
+  ASSERT_EQ(Elements.size(), 2u);
 
+  // Check default values are as expected
   RootElement Elem = Elements[0];
   ASSERT_TRUE(std::holds_alternative(Elem));
   ASSERT_EQ(std::get(Elem).Reg.ViewType, RegisterType::SReg);
   ASSERT_EQ(std::get(Elem).Reg.Number, 0u);
   ASSERT_EQ(std::get(Elem).MipLODBias, 0.f);
+  ASSERT_EQ(std::get(Elem).MaxAnisotropy, 16u);
+  ASSERT_EQ(std::get(Elem).MinLOD, 0.f);

joaosaffran wrote:

```suggestion
  EXPECT_FLOAT_EQ(std::get(Elem).MinLOD, 0.f);
```

GoogleTest has a dedicated set of assertions for floating-point numbers: 
https://google.github.io/googletest/reference/assertions.html#floating-point

https://github.com/llvm/llvm-project/pull/140291
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [HLSL][RootSiganture] Add parsing of address params in StaticSampler (PR #140293)

2025-05-26 Thread via llvm-branch-commits

https://github.com/joaosaffran approved this pull request.


https://github.com/llvm/llvm-project/pull/140293
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Add parsing of filter enum for StaticSampler (PR #140294)

2025-05-26 Thread via llvm-branch-commits

https://github.com/joaosaffran approved this pull request.


https://github.com/llvm/llvm-project/pull/140294
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Add parsing of remaining enums to StaticSampler (PR #140305)

2025-05-26 Thread via llvm-branch-commits

https://github.com/joaosaffran approved this pull request.


https://github.com/llvm/llvm-project/pull/140305
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Add space, visibility enums to StaticSampler (PR #140306)

2025-05-26 Thread via llvm-branch-commits

https://github.com/joaosaffran approved this pull request.


https://github.com/llvm/llvm-project/pull/140306
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: improve handling of unreachable basic blocks (PR #136183)

2025-05-26 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/136183

>From 3c131bb85c51e5d532ac2e70f006901703a5546e Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Thu, 17 Apr 2025 20:51:16 +0300
Subject: [PATCH 1/3] [BOLT] Gadget scanner: improve handling of unreachable
 basic blocks

Instead of refusing to analyze an instruction completely, when it is
unreachable according to the CFG reconstructed by BOLT, pessimistically
assume all registers to be unsafe at the start of basic blocks without
any predecessors. Nevertheless, unreachable basic blocks found in
optimized code likely mean imprecise CFG reconstruction, thus report a
warning once per basic block without predecessors.
---
 bolt/lib/Passes/PAuthGadgetScanner.cpp| 46 ++-
 .../AArch64/gs-pacret-autiasp.s   |  7 ++-
 .../binary-analysis/AArch64/gs-pauth-calls.s  | 57 +++
 3 files changed, 95 insertions(+), 15 deletions(-)

diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp 
b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index adf6d57ac..c0e01bb7aa845 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -341,6 +341,12 @@ class SrcSafetyAnalysis {
 return S;
   }
 
+  /// Creates a state with all registers marked unsafe (not to be confused
+  /// with empty state).
+  SrcState createUnsafeState() const {
+return SrcState(NumRegs, RegsToTrackInstsFor.getNumTrackedRegisters());
+  }
+
   BitVector getClobberedRegs(const MCInst &Point) const {
 BitVector Clobbered(NumRegs);
 // Assume a call can clobber all registers, including callee-saved
@@ -584,6 +590,13 @@ class DataflowSrcSafetyAnalysis
 if (BB.isEntryPoint())
   return createEntryState();
 
+// If a basic block without any predecessors is found in optimized code,
+// this likely means that some CFG edges were not detected. Pessimistically
+// assume all registers to be unsafe before this basic block and warn about
+// this fact in FunctionAnalysis::findUnsafeUses().
+if (BB.pred_empty())
+  return createUnsafeState();
+
 return SrcState();
   }
 
@@ -688,12 +701,6 @@ class CFGUnawareSrcSafetyAnalysis : public 
SrcSafetyAnalysis,
   using SrcSafetyAnalysis::BC;
   BinaryFunction &BF;
 
-  /// Creates a state with all registers marked unsafe (not to be confused
-  /// with empty state).
-  SrcState createUnsafeState() const {
-return SrcState(NumRegs, RegsToTrackInstsFor.getNumTrackedRegisters());
-  }
-
 public:
   CFGUnawareSrcSafetyAnalysis(BinaryFunction &BF,
   MCPlusBuilder::AllocatorIdTy AllocId,
@@ -1350,19 +1357,30 @@ void FunctionAnalysisContext::findUnsafeUses(
 BF.dump();
   });
 
+  if (BF.hasCFG()) {
+// Warn on basic blocks being unreachable according to BOLT, as this
+// likely means CFG is imprecise.
+for (BinaryBasicBlock &BB : BF) {
+  if (!BB.pred_empty() || BB.isEntryPoint())
+continue;
+  // Arbitrarily attach the report to the first instruction of BB.
+  MCInst *InstToReport = BB.getFirstNonPseudoInstr();
+  if (!InstToReport)
+continue; // BB has no real instructions
+
+  Reports.push_back(
+  make_generic_report(MCInstReference::get(InstToReport, BF),
+  "Warning: no predecessor basic blocks detected "
+  "(possibly incomplete CFG)"));
+}
+  }
+
   iterateOverInstrs(BF, [&](MCInstReference Inst) {
 if (BC.MIB->isCFI(Inst))
   return;
 
 const SrcState &S = Analysis->getStateBefore(Inst);
-
-// If non-empty state was never propagated from the entry basic block
-// to Inst, assume it to be unreachable and report a warning.
-if (S.empty()) {
-  Reports.push_back(
-  make_generic_report(Inst, "Warning: unreachable instruction found"));
-  return;
-}
+assert(!S.empty() && "Instruction has no associated state");
 
 if (auto Report = shouldReportReturnGadget(BC, Inst, S))
   Reports.push_back(*Report);
diff --git a/bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s 
b/bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s
index 284f0bea607a5..6559ba336e8de 100644
--- a/bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s
+++ b/bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s
@@ -215,12 +215,17 @@ f_callclobbered_calleesaved:
 .globl  f_unreachable_instruction
 .type   f_unreachable_instruction,@function
 f_unreachable_instruction:
-// CHECK-LABEL: GS-PAUTH: Warning: unreachable instruction found in function 
f_unreachable_instruction, basic block {{[0-9a-zA-Z.]+}}, at address
+// CHECK-LABEL: GS-PAUTH: Warning: no predecessor basic blocks detected 
(possibly incomplete CFG) in function f_unreachable_instruction, basic block 
{{[0-9a-zA-Z.]+}}, at address
 // CHECK-NEXT:The instruction is {{[0-9a-f]+}}:   add x0, x1, 
x2
 // CHECK-NOT:   instructions that write t

[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: optionally assume auth traps on failure (PR #139778)

2025-05-26 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/139778

>From 639d7046baca3f37d4ec17daf1116c28bac604b1 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Tue, 13 May 2025 19:50:41 +0300
Subject: [PATCH] [BOLT] Gadget scanner: optionally assume auth traps on
 failure

On AArch64 it is possible for an auth instruction to either return an
invalid address value on failure (without FEAT_FPAC) or generate an
error (with FEAT_FPAC). It thus may be possible to never emit explicit
pointer checks, if the target CPU is known to support FEAT_FPAC.

This commit implements an --auth-traps-on-failure command line option,
which essentially makes "safe-to-dereference" and "trusted" register
properties identical and disables scanning for authentication oracles
completely.
---
 bolt/lib/Passes/PAuthGadgetScanner.cpp| 112 +++
 .../binary-analysis/AArch64/cmdline-args.test |   1 +
 .../AArch64/gs-pauth-authentication-oracles.s |   6 +-
 .../binary-analysis/AArch64/gs-pauth-calls.s  |   5 +-
 .../AArch64/gs-pauth-debug-output.s   | 177 ++---
 .../AArch64/gs-pauth-jump-table.s |   6 +-
 .../AArch64/gs-pauth-signing-oracles.s|  54 ++---
 .../AArch64/gs-pauth-tail-calls.s | 184 +-
 8 files changed, 318 insertions(+), 227 deletions(-)

diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp 
b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index f979b1480d0b1..119cb928337a9 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -14,6 +14,7 @@
 #include "bolt/Passes/PAuthGadgetScanner.h"
 #include "bolt/Core/ParallelUtilities.h"
 #include "bolt/Passes/DataflowAnalysis.h"
+#include "bolt/Utils/CommandLineOpts.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/MC/MCInst.h"
@@ -26,6 +27,11 @@ namespace llvm {
 namespace bolt {
 namespace PAuthGadgetScanner {
 
+static cl::opt AuthTrapsOnFailure(
+"auth-traps-on-failure",
+cl::desc("Assume authentication instructions always trap on failure"),
+cl::cat(opts::BinaryAnalysisCategory));
+
 [[maybe_unused]] static void traceInst(const BinaryContext &BC, StringRef 
Label,
const MCInst &MI) {
   dbgs() << "  " << Label << ": ";
@@ -363,6 +369,34 @@ class SrcSafetyAnalysis {
 return Clobbered;
   }
 
+  std::optional getRegMadeTrustedByChecking(const MCInst &Inst,
+   SrcState Cur) const {
+// This function cannot return multiple registers. This is never the case
+// on AArch64.
+std::optional RegCheckedByInst =
+BC.MIB->getAuthCheckedReg(Inst, /*MayOverwrite=*/false);
+if (RegCheckedByInst && Cur.SafeToDerefRegs[*RegCheckedByInst])
+  return *RegCheckedByInst;
+
+auto It = CheckerSequenceInfo.find(&Inst);
+if (It == CheckerSequenceInfo.end())
+  return std::nullopt;
+
+MCPhysReg RegCheckedBySequence = It->second.first;
+const MCInst *FirstCheckerInst = It->second.second;
+
+// FirstCheckerInst should belong to the same basic block (see the
+// assertion in DataflowSrcSafetyAnalysis::run()), meaning it was
+// deterministically processed a few steps before this instruction.
+const SrcState &StateBeforeChecker = getStateBefore(*FirstCheckerInst);
+
+// The sequence checks the register, but it should be authenticated before.
+if (!StateBeforeChecker.SafeToDerefRegs[RegCheckedBySequence])
+  return std::nullopt;
+
+return RegCheckedBySequence;
+  }
+
   // Returns all registers that can be treated as if they are written by an
   // authentication instruction.
   SmallVector getRegsMadeSafeToDeref(const MCInst &Point,
@@ -385,18 +419,38 @@ class SrcSafetyAnalysis {
 Regs.push_back(DstAndSrc->first);
 }
 
+// Make sure explicit checker sequence keeps register safe-to-dereference
+// when the register would be clobbered according to the regular rules:
+//
+//; LR is safe to dereference here
+//mov   x16, x30  ; start of the sequence, LR is s-t-d right before
+//xpaclri ; clobbers LR, LR is not safe anymore
+//cmp   x30, x16
+//b.eq  1f; end of the sequence: LR is marked as trusted
+//brk   0x1234
+//  1:
+//; at this point LR would be marked as trusted,
+//; but not safe-to-dereference
+//
+// or even just
+//
+//; X1 is safe to dereference here
+//ldr x0, [x1, #8]!
+//; X1 is trusted here, but it was clobbered due to address write-back
+if (auto CheckedReg = getRegMadeTrustedByChecking(Point, Cur))
+  Regs.push_back(*CheckedReg);
+
 return Regs;
   }
 
   // Returns all registers made trusted by this instruction.
   SmallVector getRegsMadeTrusted(const MCInst &Point,
 const SrcState &Cur) const {
+assert(!AuthTrapsOnFailure &&

[llvm-branch-commits] [Github][CI] Make the new premerge system fail jobs on errors (PR #139359)

2025-05-26 Thread Aiden Grossman via llvm-branch-commits

https://github.com/boomanaiden154 updated 
https://github.com/llvm/llvm-project/pull/139359


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [Github][CI] Make the new premerge system fail jobs on errors (PR #139359)

2025-05-26 Thread Aiden Grossman via llvm-branch-commits

https://github.com/boomanaiden154 updated 
https://github.com/llvm/llvm-project/pull/139359


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [Github][CI] Make the new premerge system fail jobs on errors (PR #139359)

2025-05-26 Thread Aiden Grossman via llvm-branch-commits

https://github.com/boomanaiden154 updated 
https://github.com/llvm/llvm-project/pull/139359


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [Github][CI] Make the new premerge system fail jobs on errors (PR #139359)

2025-05-26 Thread Aiden Grossman via llvm-branch-commits

https://github.com/boomanaiden154 updated 
https://github.com/llvm/llvm-project/pull/139359


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [HLSL][RootSiganture] Add parsing of new number params in StaticSampler (PR #140291)

2025-05-26 Thread Finn Plummer via llvm-branch-commits

https://github.com/inbelic updated 
https://github.com/llvm/llvm-project/pull/140291

>From d6148b7d9815c2543f37d50a7a611e482c99b91c Mon Sep 17 00:00:00 2001
From: Finn Plummer 
Date: Fri, 16 May 2025 16:08:36 +
Subject: [PATCH 1/8] pre-req: add keywords

---
 clang/include/clang/Lex/HLSLRootSignatureTokenKinds.def | 3 +++
 clang/unittests/Lex/LexHLSLRootSignatureTest.cpp| 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/Lex/HLSLRootSignatureTokenKinds.def 
b/clang/include/clang/Lex/HLSLRootSignatureTokenKinds.def
index 5d16eaa5b72f6..7ca131349fed4 100644
--- a/clang/include/clang/Lex/HLSLRootSignatureTokenKinds.def
+++ b/clang/include/clang/Lex/HLSLRootSignatureTokenKinds.def
@@ -102,6 +102,9 @@ KEYWORD(offset)
 
 // StaticSampler Keywords:
 KEYWORD(mipLODBias)
+KEYWORD(maxAnisotropy)
+KEYWORD(minLOD)
+KEYWORD(maxLOD)
 
 // Unbounded Enum:
 UNBOUNDED_ENUM(unbounded, "unbounded")
diff --git a/clang/unittests/Lex/LexHLSLRootSignatureTest.cpp 
b/clang/unittests/Lex/LexHLSLRootSignatureTest.cpp
index b610b8f10f8da..575a97e75a05d 100644
--- a/clang/unittests/Lex/LexHLSLRootSignatureTest.cpp
+++ b/clang/unittests/Lex/LexHLSLRootSignatureTest.cpp
@@ -136,7 +136,7 @@ TEST_F(LexHLSLRootSignatureTest, ValidLexAllTokensTest) {
 space visibility flags
 numDescriptors offset
 
-mipLODBias
+mipLODBias maxAnisotropy minLOD maxLOD
 
 unbounded
 DESCRIPTOR_RANGE_OFFSET_APPEND

>From 79d817fe7d8b0c754ff9ea70feb077c65f8d2a25 Mon Sep 17 00:00:00 2001
From: Finn Plummer 
Date: Fri, 16 May 2025 16:10:05 +
Subject: [PATCH 2/8] add parsing for maxAnisotropy

---
 .../clang/Parse/ParseHLSLRootSignature.h  |  1 +
 clang/lib/Parse/ParseHLSLRootSignature.cpp| 20 +++
 .../llvm/Frontend/HLSL/HLSLRootSignature.h|  1 +
 3 files changed, 22 insertions(+)

diff --git a/clang/include/clang/Parse/ParseHLSLRootSignature.h 
b/clang/include/clang/Parse/ParseHLSLRootSignature.h
index c12b022a030ef..9fc991cfabc6d 100644
--- a/clang/include/clang/Parse/ParseHLSLRootSignature.h
+++ b/clang/include/clang/Parse/ParseHLSLRootSignature.h
@@ -112,6 +112,7 @@ class RootSignatureParser {
   struct ParsedStaticSamplerParams {
 std::optional Reg;
 std::optional MipLODBias;
+std::optional MaxAnisotropy;
   };
   std::optional parseStaticSamplerParams();
 
diff --git a/clang/lib/Parse/ParseHLSLRootSignature.cpp 
b/clang/lib/Parse/ParseHLSLRootSignature.cpp
index db2e922160062..41e4b31d8ff84 100644
--- a/clang/lib/Parse/ParseHLSLRootSignature.cpp
+++ b/clang/lib/Parse/ParseHLSLRootSignature.cpp
@@ -384,6 +384,9 @@ std::optional 
RootSignatureParser::parseStaticSampler() {
   if (Params->MipLODBias.has_value())
 Sampler.MipLODBias = Params->MipLODBias.value();
 
+  if (Params->MaxAnisotropy.has_value())
+Sampler.MaxAnisotropy= Params->MaxAnisotropy.value();
+
   if (consumeExpectedToken(TokenKind::pu_r_paren,
diag::err_hlsl_unexpected_end_of_params,
/*param of=*/TokenKind::kw_StaticSampler))
@@ -686,6 +689,23 @@ RootSignatureParser::parseStaticSamplerParams() {
 return std::nullopt;
   Params.MipLODBias = (float)*MipLODBias;
 }
+
+// `maxAnisotropy` `=` POS_INT
+if (tryConsumeExpectedToken(TokenKind::kw_maxAnisotropy)) {
+  if (Params.MaxAnisotropy.has_value()) {
+getDiags().Report(CurToken.TokLoc, diag::err_hlsl_rootsig_repeat_param)
+<< CurToken.TokKind;
+return std::nullopt;
+  }
+
+  if (consumeExpectedToken(TokenKind::pu_equal))
+return std::nullopt;
+
+  auto MaxAnisotropy = parseUIntParam();
+  if (!MaxAnisotropy.has_value())
+return std::nullopt;
+  Params.MaxAnisotropy = MaxAnisotropy;
+}
   } while (tryConsumeExpectedToken(TokenKind::pu_comma));
 
   return Params;
diff --git a/llvm/include/llvm/Frontend/HLSL/HLSLRootSignature.h 
b/llvm/include/llvm/Frontend/HLSL/HLSLRootSignature.h
index 6b4da48a302bc..b09ea688627d6 100644
--- a/llvm/include/llvm/Frontend/HLSL/HLSLRootSignature.h
+++ b/llvm/include/llvm/Frontend/HLSL/HLSLRootSignature.h
@@ -161,6 +161,7 @@ struct DescriptorTableClause {
 struct StaticSampler {
   Register Reg;
   float MipLODBias = 0.f;
+  uint32_t MaxAnisotropy = 16;
 };
 
 /// Models RootElement : RootFlags | RootConstants | RootParam

>From 4ed8cdd9f0d5166c6e391406aa9d7337e750a39c Mon Sep 17 00:00:00 2001
From: Finn Plummer 
Date: Fri, 16 May 2025 16:13:37 +
Subject: [PATCH 3/8] add parsing for minLOD

---
 .../clang/Parse/ParseHLSLRootSignature.h  |  1 +
 clang/lib/Parse/ParseHLSLRootSignature.cpp| 20 +++
 .../llvm/Frontend/HLSL/HLSLRootSignature.h|  1 +
 3 files changed, 22 insertions(+)

diff --git a/clang/include/clang/Parse/ParseHLSLRootSignature.h 
b/clang/include/clang/Parse/ParseHLSLRootSignature.h
index 9fc991cfabc6d..8c4a945d440e5 100644
--- a/clang/include/clang/Parse/ParseHLSLRootSignature.h
+++ b/clang/include/

[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Add parsing of floats for StaticSampler (PR #140181)

2025-05-26 Thread Finn Plummer via llvm-branch-commits

https://github.com/inbelic updated 
https://github.com/llvm/llvm-project/pull/140181

>From f9fbe391091fbf23203d6cc997e19d05d92a4a18 Mon Sep 17 00:00:00 2001
From: Finn Plummer 
Date: Thu, 15 May 2025 23:14:10 +
Subject: [PATCH 1/9] pre-req: add missing token to Lexer

---
 clang/include/clang/Lex/HLSLRootSignatureTokenKinds.def | 3 +++
 clang/unittests/Lex/LexHLSLRootSignatureTest.cpp| 2 ++
 2 files changed, 5 insertions(+)

diff --git a/clang/include/clang/Lex/HLSLRootSignatureTokenKinds.def 
b/clang/include/clang/Lex/HLSLRootSignatureTokenKinds.def
index ddebe82987197..5d16eaa5b72f6 100644
--- a/clang/include/clang/Lex/HLSLRootSignatureTokenKinds.def
+++ b/clang/include/clang/Lex/HLSLRootSignatureTokenKinds.def
@@ -100,6 +100,9 @@ KEYWORD(flags)
 KEYWORD(numDescriptors)
 KEYWORD(offset)
 
+// StaticSampler Keywords:
+KEYWORD(mipLODBias)
+
 // Unbounded Enum:
 UNBOUNDED_ENUM(unbounded, "unbounded")
 
diff --git a/clang/unittests/Lex/LexHLSLRootSignatureTest.cpp 
b/clang/unittests/Lex/LexHLSLRootSignatureTest.cpp
index 3e38c281f4fb1..b610b8f10f8da 100644
--- a/clang/unittests/Lex/LexHLSLRootSignatureTest.cpp
+++ b/clang/unittests/Lex/LexHLSLRootSignatureTest.cpp
@@ -136,6 +136,8 @@ TEST_F(LexHLSLRootSignatureTest, ValidLexAllTokensTest) {
 space visibility flags
 numDescriptors offset
 
+mipLODBias
+
 unbounded
 DESCRIPTOR_RANGE_OFFSET_APPEND
 

>From f434caece16ba262807968623d719a09f2a455ff Mon Sep 17 00:00:00 2001
From: Finn Plummer 
Date: Thu, 15 May 2025 23:15:04 +
Subject: [PATCH 2/9] pre-req: add parsing of MipLODBias as an uint

- defines a float data member of StaticSampler that will be used to test
functionality of parsing a float
---
 .../clang/Parse/ParseHLSLRootSignature.h  |  1 +
 clang/lib/Parse/ParseHLSLRootSignature.cpp| 21 +++
 .../Parse/ParseHLSLRootSignatureTest.cpp  |  3 ++-
 .../llvm/Frontend/HLSL/HLSLRootSignature.h|  1 +
 4 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/Parse/ParseHLSLRootSignature.h 
b/clang/include/clang/Parse/ParseHLSLRootSignature.h
index 80fedc2f16574..2e85fd3011d05 100644
--- a/clang/include/clang/Parse/ParseHLSLRootSignature.h
+++ b/clang/include/clang/Parse/ParseHLSLRootSignature.h
@@ -111,6 +111,7 @@ class RootSignatureParser {
 
   struct ParsedStaticSamplerParams {
 std::optional Reg;
+std::optional MipLODBias;
   };
   std::optional parseStaticSamplerParams();
 
diff --git a/clang/lib/Parse/ParseHLSLRootSignature.cpp 
b/clang/lib/Parse/ParseHLSLRootSignature.cpp
index 6e4bb4d59e109..2aa3b1f8e31c9 100644
--- a/clang/lib/Parse/ParseHLSLRootSignature.cpp
+++ b/clang/lib/Parse/ParseHLSLRootSignature.cpp
@@ -378,6 +378,10 @@ std::optional 
RootSignatureParser::parseStaticSampler() {
 
   Sampler.Reg = Params->Reg.value();
 
+  // Fill in optional values
+  if (Params->MipLODBias.has_value())
+Sampler.MipLODBias = Params->MipLODBias.value();
+
   if (consumeExpectedToken(TokenKind::pu_r_paren,
diag::err_hlsl_unexpected_end_of_params,
/*param of=*/TokenKind::kw_StaticSampler))
@@ -663,6 +667,23 @@ RootSignatureParser::parseStaticSamplerParams() {
 return std::nullopt;
   Params.Reg = Reg;
 }
+
+// `mipLODBias` `=` NUMBER
+if (tryConsumeExpectedToken(TokenKind::kw_mipLODBias)) {
+  if (Params.MipLODBias.has_value()) {
+getDiags().Report(CurToken.TokLoc, diag::err_hlsl_rootsig_repeat_param)
+<< CurToken.TokKind;
+return std::nullopt;
+  }
+
+  if (consumeExpectedToken(TokenKind::pu_equal))
+return std::nullopt;
+
+  auto MipLODBias = parseUIntParam();
+  if (!MipLODBias.has_value())
+return std::nullopt;
+  Params.MipLODBias = (float)*MipLODBias;
+}
   } while (tryConsumeExpectedToken(TokenKind::pu_comma));
 
   return Params;
diff --git a/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp 
b/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp
index 14c3101f3eafa..31df2b73c2ac1 100644
--- a/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp
+++ b/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp
@@ -225,7 +225,7 @@ TEST_F(ParseHLSLRootSignatureTest, ValidParseDTClausesTest) 
{
 
 TEST_F(ParseHLSLRootSignatureTest, ValidParseStaticSamplerTest) {
   const llvm::StringLiteral Source = R"cc(
-StaticSampler(s0)
+StaticSampler(s0, mipLODBias = 0)
   )cc";
 
   TrivialModuleLoader ModLoader;
@@ -247,6 +247,7 @@ TEST_F(ParseHLSLRootSignatureTest, 
ValidParseStaticSamplerTest) {
   ASSERT_TRUE(std::holds_alternative(Elem));
   ASSERT_EQ(std::get(Elem).Reg.ViewType, RegisterType::SReg);
   ASSERT_EQ(std::get(Elem).Reg.Number, 0u);
+  ASSERT_EQ(std::get(Elem).MipLODBias, 0u);
 
   ASSERT_TRUE(Consumer->isSatisfied());
 }
diff --git a/llvm/include/llvm/Frontend/HLSL/HLSLRootSignature.h 
b/llvm/include/llvm/Frontend/HLSL/HLSLRootSignature.h
index 25df9a7235ef3..6b4da48a302bc 100644
--- a/llvm/inclu

[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Add parsing of floats for StaticSampler (PR #140181)

2025-05-26 Thread Finn Plummer via llvm-branch-commits

https://github.com/inbelic updated 
https://github.com/llvm/llvm-project/pull/140181

>From f9fbe391091fbf23203d6cc997e19d05d92a4a18 Mon Sep 17 00:00:00 2001
From: Finn Plummer 
Date: Thu, 15 May 2025 23:14:10 +
Subject: [PATCH 01/10] pre-req: add missing token to Lexer

---
 clang/include/clang/Lex/HLSLRootSignatureTokenKinds.def | 3 +++
 clang/unittests/Lex/LexHLSLRootSignatureTest.cpp| 2 ++
 2 files changed, 5 insertions(+)

diff --git a/clang/include/clang/Lex/HLSLRootSignatureTokenKinds.def 
b/clang/include/clang/Lex/HLSLRootSignatureTokenKinds.def
index ddebe82987197..5d16eaa5b72f6 100644
--- a/clang/include/clang/Lex/HLSLRootSignatureTokenKinds.def
+++ b/clang/include/clang/Lex/HLSLRootSignatureTokenKinds.def
@@ -100,6 +100,9 @@ KEYWORD(flags)
 KEYWORD(numDescriptors)
 KEYWORD(offset)
 
+// StaticSampler Keywords:
+KEYWORD(mipLODBias)
+
 // Unbounded Enum:
 UNBOUNDED_ENUM(unbounded, "unbounded")
 
diff --git a/clang/unittests/Lex/LexHLSLRootSignatureTest.cpp 
b/clang/unittests/Lex/LexHLSLRootSignatureTest.cpp
index 3e38c281f4fb1..b610b8f10f8da 100644
--- a/clang/unittests/Lex/LexHLSLRootSignatureTest.cpp
+++ b/clang/unittests/Lex/LexHLSLRootSignatureTest.cpp
@@ -136,6 +136,8 @@ TEST_F(LexHLSLRootSignatureTest, ValidLexAllTokensTest) {
 space visibility flags
 numDescriptors offset
 
+mipLODBias
+
 unbounded
 DESCRIPTOR_RANGE_OFFSET_APPEND
 

>From f434caece16ba262807968623d719a09f2a455ff Mon Sep 17 00:00:00 2001
From: Finn Plummer 
Date: Thu, 15 May 2025 23:15:04 +
Subject: [PATCH 02/10] pre-req: add parsing of MipLODBias as an uint

- defines a float data member of StaticSampler that will be used to test
functionality of parsing a float
---
 .../clang/Parse/ParseHLSLRootSignature.h  |  1 +
 clang/lib/Parse/ParseHLSLRootSignature.cpp| 21 +++
 .../Parse/ParseHLSLRootSignatureTest.cpp  |  3 ++-
 .../llvm/Frontend/HLSL/HLSLRootSignature.h|  1 +
 4 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/Parse/ParseHLSLRootSignature.h 
b/clang/include/clang/Parse/ParseHLSLRootSignature.h
index 80fedc2f16574..2e85fd3011d05 100644
--- a/clang/include/clang/Parse/ParseHLSLRootSignature.h
+++ b/clang/include/clang/Parse/ParseHLSLRootSignature.h
@@ -111,6 +111,7 @@ class RootSignatureParser {
 
   struct ParsedStaticSamplerParams {
 std::optional Reg;
+std::optional MipLODBias;
   };
   std::optional parseStaticSamplerParams();
 
diff --git a/clang/lib/Parse/ParseHLSLRootSignature.cpp 
b/clang/lib/Parse/ParseHLSLRootSignature.cpp
index 6e4bb4d59e109..2aa3b1f8e31c9 100644
--- a/clang/lib/Parse/ParseHLSLRootSignature.cpp
+++ b/clang/lib/Parse/ParseHLSLRootSignature.cpp
@@ -378,6 +378,10 @@ std::optional 
RootSignatureParser::parseStaticSampler() {
 
   Sampler.Reg = Params->Reg.value();
 
+  // Fill in optional values
+  if (Params->MipLODBias.has_value())
+Sampler.MipLODBias = Params->MipLODBias.value();
+
   if (consumeExpectedToken(TokenKind::pu_r_paren,
diag::err_hlsl_unexpected_end_of_params,
/*param of=*/TokenKind::kw_StaticSampler))
@@ -663,6 +667,23 @@ RootSignatureParser::parseStaticSamplerParams() {
 return std::nullopt;
   Params.Reg = Reg;
 }
+
+// `mipLODBias` `=` NUMBER
+if (tryConsumeExpectedToken(TokenKind::kw_mipLODBias)) {
+  if (Params.MipLODBias.has_value()) {
+getDiags().Report(CurToken.TokLoc, diag::err_hlsl_rootsig_repeat_param)
+<< CurToken.TokKind;
+return std::nullopt;
+  }
+
+  if (consumeExpectedToken(TokenKind::pu_equal))
+return std::nullopt;
+
+  auto MipLODBias = parseUIntParam();
+  if (!MipLODBias.has_value())
+return std::nullopt;
+  Params.MipLODBias = (float)*MipLODBias;
+}
   } while (tryConsumeExpectedToken(TokenKind::pu_comma));
 
   return Params;
diff --git a/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp 
b/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp
index 14c3101f3eafa..31df2b73c2ac1 100644
--- a/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp
+++ b/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp
@@ -225,7 +225,7 @@ TEST_F(ParseHLSLRootSignatureTest, ValidParseDTClausesTest) 
{
 
 TEST_F(ParseHLSLRootSignatureTest, ValidParseStaticSamplerTest) {
   const llvm::StringLiteral Source = R"cc(
-StaticSampler(s0)
+StaticSampler(s0, mipLODBias = 0)
   )cc";
 
   TrivialModuleLoader ModLoader;
@@ -247,6 +247,7 @@ TEST_F(ParseHLSLRootSignatureTest, 
ValidParseStaticSamplerTest) {
   ASSERT_TRUE(std::holds_alternative(Elem));
   ASSERT_EQ(std::get(Elem).Reg.ViewType, RegisterType::SReg);
   ASSERT_EQ(std::get(Elem).Reg.Number, 0u);
+  ASSERT_EQ(std::get(Elem).MipLODBias, 0u);
 
   ASSERT_TRUE(Consumer->isSatisfied());
 }
diff --git a/llvm/include/llvm/Frontend/HLSL/HLSLRootSignature.h 
b/llvm/include/llvm/Frontend/HLSL/HLSLRootSignature.h
index 25df9a7235ef3..6b4da48a302bc 100644
--- a/llvm/i

[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Add parsing of floats for StaticSampler (PR #140181)

2025-05-26 Thread Finn Plummer via llvm-branch-commits

https://github.com/inbelic updated 
https://github.com/llvm/llvm-project/pull/140181

>From f9fbe391091fbf23203d6cc997e19d05d92a4a18 Mon Sep 17 00:00:00 2001
From: Finn Plummer 
Date: Thu, 15 May 2025 23:14:10 +
Subject: [PATCH 01/10] pre-req: add missing token to Lexer

---
 clang/include/clang/Lex/HLSLRootSignatureTokenKinds.def | 3 +++
 clang/unittests/Lex/LexHLSLRootSignatureTest.cpp| 2 ++
 2 files changed, 5 insertions(+)

diff --git a/clang/include/clang/Lex/HLSLRootSignatureTokenKinds.def 
b/clang/include/clang/Lex/HLSLRootSignatureTokenKinds.def
index ddebe82987197..5d16eaa5b72f6 100644
--- a/clang/include/clang/Lex/HLSLRootSignatureTokenKinds.def
+++ b/clang/include/clang/Lex/HLSLRootSignatureTokenKinds.def
@@ -100,6 +100,9 @@ KEYWORD(flags)
 KEYWORD(numDescriptors)
 KEYWORD(offset)
 
+// StaticSampler Keywords:
+KEYWORD(mipLODBias)
+
 // Unbounded Enum:
 UNBOUNDED_ENUM(unbounded, "unbounded")
 
diff --git a/clang/unittests/Lex/LexHLSLRootSignatureTest.cpp 
b/clang/unittests/Lex/LexHLSLRootSignatureTest.cpp
index 3e38c281f4fb1..b610b8f10f8da 100644
--- a/clang/unittests/Lex/LexHLSLRootSignatureTest.cpp
+++ b/clang/unittests/Lex/LexHLSLRootSignatureTest.cpp
@@ -136,6 +136,8 @@ TEST_F(LexHLSLRootSignatureTest, ValidLexAllTokensTest) {
 space visibility flags
 numDescriptors offset
 
+mipLODBias
+
 unbounded
 DESCRIPTOR_RANGE_OFFSET_APPEND
 

>From f434caece16ba262807968623d719a09f2a455ff Mon Sep 17 00:00:00 2001
From: Finn Plummer 
Date: Thu, 15 May 2025 23:15:04 +
Subject: [PATCH 02/10] pre-req: add parsing of MipLODBias as an uint

- defines a float data member of StaticSampler that will be used to test
functionality of parsing a float
---
 .../clang/Parse/ParseHLSLRootSignature.h  |  1 +
 clang/lib/Parse/ParseHLSLRootSignature.cpp| 21 +++
 .../Parse/ParseHLSLRootSignatureTest.cpp  |  3 ++-
 .../llvm/Frontend/HLSL/HLSLRootSignature.h|  1 +
 4 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/Parse/ParseHLSLRootSignature.h 
b/clang/include/clang/Parse/ParseHLSLRootSignature.h
index 80fedc2f16574..2e85fd3011d05 100644
--- a/clang/include/clang/Parse/ParseHLSLRootSignature.h
+++ b/clang/include/clang/Parse/ParseHLSLRootSignature.h
@@ -111,6 +111,7 @@ class RootSignatureParser {
 
   struct ParsedStaticSamplerParams {
 std::optional Reg;
+std::optional MipLODBias;
   };
   std::optional parseStaticSamplerParams();
 
diff --git a/clang/lib/Parse/ParseHLSLRootSignature.cpp 
b/clang/lib/Parse/ParseHLSLRootSignature.cpp
index 6e4bb4d59e109..2aa3b1f8e31c9 100644
--- a/clang/lib/Parse/ParseHLSLRootSignature.cpp
+++ b/clang/lib/Parse/ParseHLSLRootSignature.cpp
@@ -378,6 +378,10 @@ std::optional 
RootSignatureParser::parseStaticSampler() {
 
   Sampler.Reg = Params->Reg.value();
 
+  // Fill in optional values
+  if (Params->MipLODBias.has_value())
+Sampler.MipLODBias = Params->MipLODBias.value();
+
   if (consumeExpectedToken(TokenKind::pu_r_paren,
diag::err_hlsl_unexpected_end_of_params,
/*param of=*/TokenKind::kw_StaticSampler))
@@ -663,6 +667,23 @@ RootSignatureParser::parseStaticSamplerParams() {
 return std::nullopt;
   Params.Reg = Reg;
 }
+
+// `mipLODBias` `=` NUMBER
+if (tryConsumeExpectedToken(TokenKind::kw_mipLODBias)) {
+  if (Params.MipLODBias.has_value()) {
+getDiags().Report(CurToken.TokLoc, diag::err_hlsl_rootsig_repeat_param)
+<< CurToken.TokKind;
+return std::nullopt;
+  }
+
+  if (consumeExpectedToken(TokenKind::pu_equal))
+return std::nullopt;
+
+  auto MipLODBias = parseUIntParam();
+  if (!MipLODBias.has_value())
+return std::nullopt;
+  Params.MipLODBias = (float)*MipLODBias;
+}
   } while (tryConsumeExpectedToken(TokenKind::pu_comma));
 
   return Params;
diff --git a/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp 
b/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp
index 14c3101f3eafa..31df2b73c2ac1 100644
--- a/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp
+++ b/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp
@@ -225,7 +225,7 @@ TEST_F(ParseHLSLRootSignatureTest, ValidParseDTClausesTest) 
{
 
 TEST_F(ParseHLSLRootSignatureTest, ValidParseStaticSamplerTest) {
   const llvm::StringLiteral Source = R"cc(
-StaticSampler(s0)
+StaticSampler(s0, mipLODBias = 0)
   )cc";
 
   TrivialModuleLoader ModLoader;
@@ -247,6 +247,7 @@ TEST_F(ParseHLSLRootSignatureTest, 
ValidParseStaticSamplerTest) {
   ASSERT_TRUE(std::holds_alternative(Elem));
   ASSERT_EQ(std::get(Elem).Reg.ViewType, RegisterType::SReg);
   ASSERT_EQ(std::get(Elem).Reg.Number, 0u);
+  ASSERT_EQ(std::get(Elem).MipLODBias, 0u);
 
   ASSERT_TRUE(Consumer->isSatisfied());
 }
diff --git a/llvm/include/llvm/Frontend/HLSL/HLSLRootSignature.h 
b/llvm/include/llvm/Frontend/HLSL/HLSLRootSignature.h
index 25df9a7235ef3..6b4da48a302bc 100644
--- a/llvm/i

[llvm-branch-commits] [compiler-rt] compiler-rt: Introduce runtime functions for emulated PAC. (PR #133530)

2025-05-26 Thread Anatoly Trosinenko via llvm-branch-commits


@@ -0,0 +1,133 @@
+//===--- emupac.cpp - Emulated PAC implementation 
-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+//  This file implements Emulated PAC using SipHash_1_3 as the IMPDEF hashing
+//  scheme.
+//
+//===--===//
+
+#include 
+
+#include "siphash/SipHash.h"
+
+// EmuPAC implements runtime emulation of PAC instructions. If the current
+// CPU supports PAC, EmuPAC uses real PAC instructions. Otherwise, it uses the
+// emulation, which is effectively an implementation of PAC with an IMPDEF
+// hashing scheme based on SipHash_1_3.
+//
+// The purpose of the emulation is to allow programs to be built to be portable
+// to machines without PAC support, with some performance loss and increased
+// probability of false positives (due to not being able to portably determine
+// the VA size), while being functionally almost equivalent to running on a
+// machine with PAC support. One example of a use case is if PAC is used in
+// production as a security mitigation, but the testing environment is
+// heterogeneous (i.e. some machines lack PAC support). In this case we would
+// like the testing machines to be able to detect issues resulting
+// from the use of PAC instructions that would affect production by running
+// tests. This can be achieved by building test binaries with EmuPAC and
+// production binaries with real PAC.
+//
+// The emulation assumes that the VA size is at most 48 bits. The architecture
+// as of ARMv8.2, which was the last architecture version in which PAC was not
+// mandatory, permitted VA size up to 52 bits via ARMv8.2-LVA, but we are
+// unaware of an ARMv8.2 CPU that implemented ARMv8.2-LVA.
+
+const uint64_t kMaxVASize = 48;
+const uint64_t kPACMask = ((1ULL << 55) - 1) & ~((1ULL << kMaxVASize) - 1);
+const uint64_t kTTBR1Mask = 1ULL << 55;
+
+// Determine whether PAC is supported without accessing memory. This utilizes
+// the XPACLRI instruction which will copy bit 55 of x30 into at least bit 54 
if
+// PAC is supported and acts as a NOP if PAC is not supported.
+static bool pac_supported() {
+  register uintptr_t x30 __asm__("x30") = 1ULL << 55;
+  __asm__ __volatile__("xpaclri" : "+r"(x30));
+  return x30 & (1ULL << 54);
+}
+
+// This asm snippet is used to force the creation of a frame record when
+// calling the EmuPAC functions. This is important because the EmuPAC functions
+// may crash if an auth failure is detected and may be unwound past using a
+// frame pointer based unwinder.
+#ifdef __GCC_HAVE_DWARF2_CFI_ASM
+#define CFI_INST(inst) inst
+#else
+#define CFI_INST(inst)
+#endif
+
+// clang-format off
+#define FRAME_POINTER_WRAP(sym) \
+  "stp x29, x30, [sp, #-16]!\n" \
+  CFI_INST(".cfi_def_cfa_offset 16\n") \
+  "mov x29, sp\n" \
+  CFI_INST(".cfi_def_cfa w29, 16\n") \
+  CFI_INST(".cfi_offset w30, -8\n") \
+  CFI_INST(".cfi_offset w29, -16\n") \
+  "bl " #sym "\n" \
+  CFI_INST(".cfi_def_cfa wsp, 16\n") \
+  "ldp x29, x30, [sp], #16\n" \
+  CFI_INST(".cfi_def_cfa_offset 0\n") \
+  CFI_INST(".cfi_restore w30\n") \
+  CFI_INST(".cfi_restore w29\n") \
+  "ret"
+// clang-format on
+
+static const uint8_t K[16] = {0xb5, 0xd4, 0xc9, 0xeb, 0x79, 0x10, 0x4a, 0x79,
+  0x6f, 0xec, 0x8b, 0x1b, 0x42, 0x87, 0x81, 0xd4};
+
+__attribute__((flatten)) extern "C" uint64_t
+__emupac_pacda_impl(uint64_t ptr, uint64_t disc) {
+  if (pac_supported()) {
+__asm__ __volatile__(".arch_extension pauth\npacda %0, %1"
+ : "+r"(ptr)
+ : "r"(disc));
+return ptr;
+  }
+  if (ptr & kTTBR1Mask) {
+if ((ptr & kPACMask) != kPACMask) {
+  return ptr | kPACMask;
+}
+  } else {
+if (ptr & kPACMask) {
+  return ptr & ~kPACMask;
+}
+  }
+  uint64_t hash;
+  siphash<1, 3>(reinterpret_cast(&ptr), 8, K,
+*reinterpret_cast(&hash));
+  return (ptr & ~kPACMask) | (hash & kPACMask);
+}
+
+extern "C" __attribute__((naked)) uint64_t __emupac_pacda(uint64_t ptr,
+  uint64_t disc) {
+  __asm__(FRAME_POINTER_WRAP(__emupac_pacda_impl));
+}
+
+__attribute__((flatten)) extern "C" uint64_t
+__emupac_autda_impl(uint64_t ptr, uint64_t disc) {
+  if (pac_supported()) {
+__asm__ __volatile__(".arch_extension pauth\nautda %0, %1"
+ : "+r"(ptr)
+ : "r"(disc));
+return ptr;
+  }
+  uint64_t ptr_without_pac =

atrosinenko wrote:

[nit] According to standard LLVM naming conventions, it should be 
`PtrWithoutPac`, though I'm not sure whether naming conventions are strict for 
compiler-rt. The same applies to other 

[llvm-branch-commits] [compiler-rt] compiler-rt: Introduce runtime functions for emulated PAC. (PR #133530)

2025-05-26 Thread Anatoly Trosinenko via llvm-branch-commits


@@ -172,7 +172,7 @@ function(add_compiler_rt_runtime name type)
   cmake_parse_arguments(LIB
 ""
 "PARENT_TARGET"
-
"OS;ARCHS;SOURCES;CFLAGS;LINK_FLAGS;DEFS;DEPS;LINK_LIBS;OBJECT_LIBS;ADDITIONAL_HEADERS;EXTENSIONS"
+
"OS;ARCHS;SOURCES;CFLAGS;LINK_FLAGS;DEFS;DEPS;LINK_LIBS;OBJECT_LIBS;ADDITIONAL_HEADERS;EXTENSIONS;C_STANDARD;CXX_STANDARD"

atrosinenko wrote:

`(C|CXX)_STANDARD` should probably go to the `<one_value_keywords>` argument of 
[cmake_parse_arguments](https://cmake.org/cmake/help/v3.20/command/cmake_parse_arguments.html),
 same as `PARENT_TARGET`.

[nit] The new options are not mentioned in the above comment (the description 
of `add_compiler_rt_runtime`).

https://github.com/llvm/llvm-project/pull/133530
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [compiler-rt] compiler-rt: Introduce runtime functions for emulated PAC. (PR #133530)

2025-05-26 Thread Anatoly Trosinenko via llvm-branch-commits


@@ -0,0 +1,133 @@
+//===--- emupac.cpp - Emulated PAC implementation 
-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+//  This file implements Emulated PAC using SipHash_1_3 as the IMPDEF hashing
+//  scheme.
+//
+//===--===//
+
+#include 
+
+#include "siphash/SipHash.h"
+
+// EmuPAC implements runtime emulation of PAC instructions. If the current
+// CPU supports PAC, EmuPAC uses real PAC instructions. Otherwise, it uses the
+// emulation, which is effectively an implementation of PAC with an IMPDEF
+// hashing scheme based on SipHash_1_3.
+//
+// The purpose of the emulation is to allow programs to be built to be portable
+// to machines without PAC support, with some performance loss and increased
+// probability of false positives (due to not being able to portably determine
+// the VA size), while being functionally almost equivalent to running on a
+// machine with PAC support. One example of a use case is if PAC is used in
+// production as a security mitigation, but the testing environment is
+// heterogeneous (i.e. some machines lack PAC support). In this case we would
+// like the testing machines to be able to detect issues resulting
+// from the use of PAC instructions that would affect production by running
+// tests. This can be achieved by building test binaries with EmuPAC and
+// production binaries with real PAC.
+//
+// The emulation assumes that the VA size is at most 48 bits. The architecture
+// as of ARMv8.2, which was the last architecture version in which PAC was not
+// mandatory, permitted VA size up to 52 bits via ARMv8.2-LVA, but we are
+// unaware of an ARMv8.2 CPU that implemented ARMv8.2-LVA.
+
+const uint64_t kMaxVASize = 48;
+const uint64_t kPACMask = ((1ULL << 55) - 1) & ~((1ULL << kMaxVASize) - 1);
+const uint64_t kTTBR1Mask = 1ULL << 55;
+
+// Determine whether PAC is supported without accessing memory. This utilizes
+// the XPACLRI instruction which will copy bit 55 of x30 into at least bit 54 
if
+// PAC is supported and acts as a NOP if PAC is not supported.
+static bool pac_supported() {
+  register uintptr_t x30 __asm__("x30") = 1ULL << 55;
+  __asm__ __volatile__("xpaclri" : "+r"(x30));
+  return x30 & (1ULL << 54);
+}
+
+// This asm snippet is used to force the creation of a frame record when
+// calling the EmuPAC functions. This is important because the EmuPAC functions
+// may crash if an auth failure is detected and may be unwound past using a
+// frame pointer based unwinder.
+#ifdef __GCC_HAVE_DWARF2_CFI_ASM
+#define CFI_INST(inst) inst
+#else
+#define CFI_INST(inst)
+#endif
+
+// clang-format off
+#define FRAME_POINTER_WRAP(sym) \
+  "stp x29, x30, [sp, #-16]!\n" \
+  CFI_INST(".cfi_def_cfa_offset 16\n") \
+  "mov x29, sp\n" \
+  CFI_INST(".cfi_def_cfa w29, 16\n") \
+  CFI_INST(".cfi_offset w30, -8\n") \
+  CFI_INST(".cfi_offset w29, -16\n") \
+  "bl " #sym "\n" \
+  CFI_INST(".cfi_def_cfa wsp, 16\n") \
+  "ldp x29, x30, [sp], #16\n" \
+  CFI_INST(".cfi_def_cfa_offset 0\n") \
+  CFI_INST(".cfi_restore w30\n") \
+  CFI_INST(".cfi_restore w29\n") \
+  "ret"
+// clang-format on
+
+static const uint8_t K[16] = {0xb5, 0xd4, 0xc9, 0xeb, 0x79, 0x10, 0x4a, 0x79,
+  0x6f, 0xec, 0x8b, 0x1b, 0x42, 0x87, 0x81, 0xd4};

atrosinenko wrote:

This probably deserves a comment, something like "emulated DA key value".

https://github.com/llvm/llvm-project/pull/133530
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [compiler-rt] compiler-rt: Introduce runtime functions for emulated PAC. (PR #133530)

2025-05-26 Thread Anatoly Trosinenko via llvm-branch-commits


@@ -0,0 +1,133 @@
+//===--- emupac.cpp - Emulated PAC implementation 
-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+//  This file implements Emulated PAC using SipHash_1_3 as the IMPDEF hashing
+//  scheme.
+//
+//===--===//
+
+#include 
+
+#include "siphash/SipHash.h"
+
+// EmuPAC implements runtime emulation of PAC instructions. If the current
+// CPU supports PAC, EmuPAC uses real PAC instructions. Otherwise, it uses the
+// emulation, which is effectively an implementation of PAC with an IMPDEF
+// hashing scheme based on SipHash_1_3.
+//
+// The purpose of the emulation is to allow programs to be built to be portable
+// to machines without PAC support, with some performance loss and increased
+// probability of false positives (due to not being able to portably determine
+// the VA size), while being functionally almost equivalent to running on a
+// machine with PAC support. One example of a use case is if PAC is used in
+// production as a security mitigation, but the testing environment is
+// heterogeneous (i.e. some machines lack PAC support). In this case we would
+// like the testing machines to be able to detect issues resulting
+// from the use of PAC instructions that would affect production by running
+// tests. This can be achieved by building test binaries with EmuPAC and
+// production binaries with real PAC.

atrosinenko wrote:

I wonder if it should be explicitly stated that EmuPAC is not intended to be 
used in production (similar to what is done for AddressSanitizer: 
https://clang.llvm.org/docs/AddressSanitizer.html#security-considerations).

https://github.com/llvm/llvm-project/pull/133530
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [compiler-rt] compiler-rt: Introduce runtime functions for emulated PAC. (PR #133530)

2025-05-26 Thread Anatoly Trosinenko via llvm-branch-commits


@@ -0,0 +1,133 @@
+//===--- emupac.cpp - Emulated PAC implementation 
-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+//  This file implements Emulated PAC using SipHash_1_3 as the IMPDEF hashing
+//  scheme.
+//
+//===--===//
+
+#include 
+
+#include "siphash/SipHash.h"
+
+// EmuPAC implements runtime emulation of PAC instructions. If the current
+// CPU supports PAC, EmuPAC uses real PAC instructions. Otherwise, it uses the
+// emulation, which is effectively an implementation of PAC with an IMPDEF
+// hashing scheme based on SipHash_1_3.
+//
+// The purpose of the emulation is to allow programs to be built to be portable
+// to machines without PAC support, with some performance loss and increased
+// probability of false positives (due to not being able to portably determine
+// the VA size), while being functionally almost equivalent to running on a
+// machine with PAC support. One example of a use case is if PAC is used in
+// production as a security mitigation, but the testing environment is
+// heterogeneous (i.e. some machines lack PAC support). In this case we would
+// like the testing machines to be able to detect issues resulting
+// from the use of PAC instructions that would affect production by running
+// tests. This can be achieved by building test binaries with EmuPAC and
+// production binaries with real PAC.
+//
+// The emulation assumes that the VA size is at most 48 bits. The architecture
+// as of ARMv8.2, which was the last architecture version in which PAC was not
+// mandatory, permitted VA size up to 52 bits via ARMv8.2-LVA, but we are
+// unaware of an ARMv8.2 CPU that implemented ARMv8.2-LVA.
+
+const uint64_t kMaxVASize = 48;
+const uint64_t kPACMask = ((1ULL << 55) - 1) & ~((1ULL << kMaxVASize) - 1);
+const uint64_t kTTBR1Mask = 1ULL << 55;
+
+// Determine whether PAC is supported without accessing memory. This utilizes
+// the XPACLRI instruction which will copy bit 55 of x30 into at least bit 54 
if
+// PAC is supported and acts as a NOP if PAC is not supported.
+static bool pac_supported() {

atrosinenko wrote:

[nit] It looks like functions under `compiler-rt/lib/builtins` which are not 
part of the public interface generally follow the standard naming conventions.
```suggestion
static bool pacSupported() {
```

https://github.com/llvm/llvm-project/pull/133530
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [compiler-rt] compiler-rt: Introduce runtime functions for emulated PAC. (PR #133530)

2025-05-26 Thread Anatoly Trosinenko via llvm-branch-commits


@@ -0,0 +1,62 @@
+// REQUIRES: librt_has_emupac
+// RUN: %clang_builtins %s %librt -o %t
+// RUN: %run %t 1
+// RUN: %run %t 2
+// RUN: %expect_crash %run %t 3
+// RUN: %expect_crash %run %t 4
+
+#include 
+#include 
+#include 
+
+uint64_t __emupac_pacda(uint64_t ptr, uint64_t disc);
+uint64_t __emupac_autda(uint64_t ptr, uint64_t disc);
+
+int main(int argc, char **argv) {
+  char stack_object1;
+  uint64_t ptr1 = (uint64_t)stack_object1;

atrosinenko wrote:

`(uint64_t)&stack_object1` was probably intended (the same for `stack_object2`).

https://github.com/llvm/llvm-project/pull/133530
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/20.x: [MachO] Improve bounds check (#141083) (PR #141461)

2025-05-26 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-binary-utilities

Author: None (llvmbot)


Changes

Backport 3f29acb51739a3e6bfb8cc623eb37cb734c98a63

Requested by: @nikic

---
Full diff: https://github.com/llvm/llvm-project/pull/141461.diff


1 Files Affected:

- (modified) llvm/lib/Object/MachOObjectFile.cpp (+2-1) 


```diff
diff --git a/llvm/lib/Object/MachOObjectFile.cpp b/llvm/lib/Object/MachOObjectFile.cpp
index 69d36e6a77db7..5db264207ffb7 100644
--- a/llvm/lib/Object/MachOObjectFile.cpp
+++ b/llvm/lib/Object/MachOObjectFile.cpp
@@ -192,7 +192,8 @@ static Expected
 getLoadCommandInfo(const MachOObjectFile &Obj, const char *Ptr,
uint32_t LoadCommandIndex) {
   if (auto CmdOrErr = getStructOrErr(Obj, Ptr)) {
-if (CmdOrErr->cmdsize + Ptr > Obj.getData().end())
+assert(Ptr <= Obj.getData().end() && "Start must be before end");
+if (CmdOrErr->cmdsize > (uintptr_t)(Obj.getData().end() - Ptr))
   return malformedError("load command " + Twine(LoadCommandIndex) +
 " extends past end of file");
 if (CmdOrErr->cmdsize < 8)

```




https://github.com/llvm/llvm-project/pull/141461
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/20.x: [MachO] Improve bounds check (#141083) (PR #141461)

2025-05-26 Thread via llvm-branch-commits

https://github.com/llvmbot updated 
https://github.com/llvm/llvm-project/pull/141461

>From 5d99a97583e148e2c0ad462bb35292366105e188 Mon Sep 17 00:00:00 2001
From: Nikita Popov 
Date: Mon, 26 May 2025 09:43:00 +0200
Subject: [PATCH] [MachO] Improve bounds check (#141083)

The current check may fail if the addition overflows. I've observed
failures of macho-invalid.test on 32-bit due to this.

Instead, compare against the remaining bytes until the end of the
object.

(cherry picked from commit 3f29acb51739a3e6bfb8cc623eb37cb734c98a63)
---
 llvm/lib/Object/MachOObjectFile.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Object/MachOObjectFile.cpp 
b/llvm/lib/Object/MachOObjectFile.cpp
index 69d36e6a77db7..5db264207ffb7 100644
--- a/llvm/lib/Object/MachOObjectFile.cpp
+++ b/llvm/lib/Object/MachOObjectFile.cpp
@@ -192,7 +192,8 @@ static Expected
 getLoadCommandInfo(const MachOObjectFile &Obj, const char *Ptr,
uint32_t LoadCommandIndex) {
   if (auto CmdOrErr = getStructOrErr(Obj, Ptr)) {
-if (CmdOrErr->cmdsize + Ptr > Obj.getData().end())
+assert(Ptr <= Obj.getData().end() && "Start must be before end");
+if (CmdOrErr->cmdsize > (uintptr_t)(Obj.getData().end() - Ptr))
   return malformedError("load command " + Twine(LoadCommandIndex) +
 " extends past end of file");
 if (CmdOrErr->cmdsize < 8)

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/20.x: [MachO] Improve bounds check (#141083) (PR #141461)

2025-05-26 Thread Nikita Popov via llvm-branch-commits

https://github.com/nikic milestoned 
https://github.com/llvm/llvm-project/pull/141461
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: optionally assume auth traps on failure (PR #139778)

2025-05-26 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/139778

>From 2c67c3f02d8474185ad6998142b05c1e7a0eeffa Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Tue, 13 May 2025 19:50:41 +0300
Subject: [PATCH] [BOLT] Gadget scanner: optionally assume auth traps on
 failure

On AArch64 it is possible for an auth instruction to either return an
invalid address value on failure (without FEAT_FPAC) or generate an
error (with FEAT_FPAC). It thus may be possible to never emit explicit
pointer checks, if the target CPU is known to support FEAT_FPAC.

This commit implements an --auth-traps-on-failure command line option,
which essentially makes "safe-to-dereference" and "trusted" register
properties identical and disables scanning for authentication oracles
completely.
---
 bolt/lib/Passes/PAuthGadgetScanner.cpp| 112 +++
 .../binary-analysis/AArch64/cmdline-args.test |   1 +
 .../AArch64/gs-pauth-authentication-oracles.s |   6 +-
 .../binary-analysis/AArch64/gs-pauth-calls.s  |   5 +-
 .../AArch64/gs-pauth-debug-output.s   | 177 ++---
 .../AArch64/gs-pauth-jump-table.s |   6 +-
 .../AArch64/gs-pauth-signing-oracles.s|  54 ++---
 .../AArch64/gs-pauth-tail-calls.s | 184 +-
 8 files changed, 318 insertions(+), 227 deletions(-)

diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp 
b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index f979b1480d0b1..119cb928337a9 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -14,6 +14,7 @@
 #include "bolt/Passes/PAuthGadgetScanner.h"
 #include "bolt/Core/ParallelUtilities.h"
 #include "bolt/Passes/DataflowAnalysis.h"
+#include "bolt/Utils/CommandLineOpts.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/MC/MCInst.h"
@@ -26,6 +27,11 @@ namespace llvm {
 namespace bolt {
 namespace PAuthGadgetScanner {
 
+static cl::opt AuthTrapsOnFailure(
+"auth-traps-on-failure",
+cl::desc("Assume authentication instructions always trap on failure"),
+cl::cat(opts::BinaryAnalysisCategory));
+
 [[maybe_unused]] static void traceInst(const BinaryContext &BC, StringRef Label,
const MCInst &MI) {
   dbgs() << "  " << Label << ": ";
@@ -363,6 +369,34 @@ class SrcSafetyAnalysis {
 return Clobbered;
   }
 
+  std::optional getRegMadeTrustedByChecking(const MCInst &Inst,
+   SrcState Cur) const {
+// This function cannot return multiple registers. This is never the case
+// on AArch64.
+std::optional RegCheckedByInst =
+BC.MIB->getAuthCheckedReg(Inst, /*MayOverwrite=*/false);
+if (RegCheckedByInst && Cur.SafeToDerefRegs[*RegCheckedByInst])
+  return *RegCheckedByInst;
+
+auto It = CheckerSequenceInfo.find(&Inst);
+if (It == CheckerSequenceInfo.end())
+  return std::nullopt;
+
+MCPhysReg RegCheckedBySequence = It->second.first;
+const MCInst *FirstCheckerInst = It->second.second;
+
+// FirstCheckerInst should belong to the same basic block (see the
+// assertion in DataflowSrcSafetyAnalysis::run()), meaning it was
+// deterministically processed a few steps before this instruction.
+const SrcState &StateBeforeChecker = getStateBefore(*FirstCheckerInst);
+
+// The sequence checks the register, but it should be authenticated before.
+if (!StateBeforeChecker.SafeToDerefRegs[RegCheckedBySequence])
+  return std::nullopt;
+
+return RegCheckedBySequence;
+  }
+
   // Returns all registers that can be treated as if they are written by an
   // authentication instruction.
   SmallVector getRegsMadeSafeToDeref(const MCInst &Point,
@@ -385,18 +419,38 @@ class SrcSafetyAnalysis {
 Regs.push_back(DstAndSrc->first);
 }
 
+// Make sure explicit checker sequence keeps register safe-to-dereference
+// when the register would be clobbered according to the regular rules:
+//
+//; LR is safe to dereference here
+//mov   x16, x30  ; start of the sequence, LR is s-t-d right before
+//xpaclri ; clobbers LR, LR is not safe anymore
+//cmp   x30, x16
+//b.eq  1f; end of the sequence: LR is marked as trusted
+//brk   0x1234
+//  1:
+//; at this point LR would be marked as trusted,
+//; but not safe-to-dereference
+//
+// or even just
+//
+//; X1 is safe to dereference here
+//ldr x0, [x1, #8]!
+//; X1 is trusted here, but it was clobbered due to address write-back
+if (auto CheckedReg = getRegMadeTrustedByChecking(Point, Cur))
+  Regs.push_back(*CheckedReg);
+
 return Regs;
   }
 
   // Returns all registers made trusted by this instruction.
   SmallVector getRegsMadeTrusted(const MCInst &Point,
 const SrcState &Cur) const {
+assert(!AuthTrapsOnFailure &&