date:20250305

[llvm-branch-commits] [llvm] release/20.x: [AArch64] Fix BE popcount casts. (#129879) (PR #129996)

2025-03-05 Thread via llvm-branch-commits


https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/129996

Backport b673a59c9ae5 ab811e75734a

Requested by: @davemgreen

>From e0f31d9f2345b4ddf4ac96e8275524aac5e827d4 Mon Sep 17 00:00:00 2001
From: David Green 
Date: Wed, 5 Mar 2025 11:23:33 +0000
Subject: [PATCH 1/2] [AArch64] Add BE test coverage for popcount. NFC

For #129843

(cherry picked from commit b673a59c9ae5583aa08a8d34a48f9409b660d826)
---
 llvm/test/CodeGen/AArch64/arm64-popcnt.ll | 161 ++
 llvm/test/CodeGen/AArch64/popcount.ll | 104 ++
 2 files changed, 265 insertions(+)

diff --git a/llvm/test/CodeGen/AArch64/arm64-popcnt.ll 
b/llvm/test/CodeGen/AArch64/arm64-popcnt.ll
index ad0904ff98080..369667ec33f66 100644
--- a/llvm/test/CodeGen/AArch64/arm64-popcnt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-popcnt.ll
@@ -2,6 +2,7 @@
 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 ; RUN: llc < %s -mtriple=aarch64 -mattr -neon -aarch64-neon-syntax=apple | 
FileCheck -check-prefix=CHECK-NONEON %s
 ; RUN: llc < %s -mtriple=aarch64 -mattr +cssc -aarch64-neon-syntax=apple | 
FileCheck -check-prefix=CHECK-CSSC %s
+; RUN: llc < %s -mtriple=aarch64_be-none-eabi | FileCheck %s 
--check-prefix=CHECK-BE
 
 define i32 @cnt32_advsimd(i32 %x) nounwind readnone {
 ; CHECK-LABEL: cnt32_advsimd:
@@ -32,6 +33,14 @@ define i32 @cnt32_advsimd(i32 %x) nounwind readnone {
 ; CHECK-CSSC:   // %bb.0:
 ; CHECK-CSSC-NEXT:cnt w0, w0
 ; CHECK-CSSC-NEXT:ret
+;
+; CHECK-BE-LABEL: cnt32_advsimd:
+; CHECK-BE:   // %bb.0:
+; CHECK-BE-NEXT:fmov s0, w0
+; CHECK-BE-NEXT:cnt v0.8b, v0.8b
+; CHECK-BE-NEXT:addv b0, v0.8b
+; CHECK-BE-NEXT:fmov w0, s0
+; CHECK-BE-NEXT:ret
   %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
   ret i32 %cnt
 }
@@ -69,6 +78,16 @@ define i32 @cnt32_advsimd_2(<2 x i32> %x) {
 ; CHECK-CSSC-NEXT:fmov w8, s0
 ; CHECK-CSSC-NEXT:cnt w0, w8
 ; CHECK-CSSC-NEXT:ret
+;
+; CHECK-BE-LABEL: cnt32_advsimd_2:
+; CHECK-BE:   // %bb.0:
+; CHECK-BE-NEXT:rev64 v0.2s, v0.2s
+; CHECK-BE-NEXT:fmov w8, s0
+; CHECK-BE-NEXT:fmov s0, w8
+; CHECK-BE-NEXT:cnt v0.8b, v0.8b
+; CHECK-BE-NEXT:addv b0, v0.8b
+; CHECK-BE-NEXT:fmov w0, s0
+; CHECK-BE-NEXT:ret
   %1 = extractelement <2 x i32> %x, i64 0
   %2 = tail call i32 @llvm.ctpop.i32(i32 %1)
   ret i32 %2
@@ -103,6 +122,16 @@ define i64 @cnt64_advsimd(i64 %x) nounwind readnone {
 ; CHECK-CSSC:   // %bb.0:
 ; CHECK-CSSC-NEXT:cnt x0, x0
 ; CHECK-CSSC-NEXT:ret
+;
+; CHECK-BE-LABEL: cnt64_advsimd:
+; CHECK-BE:   // %bb.0:
+; CHECK-BE-NEXT:fmov d0, x0
+; CHECK-BE-NEXT:rev64 v0.8b, v0.8b
+; CHECK-BE-NEXT:cnt v0.8b, v0.8b
+; CHECK-BE-NEXT:addv b0, v0.8b
+; CHECK-BE-NEXT:rev64 v0.8b, v0.8b
+; CHECK-BE-NEXT:fmov x0, d0
+; CHECK-BE-NEXT:ret
   %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
   ret i64 %cnt
 }
@@ -147,6 +176,22 @@ define i32 @cnt32(i32 %x) nounwind readnone 
noimplicitfloat {
 ; CHECK-CSSC:   // %bb.0:
 ; CHECK-CSSC-NEXT:cnt w0, w0
 ; CHECK-CSSC-NEXT:ret
+;
+; CHECK-BE-LABEL: cnt32:
+; CHECK-BE:   // %bb.0:
+; CHECK-BE-NEXT:lsr w9, w0, #1
+; CHECK-BE-NEXT:mov w8, #16843009 // =0x1010101
+; CHECK-BE-NEXT:and w9, w9, #0x55555555
+; CHECK-BE-NEXT:sub w9, w0, w9
+; CHECK-BE-NEXT:lsr w10, w9, #2
+; CHECK-BE-NEXT:and w9, w9, #0x33333333
+; CHECK-BE-NEXT:and w10, w10, #0x33333333
+; CHECK-BE-NEXT:add w9, w9, w10
+; CHECK-BE-NEXT:add w9, w9, w9, lsr #4
+; CHECK-BE-NEXT:and w9, w9, #0xf0f0f0f
+; CHECK-BE-NEXT:mul w8, w9, w8
+; CHECK-BE-NEXT:lsr w0, w8, #24
+; CHECK-BE-NEXT:ret
   %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
   ret i32 %cnt
 }
@@ -188,6 +233,22 @@ define i64 @cnt64(i64 %x) nounwind readnone 
noimplicitfloat {
 ; CHECK-CSSC:   // %bb.0:
 ; CHECK-CSSC-NEXT:cnt x0, x0
 ; CHECK-CSSC-NEXT:ret
+;
+; CHECK-BE-LABEL: cnt64:
+; CHECK-BE:   // %bb.0:
+; CHECK-BE-NEXT:lsr x9, x0, #1
+; CHECK-BE-NEXT:mov x8, #72340172838076673 // =0x101010101010101
+; CHECK-BE-NEXT:and x9, x9, #0x5555555555555555
+; CHECK-BE-NEXT:sub x9, x0, x9
+; CHECK-BE-NEXT:lsr x10, x9, #2
+; CHECK-BE-NEXT:and x9, x9, #0x3333333333333333
+; CHECK-BE-NEXT:and x10, x10, #0x3333333333333333
+; CHECK-BE-NEXT:add x9, x9, x10
+; CHECK-BE-NEXT:add x9, x9, x9, lsr #4
+; CHECK-BE-NEXT:and x9, x9, #0xf0f0f0f0f0f0f0f
+; CHECK-BE-NEXT:mul x8, x9, x8
+; CHECK-BE-NEXT:lsr x0, x8, #56
+; CHECK-BE-NEXT:ret
   %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
   ret i64 %cnt
 }
@@ -215,6 +276,14 @@ define i32 @ctpop_eq_one(i64 %x) nounwind readnone {
 ; CHECK-CSSC-NEXT:cmp x8, #1
 ; CHECK-CSSC-NEXT:cset w0, eq
 ; CHECK-CSSC-NEXT:ret
+;
+; CHECK-BE-LABEL: ctpop_eq_one:
+; CHECK-BE:   // %bb.0:
+; CHECK-BE-NEXT:sub x8, x0, #1
+; CHECK-BE-NEXT:eor x9, x0, x8
+; CHECK-BE-NEXT:cmp x9, x8
+; CHECK-BE-NEXT:cset w0, hi
+; CHECK

[llvm-branch-commits] [llvm] release/20.x: [AArch64] Fix BE popcount casts. (#129879) (PR #129996)

2025-03-05 Thread via llvm-branch-commits


https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/129996
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] release/20.x: [AArch64] Fix BE popcount casts. (#129879) (PR #129996)

2025-03-05 Thread via llvm-branch-commits


llvmbot wrote:




@llvm/pr-subscribers-backend-aarch64

Author: None (llvmbot)


Changes

Backport b673a59c9ae5 ab811e75734a

Requested by: @davemgreen

---
Full diff: https://github.com/llvm/llvm-project/pull/129996.diff


4 Files Affected:

- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+8-2) 
- (modified) llvm/test/CodeGen/AArch64/arm64-popcnt.ll (+159) 
- (modified) llvm/test/CodeGen/AArch64/parity.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/popcount.ll (+104-1) 


``diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp 
b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index b5cca88b6b511..ca357382c472d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -10783,7 +10783,10 @@ SDValue 
AArch64TargetLowering::LowerCTPOP_PARITY(SDValue Op,
 if (VT == MVT::i32)
   AddV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, AddV,
  DAG.getConstant(0, DL, MVT::i64));
-AddV = DAG.getNode(ISD::BITCAST, DL, VT, AddV);
+else
+  AddV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
+ DAG.getNode(AArch64ISD::NVCAST, DL, MVT::v1i64, AddV),
+ DAG.getConstant(0, DL, MVT::i64));
 if (IsParity)
   AddV = DAG.getNode(ISD::AND, DL, VT, AddV, DAG.getConstant(1, DL, VT));
 return AddV;
@@ -10792,7 +10795,10 @@ SDValue 
AArch64TargetLowering::LowerCTPOP_PARITY(SDValue Op,
 
 SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v16i8, Val);
 SDValue AddV = DAG.getNode(AArch64ISD::UADDV, DL, MVT::v16i8, CtPop);
-AddV = DAG.getNode(ISD::BITCAST, DL, VT, AddV);
+AddV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64,
+   DAG.getNode(AArch64ISD::NVCAST, DL, MVT::v2i64, AddV),
+   DAG.getConstant(0, DL, MVT::i64));
+AddV = DAG.getZExtOrTrunc(AddV, DL, VT);
 if (IsParity)
   AddV = DAG.getNode(ISD::AND, DL, VT, AddV, DAG.getConstant(1, DL, VT));
 return AddV;
diff --git a/llvm/test/CodeGen/AArch64/arm64-popcnt.ll 
b/llvm/test/CodeGen/AArch64/arm64-popcnt.ll
index ad0904ff98080..d06e42f5405ef 100644
--- a/llvm/test/CodeGen/AArch64/arm64-popcnt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-popcnt.ll
@@ -2,6 +2,7 @@
 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 ; RUN: llc < %s -mtriple=aarch64 -mattr -neon -aarch64-neon-syntax=apple | 
FileCheck -check-prefix=CHECK-NONEON %s
 ; RUN: llc < %s -mtriple=aarch64 -mattr +cssc -aarch64-neon-syntax=apple | 
FileCheck -check-prefix=CHECK-CSSC %s
+; RUN: llc < %s -mtriple=aarch64_be-none-eabi | FileCheck %s 
--check-prefix=CHECK-BE
 
 define i32 @cnt32_advsimd(i32 %x) nounwind readnone {
 ; CHECK-LABEL: cnt32_advsimd:
@@ -32,6 +33,14 @@ define i32 @cnt32_advsimd(i32 %x) nounwind readnone {
 ; CHECK-CSSC:   // %bb.0:
 ; CHECK-CSSC-NEXT:cnt w0, w0
 ; CHECK-CSSC-NEXT:ret
+;
+; CHECK-BE-LABEL: cnt32_advsimd:
+; CHECK-BE:   // %bb.0:
+; CHECK-BE-NEXT:fmov s0, w0
+; CHECK-BE-NEXT:cnt v0.8b, v0.8b
+; CHECK-BE-NEXT:addv b0, v0.8b
+; CHECK-BE-NEXT:fmov w0, s0
+; CHECK-BE-NEXT:ret
   %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
   ret i32 %cnt
 }
@@ -69,6 +78,16 @@ define i32 @cnt32_advsimd_2(<2 x i32> %x) {
 ; CHECK-CSSC-NEXT:fmov w8, s0
 ; CHECK-CSSC-NEXT:cnt w0, w8
 ; CHECK-CSSC-NEXT:ret
+;
+; CHECK-BE-LABEL: cnt32_advsimd_2:
+; CHECK-BE:   // %bb.0:
+; CHECK-BE-NEXT:rev64 v0.2s, v0.2s
+; CHECK-BE-NEXT:fmov w8, s0
+; CHECK-BE-NEXT:fmov s0, w8
+; CHECK-BE-NEXT:cnt v0.8b, v0.8b
+; CHECK-BE-NEXT:addv b0, v0.8b
+; CHECK-BE-NEXT:fmov w0, s0
+; CHECK-BE-NEXT:ret
   %1 = extractelement <2 x i32> %x, i64 0
   %2 = tail call i32 @llvm.ctpop.i32(i32 %1)
   ret i32 %2
@@ -103,6 +122,15 @@ define i64 @cnt64_advsimd(i64 %x) nounwind readnone {
 ; CHECK-CSSC:   // %bb.0:
 ; CHECK-CSSC-NEXT:cnt x0, x0
 ; CHECK-CSSC-NEXT:ret
+;
+; CHECK-BE-LABEL: cnt64_advsimd:
+; CHECK-BE:   // %bb.0:
+; CHECK-BE-NEXT:fmov d0, x0
+; CHECK-BE-NEXT:rev64 v0.8b, v0.8b
+; CHECK-BE-NEXT:cnt v0.8b, v0.8b
+; CHECK-BE-NEXT:addv b0, v0.8b
+; CHECK-BE-NEXT:fmov x0, d0
+; CHECK-BE-NEXT:ret
   %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
   ret i64 %cnt
 }
@@ -147,6 +175,22 @@ define i32 @cnt32(i32 %x) nounwind readnone 
noimplicitfloat {
 ; CHECK-CSSC:   // %bb.0:
 ; CHECK-CSSC-NEXT:cnt w0, w0
 ; CHECK-CSSC-NEXT:ret
+;
+; CHECK-BE-LABEL: cnt32:
+; CHECK-BE:   // %bb.0:
+; CHECK-BE-NEXT:lsr w9, w0, #1
+; CHECK-BE-NEXT:mov w8, #16843009 // =0x1010101
+; CHECK-BE-NEXT:and w9, w9, #0x55555555
+; CHECK-BE-NEXT:sub w9, w0, w9
+; CHECK-BE-NEXT:lsr w10, w9, #2
+; CHECK-BE-NEXT:and w9, w9, #0x33333333
+; CHECK-BE-NEXT:and w10, w10, #0x33333333
+; CHECK-BE-NEXT:add w9, w9, w10
+; CHECK-BE-NEXT:add w9, w9, w9, lsr #4
+; CHECK-BE-NEXT:and w9, w9, #0xf0f0f0f
+; CHECK-BE-NEXT:

[llvm-branch-commits] [llvm] release/20.x: [AArch64] Fix BE popcount casts. (#129879) (PR #129996)

2025-03-05 Thread via llvm-branch-commits


llvmbot wrote:

@alexrp What do you think about merging this PR to the release branch?

https://github.com/llvm/llvm-project/pull/129996
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] release/20.x: [AArch64] Fix SVE scalar fcopysign lowering without neon. (#129787) (PR #129997)

2025-03-05 Thread via llvm-branch-commits


https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/129997
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] release/20.x: [AArch64] Fix SVE scalar fcopysign lowering without neon. (#129787) (PR #129997)

2025-03-05 Thread via llvm-branch-commits


llvmbot wrote:




@llvm/pr-subscribers-backend-aarch64

Author: None (llvmbot)


Changes

Backport 4c2d1b4c53de d4ab3df320f9

Requested by: @davemgreen

---
Full diff: https://github.com/llvm/llvm-project/pull/129997.diff


2 Files Affected:

- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+19) 
- (modified) 
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll (+199) 


``diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp 
b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index b5cca88b6b511..62a26b0aef187 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -10681,6 +10681,25 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue 
Op,
 return convertFromScalableVector(DAG, VT, Res);
   }
 
+  // With SVE, but without Neon, extend the scalars to scalable vectors and use
+  // a SVE FCOPYSIGN.
+  if (!VT.isVector() && !Subtarget->isNeonAvailable() &&
+  Subtarget->isSVEorStreamingSVEAvailable()) {
+if (VT != MVT::f16 && VT != MVT::f32 && VT != MVT::f64)
+  return SDValue();
+EVT SVT = getPackedSVEVectorVT(VT);
+
+SDValue Ins1 =
+DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, SVT, DAG.getUNDEF(SVT), In1,
+DAG.getConstant(0, DL, MVT::i64));
+SDValue Ins2 =
+DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, SVT, DAG.getUNDEF(SVT), In2,
+DAG.getConstant(0, DL, MVT::i64));
+SDValue FCS = DAG.getNode(ISD::FCOPYSIGN, DL, SVT, Ins1, Ins2);
+return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, FCS,
+   DAG.getConstant(0, DL, MVT::i64));
+  }
+
   auto BitCast = [this](EVT VT, SDValue Op, SelectionDAG &DAG) {
 if (VT.isScalableVector())
   return getSVESafeBitCast(VT, Op, DAG);
diff --git 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll 
b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
index 2282e74af5d00..79921e25caf53 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
@@ -8,6 +8,205 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
 target triple = "aarch64-unknown-linux-gnu"
 
+define void @test_copysign_f16(ptr %ap, ptr %bp) {
+; SVE-LABEL: test_copysign_f16:
+; SVE:   // %bb.0:
+; SVE-NEXT:ldr h0, [x1]
+; SVE-NEXT:ldr h1, [x0]
+; SVE-NEXT:and z0.h, z0.h, #0x8000
+; SVE-NEXT:and z1.h, z1.h, #0x7fff
+; SVE-NEXT:orr z0.d, z1.d, z0.d
+; SVE-NEXT:str h0, [x0]
+; SVE-NEXT:ret
+;
+; SVE2-LABEL: test_copysign_f16:
+; SVE2:   // %bb.0:
+; SVE2-NEXT:mov z0.h, #32767 // =0x7fff
+; SVE2-NEXT:ldr h1, [x1]
+; SVE2-NEXT:ldr h2, [x0]
+; SVE2-NEXT:bsl z2.d, z2.d, z1.d, z0.d
+; SVE2-NEXT:str h2, [x0]
+; SVE2-NEXT:ret
+;
+; NONEON-NOSVE-LABEL: test_copysign_f16:
+; NONEON-NOSVE:   // %bb.0:
+; NONEON-NOSVE-NEXT:sub sp, sp, #16
+; NONEON-NOSVE-NEXT:.cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT:ldr h0, [x0]
+; NONEON-NOSVE-NEXT:ldr h1, [x1]
+; NONEON-NOSVE-NEXT:fcvt s0, h0
+; NONEON-NOSVE-NEXT:str h1, [sp, #12]
+; NONEON-NOSVE-NEXT:ldrb w8, [sp, #13]
+; NONEON-NOSVE-NEXT:tst w8, #0x80
+; NONEON-NOSVE-NEXT:fabs s0, s0
+; NONEON-NOSVE-NEXT:fneg s1, s0
+; NONEON-NOSVE-NEXT:fcsel s0, s1, s0, ne
+; NONEON-NOSVE-NEXT:fcvt h0, s0
+; NONEON-NOSVE-NEXT:str h0, [x0]
+; NONEON-NOSVE-NEXT:add sp, sp, #16
+; NONEON-NOSVE-NEXT:ret
+  %a = load half, ptr %ap
+  %b = load half, ptr %bp
+  %r = call half @llvm.copysign.f16(half %a, half %b)
+  store half %r, ptr %ap
+  ret void
+}
+
+define void @test_copysign_bf16(ptr %ap, ptr %bp) {
+; SVE-LABEL: test_copysign_bf16:
+; SVE:   // %bb.0:
+; SVE-NEXT:sub sp, sp, #16
+; SVE-NEXT:.cfi_def_cfa_offset 16
+; SVE-NEXT:ldr h0, [x0]
+; SVE-NEXT:ldr h1, [x1]
+; SVE-NEXT:fmov w8, s0
+; SVE-NEXT:str h1, [sp, #12]
+; SVE-NEXT:ldrb w9, [sp, #13]
+; SVE-NEXT:and w8, w8, #0x7fff
+; SVE-NEXT:tst w9, #0x80
+; SVE-NEXT:fmov s0, w8
+; SVE-NEXT:eor w8, w8, #0x8000
+; SVE-NEXT:fmov s1, w8
+; SVE-NEXT:fcsel h0, h1, h0, ne
+; SVE-NEXT:str h0, [x0]
+; SVE-NEXT:add sp, sp, #16
+; SVE-NEXT:ret
+;
+; SVE2-LABEL: test_copysign_bf16:
+; SVE2:   // %bb.0:
+; SVE2-NEXT:sub sp, sp, #16
+; SVE2-NEXT:.cfi_def_cfa_offset 16
+; SVE2-NEXT:ldr h0, [x0]
+; SVE2-NEXT:ldr h1, [x1]
+; SVE2-NEXT:fmov w8, s0
+; SVE2-NEXT:str h1, [sp, #12]
+; SVE2-NEXT:ldrb w9, [sp, #13]
+; SVE2-NEXT:and w8, w8, #0x7fff
+; SVE2-NEXT:tst w9, #0x80
+; SVE2-NEXT:fmov s0, w8
+; SVE2-NEXT:eor w8, w8, #0x8000
+; SVE2-NEXT:fmov s1, w8
+; SVE2-NEXT:fcsel h0, h1, h0, ne
+; SVE2-NEXT:str h0, [x0]
+; SVE2-NEXT:add sp, sp, #16
+; SVE2-NEXT:ret
+;
+; NONEON-NOSVE-LABEL: test_copysign_bf16:
+; NONEON-NOSVE:   // %bb.0:
+; NONEON

[llvm-branch-commits] [llvm] 56f8d69 - Revert "[LTO][Pipelines][Coro] De-duplicate Coro passes (#128654)"

2025-03-05 Thread via llvm-branch-commits


Author: Vitaly Buka
Date: 2025-03-05T18:40:30-08:00
New Revision: 56f8d690f4d0c812c6e7b4173d4be940dfa1bf36

URL: 
https://github.com/llvm/llvm-project/commit/56f8d690f4d0c812c6e7b4173d4be940dfa1bf36
DIFF: 
https://github.com/llvm/llvm-project/commit/56f8d690f4d0c812c6e7b4173d4be940dfa1bf36.diff

LOG: Revert "[LTO][Pipelines][Coro] De-duplicate Coro passes (#128654)"

This reverts commit 31897e651a1aa69207806d497a7080e252c53ebe.

Added: 


Modified: 
llvm/lib/Passes/PassBuilderPipelines.cpp
llvm/test/LTO/X86/coro.ll
llvm/test/Other/new-pm-defaults.ll
llvm/test/Other/new-pm-lto-defaults.ll

Removed: 




diff  --git a/llvm/lib/Passes/PassBuilderPipelines.cpp 
b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 546a5eb1ec283..07db107325f02 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -419,16 +419,14 @@ static bool isLTOPostLink(ThinOrFullLTOPhase Phase) {
 
 // Helper to wrap conditionally Coro passes.
 static CoroConditionalWrapper buildCoroWrapper(ThinOrFullLTOPhase Phase) {
+  // TODO: Skip passes according to Phase.
   ModulePassManager CoroPM;
-  if (!isLTOPostLink(Phase))
-CoroPM.addPass(CoroEarlyPass());
-  if (!isLTOPreLink(Phase)) {
-CGSCCPassManager CGPM;
-CGPM.addPass(CoroSplitPass());
-CoroPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
-CoroPM.addPass(CoroCleanupPass());
-CoroPM.addPass(GlobalDCEPass());
-  }
+  CoroPM.addPass(CoroEarlyPass());
+  CGSCCPassManager CGPM;
+  CGPM.addPass(CoroSplitPass());
+  CoroPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
+  CoroPM.addPass(CoroCleanupPass());
+  CoroPM.addPass(GlobalDCEPass());
   return CoroConditionalWrapper(std::move(CoroPM));
 }
 
@@ -1012,7 +1010,7 @@ PassBuilder::buildInlinerPipeline(OptimizationLevel Level,
   MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
   RequireAnalysisPass<ShouldNotRunFunctionPassesAnalysis, Function>()));
 
-  if (!isLTOPreLink(Phase)) {
+  if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink) {
 MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
 MainCGPipeline.addPass(CoroAnnotationElidePass());
   }
@@ -1062,7 +1060,7 @@ PassBuilder::buildModuleInlinerPipeline(OptimizationLevel 
Level,
   buildFunctionSimplificationPipeline(Level, Phase),
   PTO.EagerlyInvalidateAnalyses));
 
-  if (!isLTOPreLink(Phase)) {
+  if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink) {
 MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
 CoroSplitPass(Level != OptimizationLevel::O0)));
 MPM.addPass(
@@ -1122,8 +1120,7 @@ 
PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
 // Do basic inference of function attributes from known properties of 
system
 // libraries and other oracles.
 MPM.addPass(InferFunctionAttrsPass());
-if (!isLTOPostLink(Phase))
-  MPM.addPass(CoroEarlyPass());
+MPM.addPass(CoroEarlyPass());
 
 FunctionPassManager EarlyFPM;
 EarlyFPM.addPass(EntryExitInstrumenterPass(/*PostInlining=*/false));
@@ -1293,7 +1290,7 @@ 
PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
   // and argument promotion.
   MPM.addPass(DeadArgumentEliminationPass());
 
-  if (!isLTOPreLink(Phase))
+  if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink)
 MPM.addPass(CoroCleanupPass());
 
   // Optimize globals now that functions are fully simplified.
@@ -1958,6 +1955,9 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel 
Level,
 return MPM;
   }
 
+  // TODO: Skip to match buildCoroWrapper.
+  MPM.addPass(CoroEarlyPass());
+
   // Optimize globals to try and fold them into constants.
   MPM.addPass(GlobalOptPass());
 

diff  --git a/llvm/test/LTO/X86/coro.ll b/llvm/test/LTO/X86/coro.ll
index f9830d964bc69..cde398dd76d85 100644
--- a/llvm/test/LTO/X86/coro.ll
+++ b/llvm/test/LTO/X86/coro.ll
@@ -1,6 +1,4 @@
-; RUN: opt %s -passes='lto-pre-link' -S -o %t1.ll
-; RUN: FileCheck %s --check-prefixes=CHECK,PRELINK --implicit-check-not="call 
void @llvm.coro" --input-file=%t1.ll
-; RUN: llvm-as %t1.ll -o %t1.bc
+; RUN: llvm-as %s -o %t1.bc
 ; RUN: llvm-lto2 run %t1.bc -o %t2.o -r=%t1.bc,test,plx 
-r=%t1.bc,extern_func,plx -save-temps
 ; RUN: llvm-dis %t2.o.0.5.precodegen.bc -o - | FileCheck %s 
--implicit-check-not="call void @llvm.coro"
 
@@ -9,9 +7,7 @@ target triple = "x86_64-unknown-fuchsia"
 
 declare void @extern_func()
 
-; CHECK: define{{.*}} void @test(
-; PRELINK: call ptr @llvm.coro.subfn.addr
-; PRELINK: call ptr @llvm.coro.subfn.addr
+; CHECK:  define {{.*}} void @test(
 define void @test(ptr %hdl) {
   call void @llvm.coro.resume(ptr %hdl)
   call void @llvm.coro.destroy(ptr %hdl)

diff  --git a/llvm/test/Other/new-pm-defaults.ll 
b/llvm/test/Other/new-pm-defaults.ll
index 30ff1a5879df2..c554fdbf4c799 100644
--- a/llvm/test/Other/new-pm-defaults.ll
+++ b/llvm/test/Other/new-pm-defaults.ll
@@ -230,13 +230,13 @@
 ;

[llvm-branch-commits] [llvm] [InstCombine] Enable select freeze poison folding when storing value (PR #129776)

2025-03-05 Thread John McIver via llvm-branch-commits



@@ -3124,6 +3124,19 @@ inline auto m_c_LogicalOp(const LHS &L, const RHS &R) {
   return m_LogicalOp(L, R);
 }
 
+struct GuaranteedNotToBeUndefOrPoison_match {
+  template <typename ITy> bool match(ITy *V) {
+if (auto *AsValue = dyn_cast<Value>(V))
+  return isGuaranteedNotToBeUndefOrPoison(AsValue);

jmciver wrote:

I'll fix the helper to take caller context.

https://github.com/llvm/llvm-project/pull/129776
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [InstCombine] Enable select freeze poison folding when storing value (PR #129776)

2025-03-05 Thread John McIver via llvm-branch-commits



@@ -4813,15 +4813,22 @@ Instruction *InstCombinerImpl::visitFreeze(FreezeInst 
&I) {
   // TODO: This could use getBinopAbsorber() / getBinopIdentity() to avoid
   //   duplicating logic for binops at least.
   auto getUndefReplacement = [&I](Type *Ty) {
-Constant *BestValue = nullptr;
-Constant *NullValue = Constant::getNullValue(Ty);
+Value *BestValue = nullptr;
+Value *NullValue = Constant::getNullValue(Ty);
 for (const auto *U : I.users()) {
-  Constant *C = NullValue;
+  Value *C = NullValue;
   if (match(U, m_Or(m_Value(), m_Value())))
 C = ConstantInt::getAllOnesValue(Ty);
   else if (match(U, m_Select(m_Specific(&I), m_Constant(), m_Value())))
 C = ConstantInt::getTrue(Ty);
-
+  else if (I.hasOneUse() &&

jmciver wrote:

Good point! I'll fix this as well.

https://github.com/llvm/llvm-project/pull/129776
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [InstCombine] Enable select freeze poison folding when storing value (PR #129776)

2025-03-05 Thread Yingwei Zheng via llvm-branch-commits



@@ -3124,6 +3124,19 @@ inline auto m_c_LogicalOp(const LHS &L, const RHS &R) {
   return m_LogicalOp(L, R);
 }
 
+struct GuaranteedNotToBeUndefOrPoison_match {
+  template <typename ITy> bool match(ITy *V) {
+if (auto *AsValue = dyn_cast<Value>(V))
+  return isGuaranteedNotToBeUndefOrPoison(AsValue);

dtcxzyw wrote:

I don't like this helper. Some context information (e.g., AC/DT/CxtI) is 
available in InstCombine.
They are useful to get a more precise analysis result.

https://github.com/llvm/llvm-project/pull/129776
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [InstCombine] Enable select freeze poison folding when storing value (PR #129776)

2025-03-05 Thread Yingwei Zheng via llvm-branch-commits



@@ -4813,15 +4813,22 @@ Instruction *InstCombinerImpl::visitFreeze(FreezeInst 
&I) {
   // TODO: This could use getBinopAbsorber() / getBinopIdentity() to avoid
   //   duplicating logic for binops at least.
   auto getUndefReplacement = [&I](Type *Ty) {
-Constant *BestValue = nullptr;
-Constant *NullValue = Constant::getNullValue(Ty);
+Value *BestValue = nullptr;
+Value *NullValue = Constant::getNullValue(Ty);
 for (const auto *U : I.users()) {
-  Constant *C = NullValue;
+  Value *C = NullValue;
   if (match(U, m_Or(m_Value(), m_Value())))
 C = ConstantInt::getAllOnesValue(Ty);
   else if (match(U, m_Select(m_Specific(&I), m_Constant(), m_Value())))
 C = ConstantInt::getTrue(Ty);
-
+  else if (I.hasOneUse() &&

dtcxzyw wrote:

If `I` has only one use, it must have only one user. I would like to hoist this 
logic out of the loop.
```
if (I.hasOneUse() && match(I.user_back(), m_c_Select(m_Specific(&I), 
m_Value(Arm))) && isGuaranteedNotToBeUndefOrPoison(Arm, &AC, &DT, &I))
  return Arm;

// existing code
Constant *BestValue = nullptr;
Constant *NullValue = Constant::getNullValue(Ty);
...
```


https://github.com/llvm/llvm-project/pull/129776
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] AMDGPU: Replace amdgpu-no-agpr with amdgpu-agpr-alloc (PR #129893)

2025-03-05 Thread Matt Arsenault via llvm-branch-commits


https://github.com/arsenm edited 
https://github.com/llvm/llvm-project/pull/129893
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] AMDGPU: Replace amdgpu-no-agpr with amdgpu-agpr-alloc (PR #129893)

2025-03-05 Thread Matt Arsenault via llvm-branch-commits


https://github.com/arsenm edited 
https://github.com/llvm/llvm-project/pull/129893
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [InstCombine] Enable select freeze poison folding when storing value (PR #129776)

2025-03-05 Thread Yingwei Zheng via llvm-branch-commits

dtcxzyw wrote:

> I would like to incorporate this and then once freeze poison -> null 
> canonicalization is removed from InstCombine refactor appropriately. Would 
> this be acceptable?

I don't mean to block this patch. I just worry that these patches may not be 
well tested (fuzzers/compile-time tracker/llvm-opt-benchmark) until we remove 
the canonicalization.

https://github.com/llvm/llvm-project/pull/129776
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] AMDGPU: Replace amdgpu-no-agpr with amdgpu-agpr-alloc (PR #129893)

2025-03-05 Thread Matt Arsenault via llvm-branch-commits



@@ -603,11 +601,7 @@ SIRegisterInfo::getMaxNumVectorRegs(const MachineFunction 
&MF) const {
 
 if (MinNumAGPRs == DefaultNumAGPR.first) {
   // Default to splitting half the registers if AGPRs are required.
-
-  if (MFI->mayNeedAGPRs())
-MinNumAGPRs = MaxNumAGPRs = MaxVectorRegs / 2;
-  else
-MinNumAGPRs = 0;

arsenm wrote:

mayNeedAGPRs is now a wrapper around the attribute, so this check is redundant.

https://github.com/llvm/llvm-project/pull/129893
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] AMDGPU: Replace amdgpu-no-agpr with amdgpu-agpr-alloc (PR #129893)

2025-03-05 Thread Matt Arsenault via llvm-branch-commits


arsenm wrote:

### Merge activity

* **Mar 5, 9:11 PM EST**: A user started a stack merge that includes this pull 
request via 
[Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/129893).


https://github.com/llvm/llvm-project/pull/129893
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] AMDGPU: Replace amdgpu-no-agpr with amdgpu-num-agpr (PR #129893)

2025-03-05 Thread Carl Ritson via llvm-branch-commits



@@ -603,11 +601,7 @@ SIRegisterInfo::getMaxNumVectorRegs(const MachineFunction 
&MF) const {
 
 if (MinNumAGPRs == DefaultNumAGPR.first) {
   // Default to splitting half the registers if AGPRs are required.
-
-  if (MFI->mayNeedAGPRs())
-MinNumAGPRs = MaxNumAGPRs = MaxVectorRegs / 2;
-  else
-MinNumAGPRs = 0;

perlfu wrote:

I guess the removal of the forced minima yields no functional change?

https://github.com/llvm/llvm-project/pull/129893
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [InstCombine] Enable select freeze poison folding when storing value (PR #129776)

2025-03-05 Thread John McIver via llvm-branch-commits

jmciver wrote:

@dtcxzyw I was not aware I should be using a fuzzer. What tool would you 
recommend?

From a correctness standpoint the match is relatively narrow and I have 
discussed it with @nlopes extensively. The patch has been tested with a bootstrap 
build of LLVM running all regressions. Additionally I have used the 
llvm-test-suite for correctness checks.

I have sent a request to @nikita asking for compile-time tracker access and can 
report results once available.

I also have used the patch in conjunction with 15+ Phoronix benchmarks. The 
optimization involving the fold of global is something I have seen in our 
memory semantics work in testing FFTW.

https://github.com/llvm/llvm-project/pull/129776
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [CodeGen][NPM] Port FEntryInserter to NPM (PR #129857)

2025-03-05 Thread Akshat Oke via llvm-branch-commits


optimisan wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack  href="https://app.graphite.dev/github/pr/llvm/llvm-project/129857?utm_source=stack-comment-downstack-mergeability-warning";
>  >on Graphite.
> https://graphite.dev/docs/merge-pull-requests";>Learn more

* **#129857** https://app.graphite.dev/github/pr/llvm/llvm-project/129857?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/> 👈 https://app.graphite.dev/github/pr/llvm/llvm-project/129857?utm_source=stack-comment-view-in-graphite";
 target="_blank">(View in Graphite)
* **#129853** https://app.graphite.dev/github/pr/llvm/llvm-project/129853?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#129828** https://app.graphite.dev/github/pr/llvm/llvm-project/129828?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* `main`




This stack of pull requests is managed by https://graphite.dev?utm-source=stack-comment";>Graphite. Learn 
more about https://stacking.dev/?utm_source=stack-comment";>stacking.


https://github.com/llvm/llvm-project/pull/129857
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] release/20.x: [Clang] Treat constexpr-unknown value as invalid in `EvaluateAsInitializer` (#128409) (PR #129836)

2025-03-05 Thread via llvm-branch-commits


https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/129836
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [CodeGen][NPM] Port XRayInstrumentation to NPM (PR #129865)

2025-03-05 Thread Akshat Oke via llvm-branch-commits


optimisan wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack  href="https://app.graphite.dev/github/pr/llvm/llvm-project/129865?utm_source=stack-comment-downstack-mergeability-warning";
>  >on Graphite.
> https://graphite.dev/docs/merge-pull-requests";>Learn more

* **#129866** https://app.graphite.dev/github/pr/llvm/llvm-project/129866?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#129865** https://app.graphite.dev/github/pr/llvm/llvm-project/129865?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/> 👈 https://app.graphite.dev/github/pr/llvm/llvm-project/129865?utm_source=stack-comment-view-in-graphite";
 target="_blank">(View in Graphite)
* **#129857** https://app.graphite.dev/github/pr/llvm/llvm-project/129857?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#129853** https://app.graphite.dev/github/pr/llvm/llvm-project/129853?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#129828** https://app.graphite.dev/github/pr/llvm/llvm-project/129828?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* `main`




This stack of pull requests is managed by https://graphite.dev?utm-source=stack-comment";>Graphite. Learn 
more about https://stacking.dev/?utm_source=stack-comment";>stacking.


https://github.com/llvm/llvm-project/pull/129865
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] release/20.x: [Clang] Treat constexpr-unknown value as invalid in `EvaluateAsInitializer` (#128409) (PR #129836)

2025-03-05 Thread via llvm-branch-commits


https://github.com/cor3ntin approved this pull request.


https://github.com/llvm/llvm-project/pull/129836
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [CodeGen][NPM] Port FEntryInserter to NPM (PR #129857)

2025-03-05 Thread Akshat Oke via llvm-branch-commits


https://github.com/optimisan created 
https://github.com/llvm/llvm-project/pull/129857

None

>From 2a78a73afa6bd50b0c9a71da41993917eba14587 Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Wed, 5 Mar 2025 09:19:08 +0000
Subject: [PATCH] [CodeGen][NPM] Port FEntryInserter to NPM

---
 llvm/include/llvm/InitializePasses.h  |  2 +-
 llvm/include/llvm/Passes/CodeGenPassBuilder.h |  1 +
 .../llvm/Passes/MachinePassRegistry.def   |  2 +-
 llvm/lib/CodeGen/CodeGen.cpp  |  2 +-
 llvm/lib/CodeGen/FEntryInserter.cpp   | 25 +++
 llvm/lib/Passes/PassBuilder.cpp   |  1 +
 6 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/llvm/include/llvm/InitializePasses.h 
b/llvm/include/llvm/InitializePasses.h
index 5f8e55d783161..63917b2b7f729 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -112,7 +112,7 @@ void initializeExpandPostRALegacyPass(PassRegistry &);
 void initializeExpandReductionsPass(PassRegistry &);
 void initializeExpandVariadicsPass(PassRegistry &);
 void initializeExternalAAWrapperPassPass(PassRegistry &);
-void initializeFEntryInserterPass(PassRegistry &);
+void initializeFEntryInserterLegacyPass(PassRegistry &);
 void initializeFinalizeISelPass(PassRegistry &);
 void initializeFinalizeMachineBundlesPass(PassRegistry &);
 void initializeFixIrreduciblePass(PassRegistry &);
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h 
b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index 4db489d804013..bab475d740467 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -33,6 +33,7 @@
 #include "llvm/CodeGen/ExpandMemCmp.h"
 #include "llvm/CodeGen/ExpandPostRAPseudos.h"
 #include "llvm/CodeGen/ExpandReductions.h"
+#include "llvm/CodeGen/FEntryInserter.h"
 #include "llvm/CodeGen/FinalizeISel.h"
 #include "llvm/CodeGen/GCMetadata.h"
 #include "llvm/CodeGen/GlobalMerge.h"
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def 
b/llvm/include/llvm/Passes/MachinePassRegistry.def
index d032087fa7073..667a7352930ea 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -142,6 +142,7 @@ MACHINE_FUNCTION_PASS("dead-mi-elimination", 
DeadMachineInstructionElimPass())
 MACHINE_FUNCTION_PASS("early-ifcvt", EarlyIfConverterPass())
 MACHINE_FUNCTION_PASS("early-machinelicm", EarlyMachineLICMPass())
 MACHINE_FUNCTION_PASS("early-tailduplication", EarlyTailDuplicatePass())
+MACHINE_FUNCTION_PASS("fentry-insert", FEntryInserterPass())
 MACHINE_FUNCTION_PASS("finalize-isel", FinalizeISelPass())
 MACHINE_FUNCTION_PASS("localstackalloc", LocalStackSlotAllocationPass())
 MACHINE_FUNCTION_PASS("machine-cp", MachineCopyPropagationPass())
@@ -258,7 +259,6 @@ DUMMY_MACHINE_FUNCTION_PASS("cfi-fixup", CFIFixupPass)
 DUMMY_MACHINE_FUNCTION_PASS("cfi-instr-inserter", CFIInstrInserterPass)
 DUMMY_MACHINE_FUNCTION_PASS("detect-dead-lanes", DetectDeadLanesPass)
 DUMMY_MACHINE_FUNCTION_PASS("dot-machine-cfg", MachineCFGPrinter)
-DUMMY_MACHINE_FUNCTION_PASS("fentry-insert", FEntryInserterPass)
 DUMMY_MACHINE_FUNCTION_PASS("fixup-statepoint-caller-saved", 
FixupStatepointCallerSavedPass)
 DUMMY_MACHINE_FUNCTION_PASS("fs-profile-loader", MIRProfileLoaderNewPass)
 DUMMY_MACHINE_FUNCTION_PASS("funclet-layout", FuncletLayoutPass)
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index 2cc4bf14e9804..effb556e63435 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -43,7 +43,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeExpandLargeFpConvertLegacyPassPass(Registry);
   initializeExpandMemCmpLegacyPassPass(Registry);
   initializeExpandPostRALegacyPass(Registry);
-  initializeFEntryInserterPass(Registry);
+  initializeFEntryInserterLegacyPass(Registry);
   initializeFinalizeISelPass(Registry);
   initializeFinalizeMachineBundlesPass(Registry);
   initializeFixupStatepointCallerSavedPass(Registry);
diff --git a/llvm/lib/CodeGen/FEntryInserter.cpp 
b/llvm/lib/CodeGen/FEntryInserter.cpp
index 68304dd41db04..4f1bd7df6a204 100644
--- a/llvm/lib/CodeGen/FEntryInserter.cpp
+++ b/llvm/lib/CodeGen/FEntryInserter.cpp
@@ -10,9 +10,11 @@
 //
 
//===--===//
 
+#include "llvm/CodeGen/FEntryInserter.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachinePassManager.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/Function.h"
@@ -21,17 +23,30 @@
 using namespace llvm;
 
 namespace {
-struct FEntryInserter : public MachineFunctionPass {
+struct FEntryInserter {
+  bool run(MachineFunction &MF);
+};
+
+struct FEntryInserterLegacy : public MachineFunctionPass {
   static char ID; // Pass identification,

[llvm-branch-commits] [llvm] [CodeGen][NPM] Port FEntryInserter to NPM (PR #129857)

2025-03-05 Thread Akshat Oke via llvm-branch-commits


https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/129857

>From 0ba6ca6ef0172f61f23cfee8d20a59e1138d5dfc Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Wed, 5 Mar 2025 09:19:08 +
Subject: [PATCH] [CodeGen][NPM] Port FEntryInserter to NPM

---
 llvm/include/llvm/CodeGen/FEntryInserter.h| 24 ++
 llvm/include/llvm/InitializePasses.h  |  2 +-
 llvm/include/llvm/Passes/CodeGenPassBuilder.h |  1 +
 .../llvm/Passes/MachinePassRegistry.def   |  2 +-
 llvm/lib/CodeGen/CodeGen.cpp  |  2 +-
 llvm/lib/CodeGen/FEntryInserter.cpp   | 25 +++
 llvm/lib/Passes/PassBuilder.cpp   |  1 +
 7 files changed, 49 insertions(+), 8 deletions(-)
 create mode 100644 llvm/include/llvm/CodeGen/FEntryInserter.h

diff --git a/llvm/include/llvm/CodeGen/FEntryInserter.h 
b/llvm/include/llvm/CodeGen/FEntryInserter.h
new file mode 100644
index 0..16c5372d049fa
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/FEntryInserter.h
@@ -0,0 +1,24 @@
+//===- llvm/CodeGen/FEntryInserter.h *- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLVM_CODEGEN_FENTRYINSERTER_H
+#define LLVM_CODEGEN_FENTRYINSERTER_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+
+class FEntryInserterPass : public PassInfoMixin<FEntryInserterPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+MachineFunctionAnalysisManager &MFAM);
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_FENTRYINSERTER_H
diff --git a/llvm/include/llvm/InitializePasses.h 
b/llvm/include/llvm/InitializePasses.h
index 5f8e55d783161..63917b2b7f729 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -112,7 +112,7 @@ void initializeExpandPostRALegacyPass(PassRegistry &);
 void initializeExpandReductionsPass(PassRegistry &);
 void initializeExpandVariadicsPass(PassRegistry &);
 void initializeExternalAAWrapperPassPass(PassRegistry &);
-void initializeFEntryInserterPass(PassRegistry &);
+void initializeFEntryInserterLegacyPass(PassRegistry &);
 void initializeFinalizeISelPass(PassRegistry &);
 void initializeFinalizeMachineBundlesPass(PassRegistry &);
 void initializeFixIrreduciblePass(PassRegistry &);
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h 
b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index 4db489d804013..bab475d740467 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -33,6 +33,7 @@
 #include "llvm/CodeGen/ExpandMemCmp.h"
 #include "llvm/CodeGen/ExpandPostRAPseudos.h"
 #include "llvm/CodeGen/ExpandReductions.h"
+#include "llvm/CodeGen/FEntryInserter.h"
 #include "llvm/CodeGen/FinalizeISel.h"
 #include "llvm/CodeGen/GCMetadata.h"
 #include "llvm/CodeGen/GlobalMerge.h"
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def 
b/llvm/include/llvm/Passes/MachinePassRegistry.def
index d032087fa7073..667a7352930ea 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -142,6 +142,7 @@ MACHINE_FUNCTION_PASS("dead-mi-elimination", 
DeadMachineInstructionElimPass())
 MACHINE_FUNCTION_PASS("early-ifcvt", EarlyIfConverterPass())
 MACHINE_FUNCTION_PASS("early-machinelicm", EarlyMachineLICMPass())
 MACHINE_FUNCTION_PASS("early-tailduplication", EarlyTailDuplicatePass())
+MACHINE_FUNCTION_PASS("fentry-insert", FEntryInserterPass())
 MACHINE_FUNCTION_PASS("finalize-isel", FinalizeISelPass())
 MACHINE_FUNCTION_PASS("localstackalloc", LocalStackSlotAllocationPass())
 MACHINE_FUNCTION_PASS("machine-cp", MachineCopyPropagationPass())
@@ -258,7 +259,6 @@ DUMMY_MACHINE_FUNCTION_PASS("cfi-fixup", CFIFixupPass)
 DUMMY_MACHINE_FUNCTION_PASS("cfi-instr-inserter", CFIInstrInserterPass)
 DUMMY_MACHINE_FUNCTION_PASS("detect-dead-lanes", DetectDeadLanesPass)
 DUMMY_MACHINE_FUNCTION_PASS("dot-machine-cfg", MachineCFGPrinter)
-DUMMY_MACHINE_FUNCTION_PASS("fentry-insert", FEntryInserterPass)
 DUMMY_MACHINE_FUNCTION_PASS("fixup-statepoint-caller-saved", 
FixupStatepointCallerSavedPass)
 DUMMY_MACHINE_FUNCTION_PASS("fs-profile-loader", MIRProfileLoaderNewPass)
 DUMMY_MACHINE_FUNCTION_PASS("funclet-layout", FuncletLayoutPass)
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index 2cc4bf14e9804..effb556e63435 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -43,7 +43,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeExpandLargeFpConvertLegacyPassPass(Registry);
   initializeExpandMemCmpLegacyPassPass(Registry);
   initializeExpandPostRALegacyPass(Registry);
-  initializeFEntryInserterPass(Registry);
+  initial

[llvm-branch-commits] [llvm] [CodeGen][NPM] Port PatchableFunction to NPM (PR #129866)

2025-03-05 Thread Akshat Oke via llvm-branch-commits


https://github.com/optimisan created 
https://github.com/llvm/llvm-project/pull/129866

None

>From c9386f19d4a87f9fd88bb96aa0c23eba638e96da Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Wed, 5 Mar 2025 10:34:25 +
Subject: [PATCH] [CodeGen][NPM] Port PatchableFunction to NPM

---
 llvm/include/llvm/CodeGen/PatchableFunction.h | 29 +++
 llvm/include/llvm/InitializePasses.h  |  2 +-
 llvm/include/llvm/Passes/CodeGenPassBuilder.h |  1 +
 .../llvm/Passes/MachinePassRegistry.def   |  2 +-
 llvm/lib/CodeGen/CodeGen.cpp  |  2 +-
 llvm/lib/CodeGen/PatchableFunction.cpp| 37 ++-
 llvm/lib/Passes/PassBuilder.cpp   |  1 +
 7 files changed, 61 insertions(+), 13 deletions(-)
 create mode 100644 llvm/include/llvm/CodeGen/PatchableFunction.h

diff --git a/llvm/include/llvm/CodeGen/PatchableFunction.h 
b/llvm/include/llvm/CodeGen/PatchableFunction.h
new file mode 100644
index 0..d10dcfbc1f015
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/PatchableFunction.h
@@ -0,0 +1,29 @@
+//===- llvm/CodeGen/PatchableFunction.h -*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLVM_CODEGEN_PATCHABLEFUNCTION_H
+#define LLVM_CODEGEN_PATCHABLEFUNCTION_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+
+class PatchableFunctionPass : public PassInfoMixin<PatchableFunctionPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+MachineFunctionAnalysisManager &MFAM);
+
+  MachineFunctionProperties getRequiredProperties() const {
+return MachineFunctionProperties().set(
+MachineFunctionProperties::Property::NoVRegs);
+  }
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_PATCHABLEFUNCTION_H
diff --git a/llvm/include/llvm/InitializePasses.h 
b/llvm/include/llvm/InitializePasses.h
index dcfd9fc6a86b9..f1c16e3b1cb40 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -225,7 +225,7 @@ void initializeOptimizePHIsLegacyPass(PassRegistry &);
 void initializePEIPass(PassRegistry &);
 void initializePHIEliminationPass(PassRegistry &);
 void initializePartiallyInlineLibCallsLegacyPassPass(PassRegistry &);
-void initializePatchableFunctionPass(PassRegistry &);
+void initializePatchableFunctionLegacyPass(PassRegistry &);
 void initializePeepholeOptimizerLegacyPass(PassRegistry &);
 void initializePhiValuesWrapperPassPass(PassRegistry &);
 void initializePhysicalRegisterUsageInfoWrapperLegacyPass(PassRegistry &);
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h 
b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index 426dc6c7eacfd..aab2c58ac0f78 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -59,6 +59,7 @@
 #include "llvm/CodeGen/MachineVerifier.h"
 #include "llvm/CodeGen/OptimizePHIs.h"
 #include "llvm/CodeGen/PHIElimination.h"
+#include "llvm/CodeGen/PatchableFunction.h"
 #include "llvm/CodeGen/PeepholeOptimizer.h"
 #include "llvm/CodeGen/PostRASchedulerList.h"
 #include "llvm/CodeGen/PreISelIntrinsicLowering.h"
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def 
b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 8b1373c0ffefd..bedbc3e88a7ce 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -152,6 +152,7 @@ MACHINE_FUNCTION_PASS("machine-scheduler", 
MachineSchedulerPass(TM))
 MACHINE_FUNCTION_PASS("machinelicm", MachineLICMPass())
 MACHINE_FUNCTION_PASS("no-op-machine-function", NoOpMachineFunctionPass())
 MACHINE_FUNCTION_PASS("opt-phis", OptimizePHIsPass())
+MACHINE_FUNCTION_PASS("patchable-function", PatchableFunctionPass())
 MACHINE_FUNCTION_PASS("peephole-opt", PeepholeOptimizerPass())
 MACHINE_FUNCTION_PASS("phi-node-elimination", PHIEliminationPass())
 MACHINE_FUNCTION_PASS("post-RA-sched", PostRASchedulerPass(TM))
@@ -279,7 +280,6 @@ DUMMY_MACHINE_FUNCTION_PASS("machine-sanmd", 
MachineSanitizerBinaryMetadata)
 DUMMY_MACHINE_FUNCTION_PASS("machine-uniformity", 
MachineUniformityInfoWrapperPass)
 DUMMY_MACHINE_FUNCTION_PASS("machineinstr-printer", MachineFunctionPrinterPass)
 DUMMY_MACHINE_FUNCTION_PASS("mirfs-discriminators", MIRAddFSDiscriminatorsPass)
-DUMMY_MACHINE_FUNCTION_PASS("patchable-function", PatchableFunctionPass)
 DUMMY_MACHINE_FUNCTION_PASS("postra-machine-sink", PostRAMachineSinkingPass)
 DUMMY_MACHINE_FUNCTION_PASS("print-machine-uniformity", 
MachineUniformityInfoPrinterPass)
 DUMMY_MACHINE_FUNCTION_PASS("processimpdefs", ProcessImplicitDefsPass)
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index b299983503232..375176ed4b1ce 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++

[llvm-branch-commits] [llvm] [CodeGen][NPM] Port PatchableFunction to NPM (PR #129866)

2025-03-05 Thread Akshat Oke via llvm-branch-commits


optimisan wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/129866?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>.
> <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a>

* **#129866** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/129866?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/129866?utm_source=stack-comment-view-in-graphite" target="_blank">(View in Graphite)</a>
* **#129865** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/129865?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#129857** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/129857?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#129853** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/129853?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#129828** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/129828?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* `main`




This stack of pull requests is managed by <a href="https://graphite.dev?utm-source=stack-comment"><b>Graphite</b></a>. Learn 
more about <a href="https://stacking.dev/?utm_source=stack-comment">stacking</a>.


https://github.com/llvm/llvm-project/pull/129866
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [CodeGen][NPM] Port XRayInstrumentation to NPM (PR #129865)

2025-03-05 Thread Akshat Oke via llvm-branch-commits


https://github.com/optimisan created 
https://github.com/llvm/llvm-project/pull/129865

None

>From 32a8bd59f64750fb3c2e72a7e26ba7a81ff86210 Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Wed, 5 Mar 2025 10:11:27 +
Subject: [PATCH] [CodeGen][NPM] Port XRayInstrumentation to NPM

---
 .../llvm/CodeGen/XRayInstrumentation.h| 24 +
 llvm/include/llvm/InitializePasses.h  |  2 +-
 llvm/include/llvm/Passes/CodeGenPassBuilder.h |  1 +
 .../llvm/Passes/MachinePassRegistry.def   |  2 +-
 llvm/lib/CodeGen/CodeGen.cpp  |  2 +-
 llvm/lib/CodeGen/FEntryInserter.cpp   |  8 +-
 llvm/lib/CodeGen/XRayInstrumentation.cpp  | 90 +++
 llvm/lib/Passes/PassBuilder.cpp   |  1 +
 llvm/test/CodeGen/X86/xray-empty-firstmbb.mir |  1 +
 .../X86/xray-multiplerets-in-blocks.mir   |  1 +
 10 files changed, 110 insertions(+), 22 deletions(-)
 create mode 100644 llvm/include/llvm/CodeGen/XRayInstrumentation.h

diff --git a/llvm/include/llvm/CodeGen/XRayInstrumentation.h 
b/llvm/include/llvm/CodeGen/XRayInstrumentation.h
new file mode 100644
index 0..ed39a7f3c1654
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/XRayInstrumentation.h
@@ -0,0 +1,24 @@
+//===- llvm/CodeGen/XRayInstrumentation.h *- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLVM_CODEGEN_XRAYINSTRUMENTATION_H
+#define LLVM_CODEGEN_XRAYINSTRUMENTATION_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+
+class XRayInstrumentationPass : public PassInfoMixin<XRayInstrumentationPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+MachineFunctionAnalysisManager &MFAM);
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_XRAYINSTRUMENTATION_H
diff --git a/llvm/include/llvm/InitializePasses.h 
b/llvm/include/llvm/InitializePasses.h
index 63917b2b7f729..dcfd9fc6a86b9 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -320,7 +320,7 @@ void initializeVirtRegRewriterPass(PassRegistry &);
 void initializeWasmEHPreparePass(PassRegistry &);
 void initializeWinEHPreparePass(PassRegistry &);
 void initializeWriteBitcodePassPass(PassRegistry &);
-void initializeXRayInstrumentationPass(PassRegistry &);
+void initializeXRayInstrumentationLegacyPass(PassRegistry &);
 
 } // end namespace llvm
 
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h 
b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index bab475d740467..426dc6c7eacfd 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -85,6 +85,7 @@
 #include "llvm/CodeGen/UnreachableBlockElim.h"
 #include "llvm/CodeGen/WasmEHPrepare.h"
 #include "llvm/CodeGen/WinEHPrepare.h"
+#include "llvm/CodeGen/XRayInstrumentation.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/IR/Verifier.h"
 #include "llvm/IRPrinter/IRPrintingPasses.h"
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def 
b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 667a7352930ea..8b1373c0ffefd 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -188,6 +188,7 @@ MACHINE_FUNCTION_PASS("trigger-verifier-error", 
TriggerVerifierErrorPass())
 MACHINE_FUNCTION_PASS("two-address-instruction", TwoAddressInstructionPass())
 MACHINE_FUNCTION_PASS("verify", MachineVerifierPass())
 MACHINE_FUNCTION_PASS("verify<machine-trace-metrics>", 
MachineTraceMetricsVerifierPass())
+MACHINE_FUNCTION_PASS("xray-instrumentation", XRayInstrumentationPass())
 #undef MACHINE_FUNCTION_PASS
 
 #ifndef MACHINE_FUNCTION_PASS_WITH_PARAMS
@@ -296,5 +297,4 @@ DUMMY_MACHINE_FUNCTION_PASS("stack-frame-layout", 
StackFrameLayoutAnalysisPass)
 DUMMY_MACHINE_FUNCTION_PASS("stackmap-liveness", StackMapLivenessPass)
 DUMMY_MACHINE_FUNCTION_PASS("unpack-mi-bundles", UnpackMachineBundlesPass)
 DUMMY_MACHINE_FUNCTION_PASS("virtregrewriter", VirtRegRewriterPass)
-DUMMY_MACHINE_FUNCTION_PASS("xray-instrumentation", XRayInstrumentationPass)
 #undef DUMMY_MACHINE_FUNCTION_PASS
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index effb556e63435..b299983503232 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -144,5 +144,5 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeVirtRegRewriterPass(Registry);
   initializeWasmEHPreparePass(Registry);
   initializeWinEHPreparePass(Registry);
-  initializeXRayInstrumentationPass(Registry);
+  initializeXRayInstrumentationLegacyPass(Registry);
 }
diff --git a/llvm/lib/CodeGen/FEntryInserter.cpp 
b/llvm/lib/CodeGen/FEntryInserter.cpp
index 4f1bd7df6a204..79949dac51448 100644
--- a/llvm/lib/CodeGen/FEntryInserter.cpp
+++ b/llvm/lib/Co

[llvm-branch-commits] [llvm] [CodeGen][NPM] Port MachineBlockPlacementStats to NPM (PR #129853)

2025-03-05 Thread Akshat Oke via llvm-branch-commits


https://github.com/optimisan created 
https://github.com/llvm/llvm-project/pull/129853

None

>From a01bc11d08290eab70e8e48858791cfbeb4123e6 Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Wed, 5 Mar 2025 08:59:23 +
Subject: [PATCH] [CodeGen][NPM] Port MachineBlockPlacementStats to NPM

---
 .../llvm/CodeGen/MachineBlockPlacement.h  |  8 
 llvm/include/llvm/InitializePasses.h  |  2 +-
 .../llvm/Passes/MachinePassRegistry.def   |  2 +-
 llvm/lib/CodeGen/CodeGen.cpp  |  2 +-
 llvm/lib/CodeGen/MachineBlockPlacement.cpp| 45 ++-
 5 files changed, 44 insertions(+), 15 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/MachineBlockPlacement.h 
b/llvm/include/llvm/CodeGen/MachineBlockPlacement.h
index 8003b52fa6a3c..3d23ce7001071 100644
--- a/llvm/include/llvm/CodeGen/MachineBlockPlacement.h
+++ b/llvm/include/llvm/CodeGen/MachineBlockPlacement.h
@@ -25,6 +25,14 @@ class MachineBlockPlacementPass
 MachineFunctionAnalysisManager &MFAM);
 };
 
+class MachineBlockPlacementStatsPass
+: public PassInfoMixin<MachineBlockPlacementStatsPass> {
+
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+MachineFunctionAnalysisManager &MFAM);
+};
+
 } // namespace llvm
 
 #endif // LLVM_CODEGEN_MACHINEBLOCKPLACEMENT_H
diff --git a/llvm/include/llvm/InitializePasses.h 
b/llvm/include/llvm/InitializePasses.h
index 1ed7cbe976b9b..5f8e55d783161 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -185,7 +185,7 @@ void initializeMIRNamerPass(PassRegistry &);
 void initializeMIRPrintingPassPass(PassRegistry &);
 void initializeMachineBlockFrequencyInfoWrapperPassPass(PassRegistry &);
 void initializeMachineBlockPlacementLegacyPass(PassRegistry &);
-void initializeMachineBlockPlacementStatsPass(PassRegistry &);
+void initializeMachineBlockPlacementStatsLegacyPass(PassRegistry &);
 void initializeMachineBranchProbabilityInfoWrapperPassPass(PassRegistry &);
 void initializeMachineCFGPrinterPass(PassRegistry &);
 void initializeMachineCSELegacyPass(PassRegistry &);
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def 
b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 8957011ca948c..d032087fa7073 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -137,6 +137,7 @@ MACHINE_FUNCTION_ANALYSIS("virtregmap", 
VirtRegMapAnalysis())
 #ifndef MACHINE_FUNCTION_PASS
 #define MACHINE_FUNCTION_PASS(NAME, CREATE_PASS)
 #endif
+MACHINE_FUNCTION_PASS("block-placement-stats", 
MachineBlockPlacementStatsPass())
 MACHINE_FUNCTION_PASS("dead-mi-elimination", DeadMachineInstructionElimPass())
 MACHINE_FUNCTION_PASS("early-ifcvt", EarlyIfConverterPass())
 MACHINE_FUNCTION_PASS("early-machinelicm", EarlyMachineLICMPass())
@@ -250,7 +251,6 @@ DUMMY_MACHINE_MODULE_PASS("mir-strip-debug", 
StripDebugMachineModulePass)
 #endif
 DUMMY_MACHINE_FUNCTION_PASS("bbsections-prepare", BasicBlockSectionsPass)
 DUMMY_MACHINE_FUNCTION_PASS("bbsections-profile-reader", 
BasicBlockSectionsProfileReaderPass)
-DUMMY_MACHINE_FUNCTION_PASS("block-placement-stats", 
MachineBlockPlacementStatsPass)
 DUMMY_MACHINE_FUNCTION_PASS("branch-folder", BranchFolderPass)
 DUMMY_MACHINE_FUNCTION_PASS("break-false-deps", BreakFalseDepsPass)
 DUMMY_MACHINE_FUNCTION_PASS("cfguard-longjmp", CFGuardLongjmpPass)
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index 93729f08a8721..2cc4bf14e9804 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -73,7 +73,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeMIRProfileLoaderPassPass(Registry);
   initializeMachineBlockFrequencyInfoWrapperPassPass(Registry);
   initializeMachineBlockPlacementLegacyPass(Registry);
-  initializeMachineBlockPlacementStatsPass(Registry);
+  initializeMachineBlockPlacementStatsLegacyPass(Registry);
   initializeMachineCFGPrinterPass(Registry);
   initializeMachineCSELegacyPass(Registry);
   initializeMachineCombinerPass(Registry);
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp 
b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 322655c0c998b..b6dd374cf1b31 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -3829,21 +3829,35 @@ namespace {
 /// placement. This is separate from the actual placement pass so that they can
 /// be computed in the absence of any placement transformations or when using
 /// alternative placement strategies.
-class MachineBlockPlacementStats : public MachineFunctionPass {
+class MachineBlockPlacementStats {
   /// A handle to the branch probability pass.
   const MachineBranchProbabilityInfo *MBPI;
 
   /// A handle to the function-wide block frequency pass.
   const MachineBlockFrequencyInfo *MBFI;
 
+public:
+  MachineBlockPlacementStats(const MachineBranchProbabilityInfo *MBPI,
+ const MachineBlockFrequencyInfo *MBFI)
+

[llvm-branch-commits] [llvm] [CodeGen][NPM] Port MachineBlockPlacementStats to NPM (PR #129853)

2025-03-05 Thread Akshat Oke via llvm-branch-commits


https://github.com/optimisan ready_for_review 
https://github.com/llvm/llvm-project/pull/129853
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] release/20.x: Reland "[LV]: Teach LV to recursively (de)interleave." (#125094) (PR #128389)

2025-03-05 Thread Hassnaa Hamdi via llvm-branch-commits


https://github.com/hassnaaHamdi closed 
https://github.com/llvm/llvm-project/pull/128389
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [clang] [sanitizer] add pseudofunction to indicate array-bounds check (PR #128977)

2025-03-05 Thread Michael Buch via llvm-branch-commits


Michael137 wrote:

Could you elaborate on how this will be used/the motivation for this?

https://github.com/llvm/llvm-project/pull/128977
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [InstCombine] Enable select freeze poison folding when storing value (PR #129776)

2025-03-05 Thread Nikita Popov via llvm-branch-commits


nikic wrote:

> We can do this fold in InstSimplify: https://alive2.llvm.org/ce/z/Dm53TP

The transform is only valid if the freeze(poison) is one-use. And I don't think 
that InstSimplify should be doing any use-based checks. So I think InstCombine 
is the right place for it.

https://github.com/llvm/llvm-project/pull/129776
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Add Metadata generation of Root Signatures for Attr (PR #125131)

2025-03-05 Thread Joshua Batista via llvm-branch-commits



@@ -0,0 +1,108 @@
+//===- HLSLRootSignature.cpp - HLSL Root Signature helper objects
+//--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+///
+/// \file This file contains helpers for working with HLSL Root Signatures.
+///
+//===--===//
+
+#include "llvm/Frontend/HLSL/HLSLRootSignature.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+
+namespace llvm {
+namespace hlsl {
+namespace rootsig {
+
+// Static helper functions
+
+static MDString *ClauseTypeToName(LLVMContext &Ctx, ClauseType Type) {
+  StringRef Name;
+  switch (Type) {
+  case ClauseType::CBuffer:
+Name = "CBV";
+break;
+  case ClauseType::SRV:
+Name = "SRV";
+break;
+  case ClauseType::UAV:
+Name = "UAV";
+break;
+  case ClauseType::Sampler:
+Name = "Sampler";
+break;
+  }
+  return MDString::get(Ctx, Name);
+}
+
+// Helper struct so that we can use the overloaded notation of std::visit
+template <class... Ts> struct OverloadBuilds : Ts... {
+  using Ts::operator()...;
+};
+template <class... Ts> OverloadBuilds(Ts...) -> OverloadBuilds<Ts...>;
+
+MDNode *MetadataBuilder::BuildRootSignature() {
+  for (const RootElement &Element : Elements) {
+MDNode *ElementMD =
+std::visit(OverloadBuilds{
+   [&](DescriptorTable Table) -> MDNode * {
+ return BuildDescriptorTable(Table);
+   },
+   [&](DescriptorTableClause Clause) -> MDNode * {
+ return BuildDescriptorTableClause(Clause);
+   },
+   },
+   Element);
+GeneratedMetadata.push_back(ElementMD);
+  }
+
+  return MDNode::get(Ctx, GeneratedMetadata);
+}
+
+MDNode *MetadataBuilder::BuildDescriptorTable(const DescriptorTable &Table) {
+  IRBuilder<> B(Ctx);
+  SmallVector<Metadata *> TableOperands;
+  // Set the mandatory arguments
+  TableOperands.push_back(MDString::get(Ctx, "DescriptorTable"));
+  TableOperands.push_back(ConstantAsMetadata::get(
+  B.getInt32(llvm::to_underlying(Table.Visibility))));
+
+  // Remaining operands are references to the table's clauses. The in-memory

bob80905 wrote:

Very helpful 👍

https://github.com/llvm/llvm-project/pull/125131
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Add Metadata generation of Root Signatures for Attr (PR #125131)

2025-03-05 Thread Joshua Batista via llvm-branch-commits


https://github.com/bob80905 approved this pull request.

Just a nit but this looks good to me

https://github.com/llvm/llvm-project/pull/125131
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Add Metadata generation of Root Signatures for Attr (PR #125131)

2025-03-05 Thread Finn Plummer via llvm-branch-commits



@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -o - %s | 
FileCheck %s
+
+// CHECK-DAG: ![[#EMPTY:]] = !{}
+[shader("compute"), RootSignature("")]
+[numthreads(1,1,1)]
+void FirstEntry() {}
+
+// CHECK-DAG: ![[#CBV:]] = !{!"CBV", i32 1, i32 0, i32 0, i32 -1, i32 4}

inbelic wrote:

These are in a deterministic order, I had only used DAG to help with 
readability of the testcase. But I can see that affects correctness. Updated to 
remove use of CHECK-DAG

https://github.com/llvm/llvm-project/pull/125131
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Add Metadata generation of Root Signatures for Attr (PR #125131)

2025-03-05 Thread Finn Plummer via llvm-branch-commits


https://github.com/inbelic edited 
https://github.com/llvm/llvm-project/pull/125131
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Add Metadata generation of Root Signatures for Attr (PR #125131)

2025-03-05 Thread Finn Plummer via llvm-branch-commits


https://github.com/inbelic edited 
https://github.com/llvm/llvm-project/pull/125131
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Add Metadata generation of Root Signatures for Attr (PR #125131)

2025-03-05 Thread Finn Plummer via llvm-branch-commits


https://github.com/inbelic updated 
https://github.com/llvm/llvm-project/pull/125131

>From abe7e6703a008608e19ce3f9bdcbd1b613fab60d Mon Sep 17 00:00:00 2001
From: Finn Plummer 
Date: Wed, 29 Jan 2025 19:40:08 +
Subject: [PATCH 1/7] add basic empty root signature

---
 clang/lib/CodeGen/CGHLSLRuntime.cpp   | 21 +
 clang/test/CodeGenHLSL/RootSignature.hlsl | 19 +++
 2 files changed, 40 insertions(+)
 create mode 100644 clang/test/CodeGenHLSL/RootSignature.hlsl

diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp 
b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index c354e58e15f4b..ff608323e9ac3 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -119,6 +119,20 @@ GlobalVariable *replaceBuffer(CGHLSLRuntime::Buffer &Buf) {
   return CBGV;
 }
 
+void addRootSignature(llvm::Function *Fn, llvm::Module &M) {
+  auto &Ctx = M.getContext();
+  IRBuilder<> B(M.getContext());
+
+  MDNode *ExampleRootSignature = MDNode::get(Ctx, {});
+
+  MDNode *ExamplePairing = MDNode::get(Ctx, {ValueAsMetadata::get(Fn),
+ ExampleRootSignature});
+
+  StringRef RootSignatureValKey = "dx.rootsignatures";
+  auto *RootSignatureValMD = M.getOrInsertNamedMetadata(RootSignatureValKey);
+  RootSignatureValMD->addOperand(ExamplePairing);
+}
+
 } // namespace
 
 llvm::Type *CGHLSLRuntime::convertHLSLSpecificType(const Type *T) {
@@ -453,6 +467,13 @@ void CGHLSLRuntime::emitEntryFunction(const FunctionDecl 
*FD,
   // FIXME: Handle codegen for return type semantics.
   // See: https://github.com/llvm/llvm-project/issues/57875
   B.CreateRetVoid();
+
+  // Add and identify root signature to function, if applicable
+  const AttrVec &Attrs = FD->getAttrs();
+  for (const Attr *Attr : Attrs) {
+if (isa(Attr))
+  addRootSignature(EntryFn, M);
+  }
 }
 
 void CGHLSLRuntime::setHLSLFunctionAttributes(const FunctionDecl *FD,
diff --git a/clang/test/CodeGenHLSL/RootSignature.hlsl 
b/clang/test/CodeGenHLSL/RootSignature.hlsl
new file mode 100644
index 0..1ea9ab7aaa2c3
--- /dev/null
+++ b/clang/test/CodeGenHLSL/RootSignature.hlsl
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -o - %s | 
FileCheck %s
+
+// CHECK: !dx.rootsignatures = !{![[#FIRST_ENTRY:]], ![[#SECOND_ENTRY:]]}
+// CHECK-DAG: ![[#FIRST_ENTRY]] = !{ptr @FirstEntry, ![[#RS:]]}
+// CHECK-DAG: ![[#SECOND_ENTRY]] = !{ptr @SecondEntry, ![[#RS:]]}
+// CHECK-DAG: ![[#RS]] = !{}
+
+[shader("compute"), RootSignature("")]
+[numthreads(1,1,1)]
+void FirstEntry() {}
+
+[shader("compute"), RootSignature("DescriptorTable()")]
+[numthreads(1,1,1)]
+void SecondEntry() {}
+
+// Sanity test to ensure no root signature is added for this function
+[shader("compute")]
+[numthreads(1,1,1)]
+void ThirdEntry() {}

>From 671f099d3d58995677c47b4226481b72295e525d Mon Sep 17 00:00:00 2001
From: Finn Plummer 
Date: Wed, 29 Jan 2025 19:57:48 +
Subject: [PATCH 2/7] pass down the actual root elements

- test that we have the correct number of elements
---
 clang/lib/CodeGen/CGHLSLRuntime.cpp   | 17 -
 clang/test/CodeGenHLSL/RootSignature.hlsl |  9 +
 2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp 
b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index ff608323e9ac3..4c9adcd8a9053 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -119,11 +119,18 @@ GlobalVariable *replaceBuffer(CGHLSLRuntime::Buffer &Buf) 
{
   return CBGV;
 }
 
-void addRootSignature(llvm::Function *Fn, llvm::Module &M) {
+void addRootSignature(
+ArrayRef Elements,
+llvm::Function *Fn, llvm::Module &M) {
   auto &Ctx = M.getContext();
-  IRBuilder<> B(M.getContext());
 
-  MDNode *ExampleRootSignature = MDNode::get(Ctx, {});
+  SmallVector GeneratedMetadata;
+  for (auto Element : Elements) {
+MDNode *ExampleRootElement = MDNode::get(Ctx, {});
+GeneratedMetadata.push_back(ExampleRootElement);
+  }
+
+  MDNode *ExampleRootSignature = MDNode::get(Ctx, GeneratedMetadata);
 
   MDNode *ExamplePairing = MDNode::get(Ctx, {ValueAsMetadata::get(Fn),
  ExampleRootSignature});
@@ -471,8 +478,8 @@ void CGHLSLRuntime::emitEntryFunction(const FunctionDecl 
*FD,
   // Add and identify root signature to function, if applicable
   const AttrVec &Attrs = FD->getAttrs();
   for (const Attr *Attr : Attrs) {
-if (isa(Attr))
-  addRootSignature(EntryFn, M);
+if (const auto *RSAttr = dyn_cast(Attr))
+  addRootSignature(RSAttr->getElements(), EntryFn, M);
   }
 }
 
diff --git a/clang/test/CodeGenHLSL/RootSignature.hlsl 
b/clang/test/CodeGenHLSL/RootSignature.hlsl
index 1ea9ab7aaa2c3..63c0505e224f0 100644
--- a/clang/test/CodeGenHLSL/RootSignature.hlsl
+++ b/clang/test/CodeGenHLSL/RootSignature.hlsl
@@ -1,9 +1,10 @@
 // RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -o - %s | 
FileCheck %s
 
-// CHECK:

[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Add Metadata generation of Root Signatures for Attr (PR #125131)

2025-03-05 Thread Joshua Batista via llvm-branch-commits



@@ -0,0 +1,108 @@
+//===- HLSLRootSignature.cpp - HLSL Root Signature helper objects
+//--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+///
+/// \file This file contains helpers for working with HLSL Root Signatures.
+///
+//===--===//
+
+#include "llvm/Frontend/HLSL/HLSLRootSignature.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+
+namespace llvm {
+namespace hlsl {
+namespace rootsig {
+
+// Static helper functions
+
+static MDString *ClauseTypeToName(LLVMContext &Ctx, ClauseType Type) {
+  StringRef Name;
+  switch (Type) {
+  case ClauseType::CBuffer:
+Name = "CBV";
+break;
+  case ClauseType::SRV:
+Name = "SRV";
+break;
+  case ClauseType::UAV:
+Name = "UAV";
+break;
+  case ClauseType::Sampler:
+Name = "Sampler";
+break;
+  }
+  return MDString::get(Ctx, Name);
+}
+
+// Helper struct so that we can use the overloaded notation of std::visit
+template  struct OverloadBuilds : Ts... {
+  using Ts::operator()...;
+};
+template  OverloadBuilds(Ts...) -> OverloadBuilds;
+
+MDNode *MetadataBuilder::BuildRootSignature() {
+  for (const RootElement &Element : Elements) {
+MDNode *ElementMD =
+std::visit(OverloadBuilds{

bob80905 wrote:

Could you add a comment here for clarity?
Something like "for each element: if it's a desctable, run this; if it's a 
desctableclause, run this"
Had to look up what std::visit does to understand this.

https://github.com/llvm/llvm-project/pull/125131
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] AMDGPU: Replace amdgpu-no-agpr with amdgpu-num-agpr (PR #129893)

2025-03-05 Thread Stanislav Mekhanoshin via llvm-branch-commits


https://github.com/rampitec approved this pull request.


https://github.com/llvm/llvm-project/pull/129893
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] f414ee4 - Revert "Revert "[clang] Lower modf builtin using `llvm.modf` intrinsic" (#127…"

2025-03-05 Thread via llvm-branch-commits


Author: Benjamin Maxwell
Date: 2025-03-05T13:48:07Z
New Revision: f414ee456de732da99ae6a4c88304a6d82a7ff18

URL: 
https://github.com/llvm/llvm-project/commit/f414ee456de732da99ae6a4c88304a6d82a7ff18
DIFF: 
https://github.com/llvm/llvm-project/commit/f414ee456de732da99ae6a4c88304a6d82a7ff18.diff

LOG: Revert "Revert "[clang] Lower modf builtin using `llvm.modf` intrinsic" 
(#127…"

This reverts commit d595fc91aeb35cb7fad8ad37fa84a70863b09f69.

Added: 


Modified: 
clang/lib/CodeGen/CGBuiltin.cpp
clang/test/CodeGen/X86/math-builtins.c
clang/test/CodeGen/aix-builtin-mapping.c
clang/test/CodeGen/builtin-attributes.c
clang/test/CodeGen/math-builtins-long.c
clang/test/CodeGen/math-libcalls.c

Removed: 




diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index ab8f19b25fa66..bd559a96d3182 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -859,6 +859,24 @@ static void emitSincosBuiltin(CodeGenFunction &CGF, const 
CallExpr *E,
   StoreCos->setMetadata(LLVMContext::MD_noalias, AliasScopeList);
 }
 
+static llvm::Value *emitModfBuiltin(CodeGenFunction &CGF, const CallExpr *E,
+llvm::Intrinsic::ID IntrinsicID) {
+  llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(0));
+  llvm::Value *IntPartDest = CGF.EmitScalarExpr(E->getArg(1));
+
+  llvm::Value *Call =
+  CGF.Builder.CreateIntrinsic(IntrinsicID, {Val->getType()}, Val);
+
+  llvm::Value *FractionalResult = CGF.Builder.CreateExtractValue(Call, 0);
+  llvm::Value *IntegralResult = CGF.Builder.CreateExtractValue(Call, 1);
+
+  QualType DestPtrType = E->getArg(1)->getType()->getPointeeType();
+  LValue IntegralLV = CGF.MakeNaturalAlignAddrLValue(IntPartDest, DestPtrType);
+  CGF.EmitStoreOfScalar(IntegralResult, IntegralLV);
+
+  return FractionalResult;
+}
+
 /// EmitFAbs - Emit a call to @llvm.fabs().
 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
   Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
@@ -4120,6 +4138,15 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
   case Builtin::BI__builtin_frexpf128:
   case Builtin::BI__builtin_frexpf16:
 return RValue::get(emitFrexpBuiltin(*this, E, Intrinsic::frexp));
+  case Builtin::BImodf:
+  case Builtin::BImodff:
+  case Builtin::BImodfl:
+  case Builtin::BI__builtin_modf:
+  case Builtin::BI__builtin_modff:
+  case Builtin::BI__builtin_modfl:
+if (Builder.getIsFPConstrained())
+  break; // TODO: Emit constrained modf intrinsic once one exists.
+return RValue::get(emitModfBuiltin(*this, E, Intrinsic::modf));
   case Builtin::BI__builtin_isgreater:
   case Builtin::BI__builtin_isgreaterequal:
   case Builtin::BI__builtin_isless:

diff  --git a/clang/test/CodeGen/X86/math-builtins.c 
b/clang/test/CodeGen/X86/math-builtins.c
index 481d3c043683e..8a85d1f6c3a76 100644
--- a/clang/test/CodeGen/X86/math-builtins.c
+++ b/clang/test/CodeGen/X86/math-builtins.c
@@ -38,6 +38,24 @@ void foo(double *d, float f, float *fp, long double *l, int 
*i, const char *c) {
 // NO__ERRNO-NEXT: [[FREXP_F128_0:%.+]] = extractvalue { fp128, i32 } 
[[FREXP_F128]], 0
 
 
+// NO__ERRNO: [[MODF_F64:%.+]] = call { double, double } @llvm.modf.f64(double 
%{{.+}})
+// NO__ERRNO-NEXT: [[MODF_F64_FP:%.+]] = extractvalue { double, double } 
[[MODF_F64]], 0
+// NO__ERRNO-NEXT: [[MODF_F64_IP:%.+]] = extractvalue { double, double } 
[[MODF_F64]], 1
+// NO__ERRNO-NEXT: store double [[MODF_F64_IP]], ptr %{{.+}}, align 8
+
+// NO__ERRNO: [[MODF_F32:%.+]] = call { float, float } @llvm.modf.f32(float 
%{{.+}})
+// NO__ERRNO-NEXT: [[MODF_F32_FP:%.+]] = extractvalue { float, float } 
[[MODF_F32]], 0
+// NO__ERRNO-NEXT: [[MODF_F32_IP:%.+]] = extractvalue { float, float } 
[[MODF_F32]], 1
+// NO__ERRNO-NEXT: store float [[MODF_F32_IP]], ptr %{{.+}}, align 4
+
+// NO__ERRNO: [[MODF_F80:%.+]] = call { x86_fp80, x86_fp80 } 
@llvm.modf.f80(x86_fp80 %{{.+}})
+// NO__ERRNO-NEXT: [[MODF_F80_FP:%.+]] = extractvalue { x86_fp80, x86_fp80 } 
[[MODF_F80]], 0
+// NO__ERRNO-NEXT: [[MODF_F80_IP:%.+]] = extractvalue { x86_fp80, x86_fp80 } 
[[MODF_F80]], 1
+// NO__ERRNO-NEXT: store x86_fp80 [[MODF_F80_IP]], ptr %{{.+}}, align 16
+
+// NO__ERRNO: call fp128 @modff128(fp128 noundef %{{.+}}, ptr noundef %{{.+}})
+
+
 // NO__ERRNO: [[SINCOS_F64:%.+]] = call { double, double } 
@llvm.sincos.f64(double %{{.+}})
 // NO__ERRNO-NEXT: [[SINCOS_F64_0:%.+]] = extractvalue { double, double } 
[[SINCOS_F64]], 0
 // NO__ERRNO-NEXT: [[SINCOS_F64_1:%.+]] = extractvalue { double, double } 
[[SINCOS_F64]], 1
@@ -158,13 +176,13 @@ void foo(double *d, float f, float *fp, long double *l, 
int *i, const char *c) {
 
   __builtin_modf(f,d);   __builtin_modff(f,fp);  __builtin_modfl(f,l); 
__builtin_modff128(f,l);
 
-// NO__ERRNO: declare double @modf(double noundef, ptr noundef) 
[[NOT_READNONE:#[0-9]+]]
-

[llvm-branch-commits] [llvm] [CodeGen][NPM] Port MachineBlockPlacementStats to NPM (PR #129853)

2025-03-05 Thread Akshat Oke via llvm-branch-commits


optimisan wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack  href="https://app.graphite.dev/github/pr/llvm/llvm-project/129853?utm_source=stack-comment-downstack-mergeability-warning";
>  >on Graphite.
> https://graphite.dev/docs/merge-pull-requests";>Learn more

* **#129853** https://app.graphite.dev/github/pr/llvm/llvm-project/129853?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/> 👈 https://app.graphite.dev/github/pr/llvm/llvm-project/129853?utm_source=stack-comment-view-in-graphite";
 target="_blank">(View in Graphite)
* **#129828** https://app.graphite.dev/github/pr/llvm/llvm-project/129828?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* `main`




This stack of pull requests is managed by https://graphite.dev?utm-source=stack-comment";>Graphite. Learn 
more about https://stacking.dev/?utm_source=stack-comment";>stacking.


https://github.com/llvm/llvm-project/pull/129853
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] AMDGPU: Start considering new atomicrmw metadata on integer operations (PR #122138)

2025-03-05 Thread Matt Arsenault via llvm-branch-commits


arsenm wrote:

psdb fails on some atomic tests with these, so they need some debugging 

https://github.com/llvm/llvm-project/pull/122138
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] AMDGPU: Replace amdgpu-no-agpr with amdgpu-num-agpr (PR #129893)

2025-03-05 Thread via llvm-branch-commits


llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)


Changes

This performs the minimal replacement of amdgpu-no-agpr with
amdgpu-num-agpr=0. Most of the test diffs are due to the new
attribute sorting later alphabetically.

We could do better by trying to perform range merging in the attributor,
and trying to pick non-0 values.

---

Patch is 168.24 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/129893.diff


45 Files Affected:

- (modified) llvm/docs/AMDGPUUsage.rst (+1-6) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp (+7-2) 
- (modified) llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp (+4-1) 
- (modified) llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp (+1-7) 
- (modified) llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll (+2-2) 
- (modified) 
llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers-assertion-after-ra-failure.ll
 (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll (+2-2) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll (+3-3) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgpu-no-agprs-violations.ll (+7-6) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgpu-num-agpr.ll (+6-6) 
- (modified) llvm/test/CodeGen/AMDGPU/amdhsa-kernarg-preload-num-sgprs.ll 
(+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll 
(+21-21) 
- (modified) llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll (+13-13) 
- (modified) llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll (+9-9) 
- (modified) llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit.ll (+6-6) 
- (modified) llvm/test/CodeGen/AMDGPU/captured-frame-index.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/copy-vgpr-clobber-spill-vgpr.mir (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll (+13-13) 
- (modified) llvm/test/CodeGen/AMDGPU/indirect-call-set-from-other-function.ll 
(+1-1) 
- (modified) 
llvm/test/CodeGen/AMDGPU/invalid-hidden-kernarg-in-kernel-signature.ll (+1-1) 
- (modified) 
llvm/test/CodeGen/AMDGPU/issue120256-annotate-constexpr-addrspacecast.ll (+2-2) 
- (modified) llvm/test/CodeGen/AMDGPU/mfma-bf16-vgpr-cd-select.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll (+2-2) 
- (modified) llvm/test/CodeGen/AMDGPU/mfma-vgpr-cd-select-gfx942.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/mfma-vgpr-cd-select.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/preload-kernargs.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll (+9-9) 
- (modified) llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll (+21-21) 
- (modified) llvm/test/CodeGen/AMDGPU/recursive_global_initializer.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll (+5-5) 
- (modified) llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll (+3-3) 
- (modified) llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/smfmac_no_agprs.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/spill-regpressure-less.mir (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll 
(+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll (+3-3) 
- (modified) 
llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll (+2-2) 
- (modified) 
llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll 
(+2-2) 
- (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll 
(+2-2) 
- (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll 
(+3-3) 
- (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/vgpr-agpr-limit-gfx90a.ll (+6-6) 


``diff
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index c317223f49d7c..def6addd595e8 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -1698,11 +1698,6 @@ The AMDGPU backend supports the following LLVM IR 
attributes.
   
``amdgpu_max_num_work_groups`` CLANG attribute [CLANG-ATTR]_. Clang only
   emits this attribute 
when all the three numbers are >= 1.
 
- "amdgpu-no-agpr" Indicates the function 
will not require allocating AGPRs. This is only
-  relevant on subtargets 
with AGPRs. The behavior is undefined if a
-  function which requires 
AGPRs is reached through any function marked
-  with this attribute.
-
  "amdgpu-hidden-argument"

[llvm-branch-commits] [llvm] AMDGPU: Replace amdgpu-no-agpr with amdgpu-num-agpr (PR #129893)

2025-03-05 Thread Matt Arsenault via llvm-branch-commits


https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/129893
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] AMDGPU: Replace amdgpu-no-agpr with amdgpu-num-agpr (PR #129893)

2025-03-05 Thread Matt Arsenault via llvm-branch-commits


arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack  href="https://app.graphite.dev/github/pr/llvm/llvm-project/129893?utm_source=stack-comment-downstack-mergeability-warning";
>  >on Graphite.
> https://graphite.dev/docs/merge-pull-requests";>Learn more

* **#129893** https://app.graphite.dev/github/pr/llvm/llvm-project/129893?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/> 👈 https://app.graphite.dev/github/pr/llvm/llvm-project/129893?utm_source=stack-comment-view-in-graphite";
 target="_blank">(View in Graphite)
* **#128034** https://app.graphite.dev/github/pr/llvm/llvm-project/128034?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* `main`




This stack of pull requests is managed by https://graphite.dev?utm-source=stack-comment";>Graphite. Learn 
more about https://stacking.dev/?utm_source=stack-comment";>stacking.


https://github.com/llvm/llvm-project/pull/129893
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] release/20.x: [Clang] Treat constexpr-unknown value as invalid in `EvaluateAsInitializer` (#128409) (PR #129836)

2025-03-05 Thread via llvm-branch-commits


llvmbot wrote:

@shafik What do you think about merging this PR to the release branch?

https://github.com/llvm/llvm-project/pull/129836
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [clang] [sanitizer] add pseudofunction to indicate array-bounds check (PR #128977)

2025-03-05 Thread Dan Liew via llvm-branch-commits


delcypher wrote:

@fmayer The usual approach for indicating instrumentation in Clang is to use 
opt-remarks. This is the approach we use for `-fbounds-safety`. 

In `-fbounds-safety` we embed "trap reasons" in debug info so that debuggers 
and symbolication tools can better understand the reason for trapping.

What's the reason for using debug info, instead of opt-remarks here?

https://github.com/llvm/llvm-project/pull/128977
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [clang] [sanitizer] add pseudofunction to indicate array-bounds check (PR #128977)

2025-03-05 Thread Florian Mayer via llvm-branch-commits

fmayer wrote:

> @fmayer The usual approach for indicating instrumentation in Clang is to use 
> opt-remarks. This is the approach we use for `-fbounds-safety`.
> 
> In `-fbounds-safety` we embed "trap reasons" in debug info so that debuggers 
> and symbolication tools can better understand the reason for trapping.
> 
> What's the reason for using debug info, instead of opt-remarks here?

The commit description is maybe not very clear. This is not for _compile time_, 
as opt-remarks are, but for run time. By doing this, we can

1) (more importantly) use profilers to estimate how many cycles we spend on 
these checks (subject to caveats),
2) more easily see why we crashed.

https://github.com/llvm/llvm-project/pull/128977
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [DirectX] Updating DXContainer documentation to add Root Descriptors (PR #129759)

2025-03-05 Thread via llvm-branch-commits


https://github.com/joaosaffran updated 
https://github.com/llvm/llvm-project/pull/129759

>From b390cd27d2b32f0e3b3d13c8ef3020cbd6af1fa9 Mon Sep 17 00:00:00 2001
From: joaosaffran <126493771+joaosaff...@users.noreply.github.com>
Date: Tue, 4 Mar 2025 10:30:07 -0800
Subject: [PATCH 1/5] Adding root descriptor subsection

---
 llvm/docs/DirectX/DXContainer.rst | 50 ++-
 1 file changed, 49 insertions(+), 1 deletion(-)

diff --git a/llvm/docs/DirectX/DXContainer.rst 
b/llvm/docs/DirectX/DXContainer.rst
index 0e7026b03a606..14bc802ff6b97 100644
--- a/llvm/docs/DirectX/DXContainer.rst
+++ b/llvm/docs/DirectX/DXContainer.rst
@@ -496,4 +496,52 @@ signature and passed to the shader without requiring a 
constant buffer resource:
 #. **RegisterSpace**: The register space used for the binding.
 #. **Num32BitValues**: The number of 32-bit values included in this constant 
buffer.
 
-Root constants provide a fast way to pass small amounts of data directly to 
the shader without the overhead of creating and binding a constant buffer 
resource.
+Root constants provide a fast way to pass small amounts of data directly to 
the shader without the overhead 
+of creating and binding a constant buffer resource.
+
+Root Descriptor
+~~
+
+Root descriptors provide a direct mechanism for binding individual resources 
to shader stages in the Direct3D 12 
+rendering pipeline. They represent a critical interface for efficient resource 
management, allowing applications 
+to specify how shader stages access specific GPU resources.
+
+.. code-block:: cpp
+
+   // Version 1.0 Root Descriptor
+   struct RootDescriptor_V1_0 {
+  uint32_t ShaderRegister;
+  uint32_t RegisterSpace;
+   };
+   
+   // Version 1.1 Root Descriptor
+   struct RootDescriptor_V1_1 {
+  uint32_t ShaderRegister;
+  uint32_t RegisterSpace;
+  // New flags for Version 1.1
+  enum Flags {
+None= 0x0,
+DATA_STATIC = 0x1,
+DATA_STATIC_WHILE_SET_AT_EXECUTE = 0x2,
+DATA_VOLATILE   = 0x4
+  };
+  
+  // Bitfield of flags from the Flags enum
+  uint32_t Flags;
+   };
+
+The Root Descriptor structure has evolved to support two versions, providing 
enhanced flexibility and 
+performance optimization capabilities.
+
+Version 1.0 Root Descriptor
+'''
+The Version 1.0 RootDescriptor_V1_0 provides basic resource binding:
+
+#. **ShaderRegister**: The shader register where the descriptor is bound.
+#. **RegisterSpace**: The register space used for the binding.
+
+Version 1.1 Root Descriptor
+'''
+The Version 1.1 RootDescriptor_V1_1 extends the base structure with the 
following additional fields:
+
+#. **Flags**: Provides additional metadata about the descriptor's usage 
pattern.

>From 46face18e140b1313dfdc437e6f4ee03904d245a Mon Sep 17 00:00:00 2001
From: joaosaffran <126493771+joaosaff...@users.noreply.github.com>
Date: Tue, 4 Mar 2025 10:32:54 -0800
Subject: [PATCH 2/5] Fix git error

---
 llvm/docs/DirectX/DXContainer.rst | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/docs/DirectX/DXContainer.rst 
b/llvm/docs/DirectX/DXContainer.rst
index 14bc802ff6b97..93ed9afe42b50 100644
--- a/llvm/docs/DirectX/DXContainer.rst
+++ b/llvm/docs/DirectX/DXContainer.rst
@@ -496,8 +496,7 @@ signature and passed to the shader without requiring a 
constant buffer resource:
 #. **RegisterSpace**: The register space used for the binding.
 #. **Num32BitValues**: The number of 32-bit values included in this constant 
buffer.
 
-Root constants provide a fast way to pass small amounts of data directly to 
the shader without the overhead 
-of creating and binding a constant buffer resource.
+Root constants provide a fast way to pass small amounts of data directly to 
the shader without the overhead of creating and binding a constant buffer 
resource.
 
 Root Descriptor
 ~~

>From 6a260b3fe40c05b4f159a5d26345229ae221f593 Mon Sep 17 00:00:00 2001
From: joaosaffran <126493771+joaosaff...@users.noreply.github.com>
Date: Tue, 4 Mar 2025 10:53:11 -0800
Subject: [PATCH 3/5] Try fix test

---
 llvm/docs/DirectX/DXContainer.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/docs/DirectX/DXContainer.rst 
b/llvm/docs/DirectX/DXContainer.rst
index 93ed9afe42b50..b9a2067368e0f 100644
--- a/llvm/docs/DirectX/DXContainer.rst
+++ b/llvm/docs/DirectX/DXContainer.rst
@@ -499,7 +499,7 @@ signature and passed to the shader without requiring a 
constant buffer resource:
 Root constants provide a fast way to pass small amounts of data directly to 
the shader without the overhead of creating and binding a constant buffer 
resource.
 
 Root Descriptor
-~~
+~~~
 
 Root descriptors provide a direct mechanism for binding individual resources 
to shader stages in the Direct3D 12 
 rendering pipeline. They represent a critical interface for efficient resource

[llvm-branch-commits] [clang] [clang] [sanitizer] add pseudofunction to indicate array-bounds check (PR #128977)

2025-03-05 Thread Florian Mayer via llvm-branch-commits


https://github.com/fmayer edited 
https://github.com/llvm/llvm-project/pull/128977
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [clang] [sanitizer] add pseudofunction to indicate array-bounds check (PR #128977)

2025-03-05 Thread Dan Liew via llvm-branch-commits


https://github.com/delcypher edited 
https://github.com/llvm/llvm-project/pull/128977
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [clang] [sanitizer] add pseudofunction to indicate array-bounds check (PR #128977)

2025-03-05 Thread Dan Liew via llvm-branch-commits


https://github.com/delcypher approved this pull request.

Thanks for explaining the purpose.

Regarding the "more easily see why we crashed." please be aware [I have a GSoC 
proposal to basically do 
this](https://discourse.llvm.org/t/clang-gsoc-2025-usability-improvements-for-trapping-undefined-behavior-sanitizer/84568)
 using the `createTrapFailureMessageFor`. So if possible please don't tackle 
what I describe in the proposal before a GSoC student has had a chance to do 
this. To be clear, what you've done in this PR is different from what I'm 
proposing, so they don't conflict.

Also

* Please give a chance for Clang Debug Info contributors to look over this (CC 
@adrian-prantl) before landing this.
* When possible add as reviewers previous people who worked on the code. In 
this particular case I believe this was @ahatanak 

https://github.com/llvm/llvm-project/pull/128977
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [clang] [sanitizer] add pseudofunction to indicate array-bounds check (PR #128977)

2025-03-05 Thread Dan Liew via llvm-branch-commits



@@ -3598,6 +3598,14 @@ llvm::DIMacroFile 
*CGDebugInfo::CreateTempMacroFile(llvm::DIMacroFile *Parent,
   return DBuilder.createTempMacroFile(Parent, Line, FName);
 }
 
+llvm::DILocation *CGDebugInfo::CreateSyntheticInline(llvm::DebugLoc Location,
+ StringRef FuncName) {
+  llvm::DISubprogram *TrapSP =

delcypher wrote:

Nit. The name `TrapSP` doesn't make much sense here given this function isn't 
specifically for traps.

https://github.com/llvm/llvm-project/pull/128977
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [clang] [sanitizer] add pseudofunction to indicate array-bounds check (PR #128977)

2025-03-05 Thread Dan Liew via llvm-branch-commits



@@ -635,6 +635,13 @@ class CGDebugInfo {
   llvm::DILocation *CreateTrapFailureMessageFor(llvm::DebugLoc TrapLocation,
 StringRef Category,
 StringRef FailureMsg);
+  /// Create a debug location from `Location` that adds an artificial inline
+  /// frame where the frame name is FuncName
+  ///
+  /// This is used to indicate instructions that come from compiler
+  /// instrumentation.
+  llvm::DILocation *CreateSyntheticInline(llvm::DebugLoc Location,

delcypher wrote:

Nit. Maybe call it `CreateSyntheticInlineAt` ? Those who know more about Clang 
debug info generation (e.g. @adrian-prantl @felipepiovezan @Michael137 ) might 
have ideas on a better name though.

https://github.com/llvm/llvm-project/pull/128977
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [clang] [sanitizer] add pseudofunction to indicate array-bounds check (PR #128977)

2025-03-05 Thread Dan Liew via llvm-branch-commits


https://github.com/delcypher edited 
https://github.com/llvm/llvm-project/pull/128977
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [CodeGen][StaticDataSplitter]Support constant pool partitioning (PR #129781)

2025-03-05 Thread Mingming Liu via llvm-branch-commits



@@ -2791,8 +2791,26 @@ void AsmPrinter::emitConstantPool() {
 if (!CPE.isMachineConstantPoolEntry())
   C = CPE.Val.ConstVal;
 
-MCSection *S = getObjFileLowering().getSectionForConstant(
-getDataLayout(), Kind, C, Alignment);
+MCSection *S = nullptr;

mingmingl-llvm wrote:

done.

https://github.com/llvm/llvm-project/pull/129781
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [CodeGen][StaticDataSplitter]Support constant pool partitioning (PR #129781)

2025-03-05 Thread Mingming Liu via llvm-branch-commits



@@ -1072,6 +1072,41 @@ MCSection 
*TargetLoweringObjectFileELF::getSectionForConstant(
   return DataRelROSection;
 }
 
+MCSection *TargetLoweringObjectFileELF::getSectionForConstant(
+const DataLayout &DL, SectionKind Kind, const Constant *C, Align 
&Alignment,
+StringRef SectionPrefix) const {

mingmingl-llvm wrote:

done.

https://github.com/llvm/llvm-project/pull/129781
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [CodeGen][StaticDataSplitter]Support constant pool partitioning (PR #129781)

2025-03-05 Thread Mingming Liu via llvm-branch-commits



@@ -203,17 +218,34 @@ void StaticDataSplitter::updateStatsWithProfiles(const 
MachineFunction &MF) {
 
 void StaticDataSplitter::annotateStaticDataWithoutProfiles(
 const MachineFunction &MF) {
+  const MachineConstantPool *MCP = MF.getConstantPool();
   for (const auto &MBB : MF) {
 for (const MachineInstr &I : MBB) {
   for (const MachineOperand &Op : I.operands()) {
-if (!Op.isGlobal())
-  continue;
-const GlobalVariable *GV =
-getLocalLinkageGlobalVariable(Op.getGlobal());
-if (!GV || GV->getName().starts_with("llvm.") ||
-!inStaticDataSection(GV, MF.getTarget()))
+if (!Op.isGlobal() && !Op.isCPI())
   continue;
-SDPI->addConstantProfileCount(GV, std::nullopt);
+if (Op.isGlobal()) {
+  const GlobalVariable *GV =
+  getLocalLinkageGlobalVariable(Op.getGlobal());
+  if (!GV || GV->getName().starts_with("llvm.") ||
+  !inStaticDataSection(GV, MF.getTarget()))
+continue;
+  SDPI->addConstantProfileCount(GV, std::nullopt);
+} else {
+  assert(Op.isCPI() && "Op must be constant pool index in this 
branch");

mingmingl-llvm wrote:

Added `getConstant` helper function to share code between profile and 
non-profile path.

https://github.com/llvm/llvm-project/pull/129781
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [CodeGen][StaticDataSplitter]Support constant pool partitioning (PR #129781)

2025-03-05 Thread Mingming Liu via llvm-branch-commits



@@ -386,6 +386,16 @@ MCSection *TargetLoweringObjectFile::getSectionForConstant(
   return DataSection;
 }
 
+MCSection *TargetLoweringObjectFile::getSectionForConstant(
+const DataLayout &DL, SectionKind Kind, const Constant *C, Align 
&Alignment,
+StringRef SectionPrefix) const {
+  // Fallback to `getSectionForConstant` without `SectionPrefix` parameter if 
it
+  // is empty.
+  if (SectionPrefix.empty())

mingmingl-llvm wrote:

Do you mean something like `assert(!SectionPrefix.empty() && "Call another 
method if section prefix is empty")` here?

I think with the refactor suggested above 
(https://github.com/llvm/llvm-project/pull/129781/files#r1980454779), we can 
allow the new interface to handle empty section prefix by falling back to the 
original interface. What do you think about it?

https://github.com/llvm/llvm-project/pull/129781
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [CodeGen][StaticDataSplitter]Support constant pool partitioning (PR #129781)

2025-03-05 Thread Mingming Liu via llvm-branch-commits


https://github.com/mingmingl-llvm updated 
https://github.com/llvm/llvm-project/pull/129781

>From 072c44f0f9272682480cc2837196a906bd694276 Mon Sep 17 00:00:00 2001
From: mingmingl 
Date: Fri, 28 Feb 2025 14:41:56 -0800
Subject: [PATCH 1/2] [CodeGen][StaticDataSplitter]Support constant pool
 partitioning

---
 llvm/include/llvm/CodeGen/AsmPrinter.h|   8 +
 .../CodeGen/TargetLoweringObjectFileImpl.h|   6 +
 .../llvm/Target/TargetLoweringObjectFile.h|   7 +
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp|  22 ++-
 llvm/lib/CodeGen/StaticDataSplitter.cpp   |  56 +--
 .../CodeGen/TargetLoweringObjectFileImpl.cpp  |  35 +
 llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp |  10 ++
 llvm/lib/Target/TargetLoweringObjectFile.cpp  |  10 ++
 llvm/lib/Target/X86/X86AsmPrinter.cpp |  10 ++
 .../AArch64/constant-pool-partition.ll| 141 ++
 .../CodeGen/X86/constant-pool-partition.ll| 131 
 11 files changed, 422 insertions(+), 14 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/constant-pool-partition.ll
 create mode 100644 llvm/test/CodeGen/X86/constant-pool-partition.ll

diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h 
b/llvm/include/llvm/CodeGen/AsmPrinter.h
index 3da63af5ba571..2018f411be796 100644
--- a/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -18,6 +18,8 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/StaticDataProfileInfo.h"
 #include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/CodeGen/DwarfStringPoolEntry.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
@@ -132,6 +134,12 @@ class AsmPrinter : public MachineFunctionPass {
   /// default, this is equal to CurrentFnSym.
   MCSymbol *CurrentFnSymForSize = nullptr;
 
+  /// Provides the profile information for constants.
+  const StaticDataProfileInfo *SDPI = nullptr;
+
+  /// The profile summary information.
+  const ProfileSummaryInfo *PSI = nullptr;
+
   /// Map a basic block section ID to the begin and end symbols of that section
   ///  which determine the section's range.
   struct MBBSectionRange {
diff --git a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h 
b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
index 10f0594c267ae..563980fb24ab8 100644
--- a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
+++ b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
@@ -68,6 +68,12 @@ class TargetLoweringObjectFileELF : public 
TargetLoweringObjectFile {
const Constant *C,
Align &Alignment) const override;
 
+  /// Similar to the function above, but append \p SectionSuffix to the section
+  /// name.
+  MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
+   const Constant *C, Align &Alignment,
+   StringRef SectionSuffix) const override;
+
   MCSection *getExplicitSectionGlobal(const GlobalObject *GO, SectionKind Kind,
   const TargetMachine &TM) const override;
 
diff --git a/llvm/include/llvm/Target/TargetLoweringObjectFile.h 
b/llvm/include/llvm/Target/TargetLoweringObjectFile.h
index a5ed1b29dc1bc..1956748b8058b 100644
--- a/llvm/include/llvm/Target/TargetLoweringObjectFile.h
+++ b/llvm/include/llvm/Target/TargetLoweringObjectFile.h
@@ -104,6 +104,13 @@ class TargetLoweringObjectFile : public MCObjectFileInfo {
SectionKind Kind, const Constant *C,
Align &Alignment) const;
 
+  /// Similar to the function above, but append \p SectionSuffix to the section
+  /// name.
+  virtual MCSection *getSectionForConstant(const DataLayout &DL,
+   SectionKind Kind, const Constant *C,
+   Align &Alignment,
+   StringRef SectionSuffix) const;
+
   virtual MCSection *
   getSectionForMachineBasicBlock(const Function &F,
  const MachineBasicBlock &MBB,
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp 
b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 3c4280333e76d..60018afe2f8a7 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -2791,8 +2791,26 @@ void AsmPrinter::emitConstantPool() {
 if (!CPE.isMachineConstantPoolEntry())
   C = CPE.Val.ConstVal;
 
-MCSection *S = getObjFileLowering().getSectionForConstant(
-getDataLayout(), Kind, C, Alignment);
+MCSection *S = nullptr;
+if (TM.Options.EnableStaticDataPartitioning) {
+  SmallString<8> SectionNameSuffix;
+  if (C && SDPI && PSI) {
+auto Count = SDPI->getConstantProfileCount(C);
+if (Count) {
+

[llvm-branch-commits] [llvm] [InstCombine] Enable select freeze poison folding when storing value (PR #129776)

2025-03-05 Thread John McIver via llvm-branch-commits


jmciver wrote:

@nikic and @dtcxzyw thanks for the feedback.

This patch bypasses the need for freeze poison -> null canonicalization 
removal in InstCombine. To provide context, I am seeing the lack of 
store-of-select freeze poison folding in the uninitialized memory semantics 
work that I am doing with @nlopes.

I would like to incorporate this and then, once freeze poison -> null 
canonicalization is removed from InstCombine, refactor appropriately. Would this 
be acceptable?


https://github.com/llvm/llvm-project/pull/129776
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

62 matches

Mail list logo