[llvm-branch-commits] [llvm] release/20.x: [AArch64] Fix BE popcount casts. (#129879) (PR #129996)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/129996 Backport b673a59c9ae5 ab811e75734a Requested by: @davemgreen >From e0f31d9f2345b4ddf4ac96e8275524aac5e827d4 Mon Sep 17 00:00:00 2001 From: David Green Date: Wed, 5 Mar 2025 11:23:33 + Subject: [PATCH 1/2] [AArch64] Add BE test coverage for popcount. NFC For #129843 (cherry picked from commit b673a59c9ae5583aa08a8d34a48f9409b660d826) --- llvm/test/CodeGen/AArch64/arm64-popcnt.ll | 161 ++ llvm/test/CodeGen/AArch64/popcount.ll | 104 ++ 2 files changed, 265 insertions(+) diff --git a/llvm/test/CodeGen/AArch64/arm64-popcnt.ll b/llvm/test/CodeGen/AArch64/arm64-popcnt.ll index ad0904ff98080..369667ec33f66 100644 --- a/llvm/test/CodeGen/AArch64/arm64-popcnt.ll +++ b/llvm/test/CodeGen/AArch64/arm64-popcnt.ll @@ -2,6 +2,7 @@ ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s ; RUN: llc < %s -mtriple=aarch64 -mattr -neon -aarch64-neon-syntax=apple | FileCheck -check-prefix=CHECK-NONEON %s ; RUN: llc < %s -mtriple=aarch64 -mattr +cssc -aarch64-neon-syntax=apple | FileCheck -check-prefix=CHECK-CSSC %s +; RUN: llc < %s -mtriple=aarch64_be-none-eabi | FileCheck %s --check-prefix=CHECK-BE define i32 @cnt32_advsimd(i32 %x) nounwind readnone { ; CHECK-LABEL: cnt32_advsimd: @@ -32,6 +33,14 @@ define i32 @cnt32_advsimd(i32 %x) nounwind readnone { ; CHECK-CSSC: // %bb.0: ; CHECK-CSSC-NEXT:cnt w0, w0 ; CHECK-CSSC-NEXT:ret +; +; CHECK-BE-LABEL: cnt32_advsimd: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT:fmov s0, w0 +; CHECK-BE-NEXT:cnt v0.8b, v0.8b +; CHECK-BE-NEXT:addv b0, v0.8b +; CHECK-BE-NEXT:fmov w0, s0 +; CHECK-BE-NEXT:ret %cnt = tail call i32 @llvm.ctpop.i32(i32 %x) ret i32 %cnt } @@ -69,6 +78,16 @@ define i32 @cnt32_advsimd_2(<2 x i32> %x) { ; CHECK-CSSC-NEXT:fmov w8, s0 ; CHECK-CSSC-NEXT:cnt w0, w8 ; CHECK-CSSC-NEXT:ret +; +; CHECK-BE-LABEL: cnt32_advsimd_2: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT:rev64 v0.2s, v0.2s +; CHECK-BE-NEXT:fmov w8, s0 +; CHECK-BE-NEXT:fmov s0, w8 
+; CHECK-BE-NEXT:cnt v0.8b, v0.8b +; CHECK-BE-NEXT:addv b0, v0.8b +; CHECK-BE-NEXT:fmov w0, s0 +; CHECK-BE-NEXT:ret %1 = extractelement <2 x i32> %x, i64 0 %2 = tail call i32 @llvm.ctpop.i32(i32 %1) ret i32 %2 @@ -103,6 +122,16 @@ define i64 @cnt64_advsimd(i64 %x) nounwind readnone { ; CHECK-CSSC: // %bb.0: ; CHECK-CSSC-NEXT:cnt x0, x0 ; CHECK-CSSC-NEXT:ret +; +; CHECK-BE-LABEL: cnt64_advsimd: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT:fmov d0, x0 +; CHECK-BE-NEXT:rev64 v0.8b, v0.8b +; CHECK-BE-NEXT:cnt v0.8b, v0.8b +; CHECK-BE-NEXT:addv b0, v0.8b +; CHECK-BE-NEXT:rev64 v0.8b, v0.8b +; CHECK-BE-NEXT:fmov x0, d0 +; CHECK-BE-NEXT:ret %cnt = tail call i64 @llvm.ctpop.i64(i64 %x) ret i64 %cnt } @@ -147,6 +176,22 @@ define i32 @cnt32(i32 %x) nounwind readnone noimplicitfloat { ; CHECK-CSSC: // %bb.0: ; CHECK-CSSC-NEXT:cnt w0, w0 ; CHECK-CSSC-NEXT:ret +; +; CHECK-BE-LABEL: cnt32: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT:lsr w9, w0, #1 +; CHECK-BE-NEXT:mov w8, #16843009 // =0x1010101 +; CHECK-BE-NEXT:and w9, w9, #0x +; CHECK-BE-NEXT:sub w9, w0, w9 +; CHECK-BE-NEXT:lsr w10, w9, #2 +; CHECK-BE-NEXT:and w9, w9, #0x +; CHECK-BE-NEXT:and w10, w10, #0x +; CHECK-BE-NEXT:add w9, w9, w10 +; CHECK-BE-NEXT:add w9, w9, w9, lsr #4 +; CHECK-BE-NEXT:and w9, w9, #0xf0f0f0f +; CHECK-BE-NEXT:mul w8, w9, w8 +; CHECK-BE-NEXT:lsr w0, w8, #24 +; CHECK-BE-NEXT:ret %cnt = tail call i32 @llvm.ctpop.i32(i32 %x) ret i32 %cnt } @@ -188,6 +233,22 @@ define i64 @cnt64(i64 %x) nounwind readnone noimplicitfloat { ; CHECK-CSSC: // %bb.0: ; CHECK-CSSC-NEXT:cnt x0, x0 ; CHECK-CSSC-NEXT:ret +; +; CHECK-BE-LABEL: cnt64: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT:lsr x9, x0, #1 +; CHECK-BE-NEXT:mov x8, #72340172838076673 // =0x101010101010101 +; CHECK-BE-NEXT:and x9, x9, #0x +; CHECK-BE-NEXT:sub x9, x0, x9 +; CHECK-BE-NEXT:lsr x10, x9, #2 +; CHECK-BE-NEXT:and x9, x9, #0x +; CHECK-BE-NEXT:and x10, x10, #0x +; CHECK-BE-NEXT:add x9, x9, x10 +; CHECK-BE-NEXT:add x9, x9, x9, lsr #4 +; CHECK-BE-NEXT:and x9, x9, 
#0xf0f0f0f0f0f0f0f +; CHECK-BE-NEXT:mul x8, x9, x8 +; CHECK-BE-NEXT:lsr x0, x8, #56 +; CHECK-BE-NEXT:ret %cnt = tail call i64 @llvm.ctpop.i64(i64 %x) ret i64 %cnt } @@ -215,6 +276,14 @@ define i32 @ctpop_eq_one(i64 %x) nounwind readnone { ; CHECK-CSSC-NEXT:cmp x8, #1 ; CHECK-CSSC-NEXT:cset w0, eq ; CHECK-CSSC-NEXT:ret +; +; CHECK-BE-LABEL: ctpop_eq_one: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT:sub x8, x0, #1 +; CHECK-BE-NEXT:eor x9, x0, x8 +; CHECK-BE-NEXT:cmp x9, x8 +; CHECK-BE-NEXT:cset w0, hi +; CHECK
[llvm-branch-commits] [llvm] release/20.x: [AArch64] Fix BE popcount casts. (#129879) (PR #129996)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/129996 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [AArch64] Fix BE popcount casts. (#129879) (PR #129996)
llvmbot wrote: @llvm/pr-subscribers-backend-aarch64 Author: None (llvmbot) Changes Backport b673a59c9ae5 ab811e75734a Requested by: @davemgreen --- Full diff: https://github.com/llvm/llvm-project/pull/129996.diff 4 Files Affected: - (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+8-2) - (modified) llvm/test/CodeGen/AArch64/arm64-popcnt.ll (+159) - (modified) llvm/test/CodeGen/AArch64/parity.ll (+1-1) - (modified) llvm/test/CodeGen/AArch64/popcount.ll (+104-1) ``diff diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index b5cca88b6b511..ca357382c472d 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -10783,7 +10783,10 @@ SDValue AArch64TargetLowering::LowerCTPOP_PARITY(SDValue Op, if (VT == MVT::i32) AddV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, AddV, DAG.getConstant(0, DL, MVT::i64)); -AddV = DAG.getNode(ISD::BITCAST, DL, VT, AddV); +else + AddV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, + DAG.getNode(AArch64ISD::NVCAST, DL, MVT::v1i64, AddV), + DAG.getConstant(0, DL, MVT::i64)); if (IsParity) AddV = DAG.getNode(ISD::AND, DL, VT, AddV, DAG.getConstant(1, DL, VT)); return AddV; @@ -10792,7 +10795,10 @@ SDValue AArch64TargetLowering::LowerCTPOP_PARITY(SDValue Op, SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v16i8, Val); SDValue AddV = DAG.getNode(AArch64ISD::UADDV, DL, MVT::v16i8, CtPop); -AddV = DAG.getNode(ISD::BITCAST, DL, VT, AddV); +AddV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, + DAG.getNode(AArch64ISD::NVCAST, DL, MVT::v2i64, AddV), + DAG.getConstant(0, DL, MVT::i64)); +AddV = DAG.getZExtOrTrunc(AddV, DL, VT); if (IsParity) AddV = DAG.getNode(ISD::AND, DL, VT, AddV, DAG.getConstant(1, DL, VT)); return AddV; diff --git a/llvm/test/CodeGen/AArch64/arm64-popcnt.ll b/llvm/test/CodeGen/AArch64/arm64-popcnt.ll index ad0904ff98080..d06e42f5405ef 100644 --- 
a/llvm/test/CodeGen/AArch64/arm64-popcnt.ll +++ b/llvm/test/CodeGen/AArch64/arm64-popcnt.ll @@ -2,6 +2,7 @@ ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s ; RUN: llc < %s -mtriple=aarch64 -mattr -neon -aarch64-neon-syntax=apple | FileCheck -check-prefix=CHECK-NONEON %s ; RUN: llc < %s -mtriple=aarch64 -mattr +cssc -aarch64-neon-syntax=apple | FileCheck -check-prefix=CHECK-CSSC %s +; RUN: llc < %s -mtriple=aarch64_be-none-eabi | FileCheck %s --check-prefix=CHECK-BE define i32 @cnt32_advsimd(i32 %x) nounwind readnone { ; CHECK-LABEL: cnt32_advsimd: @@ -32,6 +33,14 @@ define i32 @cnt32_advsimd(i32 %x) nounwind readnone { ; CHECK-CSSC: // %bb.0: ; CHECK-CSSC-NEXT:cnt w0, w0 ; CHECK-CSSC-NEXT:ret +; +; CHECK-BE-LABEL: cnt32_advsimd: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT:fmov s0, w0 +; CHECK-BE-NEXT:cnt v0.8b, v0.8b +; CHECK-BE-NEXT:addv b0, v0.8b +; CHECK-BE-NEXT:fmov w0, s0 +; CHECK-BE-NEXT:ret %cnt = tail call i32 @llvm.ctpop.i32(i32 %x) ret i32 %cnt } @@ -69,6 +78,16 @@ define i32 @cnt32_advsimd_2(<2 x i32> %x) { ; CHECK-CSSC-NEXT:fmov w8, s0 ; CHECK-CSSC-NEXT:cnt w0, w8 ; CHECK-CSSC-NEXT:ret +; +; CHECK-BE-LABEL: cnt32_advsimd_2: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT:rev64 v0.2s, v0.2s +; CHECK-BE-NEXT:fmov w8, s0 +; CHECK-BE-NEXT:fmov s0, w8 +; CHECK-BE-NEXT:cnt v0.8b, v0.8b +; CHECK-BE-NEXT:addv b0, v0.8b +; CHECK-BE-NEXT:fmov w0, s0 +; CHECK-BE-NEXT:ret %1 = extractelement <2 x i32> %x, i64 0 %2 = tail call i32 @llvm.ctpop.i32(i32 %1) ret i32 %2 @@ -103,6 +122,15 @@ define i64 @cnt64_advsimd(i64 %x) nounwind readnone { ; CHECK-CSSC: // %bb.0: ; CHECK-CSSC-NEXT:cnt x0, x0 ; CHECK-CSSC-NEXT:ret +; +; CHECK-BE-LABEL: cnt64_advsimd: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT:fmov d0, x0 +; CHECK-BE-NEXT:rev64 v0.8b, v0.8b +; CHECK-BE-NEXT:cnt v0.8b, v0.8b +; CHECK-BE-NEXT:addv b0, v0.8b +; CHECK-BE-NEXT:fmov x0, d0 +; CHECK-BE-NEXT:ret %cnt = tail call i64 @llvm.ctpop.i64(i64 %x) ret i64 %cnt } @@ -147,6 +175,22 @@ define i32 
@cnt32(i32 %x) nounwind readnone noimplicitfloat { ; CHECK-CSSC: // %bb.0: ; CHECK-CSSC-NEXT:cnt w0, w0 ; CHECK-CSSC-NEXT:ret +; +; CHECK-BE-LABEL: cnt32: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT:lsr w9, w0, #1 +; CHECK-BE-NEXT:mov w8, #16843009 // =0x1010101 +; CHECK-BE-NEXT:and w9, w9, #0x +; CHECK-BE-NEXT:sub w9, w0, w9 +; CHECK-BE-NEXT:lsr w10, w9, #2 +; CHECK-BE-NEXT:and w9, w9, #0x +; CHECK-BE-NEXT:and w10, w10, #0x +; CHECK-BE-NEXT:add w9, w9, w10 +; CHECK-BE-NEXT:add w9, w9, w9, lsr #4 +; CHECK-BE-NEXT:and w9, w9, #0xf0f0f0f +; CHECK-BE-NEXT:
[llvm-branch-commits] [llvm] release/20.x: [AArch64] Fix BE popcount casts. (#129879) (PR #129996)
llvmbot wrote: @alexrp What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/129996 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [AArch64] Fix SVE scalar fcopysign lowering without neon. (#129787) (PR #129997)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/129997 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [AArch64] Fix SVE scalar fcopysign lowering without neon. (#129787) (PR #129997)
llvmbot wrote: @llvm/pr-subscribers-backend-aarch64 Author: None (llvmbot) Changes Backport 4c2d1b4c53de d4ab3df320f9 Requested by: @davemgreen --- Full diff: https://github.com/llvm/llvm-project/pull/129997.diff 2 Files Affected: - (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+19) - (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll (+199) ``diff diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index b5cca88b6b511..62a26b0aef187 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -10681,6 +10681,25 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op, return convertFromScalableVector(DAG, VT, Res); } + // With SVE, but without Neon, extend the scalars to scalable vectors and use + // a SVE FCOPYSIGN. + if (!VT.isVector() && !Subtarget->isNeonAvailable() && + Subtarget->isSVEorStreamingSVEAvailable()) { +if (VT != MVT::f16 && VT != MVT::f32 && VT != MVT::f64) + return SDValue(); +EVT SVT = getPackedSVEVectorVT(VT); + +SDValue Ins1 = +DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, SVT, DAG.getUNDEF(SVT), In1, +DAG.getConstant(0, DL, MVT::i64)); +SDValue Ins2 = +DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, SVT, DAG.getUNDEF(SVT), In2, +DAG.getConstant(0, DL, MVT::i64)); +SDValue FCS = DAG.getNode(ISD::FCOPYSIGN, DL, SVT, Ins1, Ins2); +return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, FCS, + DAG.getConstant(0, DL, MVT::i64)); + } + auto BitCast = [this](EVT VT, SDValue Op, SelectionDAG &DAG) { if (VT.isScalableVector()) return getSVESafeBitCast(VT, Op, DAG); diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll index 2282e74af5d00..79921e25caf53 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll +++ 
b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll @@ -8,6 +8,205 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "aarch64-unknown-linux-gnu" +define void @test_copysign_f16(ptr %ap, ptr %bp) { +; SVE-LABEL: test_copysign_f16: +; SVE: // %bb.0: +; SVE-NEXT:ldr h0, [x1] +; SVE-NEXT:ldr h1, [x0] +; SVE-NEXT:and z0.h, z0.h, #0x8000 +; SVE-NEXT:and z1.h, z1.h, #0x7fff +; SVE-NEXT:orr z0.d, z1.d, z0.d +; SVE-NEXT:str h0, [x0] +; SVE-NEXT:ret +; +; SVE2-LABEL: test_copysign_f16: +; SVE2: // %bb.0: +; SVE2-NEXT:mov z0.h, #32767 // =0x7fff +; SVE2-NEXT:ldr h1, [x1] +; SVE2-NEXT:ldr h2, [x0] +; SVE2-NEXT:bsl z2.d, z2.d, z1.d, z0.d +; SVE2-NEXT:str h2, [x0] +; SVE2-NEXT:ret +; +; NONEON-NOSVE-LABEL: test_copysign_f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT:sub sp, sp, #16 +; NONEON-NOSVE-NEXT:.cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT:ldr h0, [x0] +; NONEON-NOSVE-NEXT:ldr h1, [x1] +; NONEON-NOSVE-NEXT:fcvt s0, h0 +; NONEON-NOSVE-NEXT:str h1, [sp, #12] +; NONEON-NOSVE-NEXT:ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT:tst w8, #0x80 +; NONEON-NOSVE-NEXT:fabs s0, s0 +; NONEON-NOSVE-NEXT:fneg s1, s0 +; NONEON-NOSVE-NEXT:fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT:fcvt h0, s0 +; NONEON-NOSVE-NEXT:str h0, [x0] +; NONEON-NOSVE-NEXT:add sp, sp, #16 +; NONEON-NOSVE-NEXT:ret + %a = load half, ptr %ap + %b = load half, ptr %bp + %r = call half @llvm.copysign.f16(half %a, half %b) + store half %r, ptr %ap + ret void +} + +define void @test_copysign_bf16(ptr %ap, ptr %bp) { +; SVE-LABEL: test_copysign_bf16: +; SVE: // %bb.0: +; SVE-NEXT:sub sp, sp, #16 +; SVE-NEXT:.cfi_def_cfa_offset 16 +; SVE-NEXT:ldr h0, [x0] +; SVE-NEXT:ldr h1, [x1] +; SVE-NEXT:fmov w8, s0 +; SVE-NEXT:str h1, [sp, #12] +; SVE-NEXT:ldrb w9, [sp, #13] +; SVE-NEXT:and w8, w8, #0x7fff +; SVE-NEXT:tst w9, #0x80 +; SVE-NEXT:fmov s0, w8 +; SVE-NEXT:eor w8, w8, #0x8000 +; SVE-NEXT:fmov s1, w8 +; SVE-NEXT:fcsel h0, h1, h0, ne +; SVE-NEXT:str h0, [x0] +; SVE-NEXT:add sp, sp, 
#16 +; SVE-NEXT:ret +; +; SVE2-LABEL: test_copysign_bf16: +; SVE2: // %bb.0: +; SVE2-NEXT:sub sp, sp, #16 +; SVE2-NEXT:.cfi_def_cfa_offset 16 +; SVE2-NEXT:ldr h0, [x0] +; SVE2-NEXT:ldr h1, [x1] +; SVE2-NEXT:fmov w8, s0 +; SVE2-NEXT:str h1, [sp, #12] +; SVE2-NEXT:ldrb w9, [sp, #13] +; SVE2-NEXT:and w8, w8, #0x7fff +; SVE2-NEXT:tst w9, #0x80 +; SVE2-NEXT:fmov s0, w8 +; SVE2-NEXT:eor w8, w8, #0x8000 +; SVE2-NEXT:fmov s1, w8 +; SVE2-NEXT:fcsel h0, h1, h0, ne +; SVE2-NEXT:str h0, [x0] +; SVE2-NEXT:add sp, sp, #16 +; SVE2-NEXT:ret +; +; NONEON-NOSVE-LABEL: test_copysign_bf16: +; NONEON-NOSVE: // %bb.0: +; NONEON
[llvm-branch-commits] [llvm] 56f8d69 - Revert "[LTO][Pipelines][Coro] De-duplicate Coro passes (#128654)"
Author: Vitaly Buka Date: 2025-03-05T18:40:30-08:00 New Revision: 56f8d690f4d0c812c6e7b4173d4be940dfa1bf36 URL: https://github.com/llvm/llvm-project/commit/56f8d690f4d0c812c6e7b4173d4be940dfa1bf36 DIFF: https://github.com/llvm/llvm-project/commit/56f8d690f4d0c812c6e7b4173d4be940dfa1bf36.diff LOG: Revert "[LTO][Pipelines][Coro] De-duplicate Coro passes (#128654)" This reverts commit 31897e651a1aa69207806d497a7080e252c53ebe. Added: Modified: llvm/lib/Passes/PassBuilderPipelines.cpp llvm/test/LTO/X86/coro.ll llvm/test/Other/new-pm-defaults.ll llvm/test/Other/new-pm-lto-defaults.ll Removed: diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 546a5eb1ec283..07db107325f02 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -419,16 +419,14 @@ static bool isLTOPostLink(ThinOrFullLTOPhase Phase) { // Helper to wrap conditionally Coro passes. static CoroConditionalWrapper buildCoroWrapper(ThinOrFullLTOPhase Phase) { + // TODO: Skip passes according to Phase. 
ModulePassManager CoroPM; - if (!isLTOPostLink(Phase)) -CoroPM.addPass(CoroEarlyPass()); - if (!isLTOPreLink(Phase)) { -CGSCCPassManager CGPM; -CGPM.addPass(CoroSplitPass()); -CoroPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM))); -CoroPM.addPass(CoroCleanupPass()); -CoroPM.addPass(GlobalDCEPass()); - } + CoroPM.addPass(CoroEarlyPass()); + CGSCCPassManager CGPM; + CGPM.addPass(CoroSplitPass()); + CoroPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM))); + CoroPM.addPass(CoroCleanupPass()); + CoroPM.addPass(GlobalDCEPass()); return CoroConditionalWrapper(std::move(CoroPM)); } @@ -1012,7 +1010,7 @@ PassBuilder::buildInlinerPipeline(OptimizationLevel Level, MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor( RequireAnalysisPass())); - if (!isLTOPreLink(Phase)) { + if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink) { MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0)); MainCGPipeline.addPass(CoroAnnotationElidePass()); } @@ -1062,7 +1060,7 @@ PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level, buildFunctionSimplificationPipeline(Level, Phase), PTO.EagerlyInvalidateAnalyses)); - if (!isLTOPreLink(Phase)) { + if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink) { MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor( CoroSplitPass(Level != OptimizationLevel::O0))); MPM.addPass( @@ -1122,8 +1120,7 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, // Do basic inference of function attributes from known properties of system // libraries and other oracles. MPM.addPass(InferFunctionAttrsPass()); -if (!isLTOPostLink(Phase)) - MPM.addPass(CoroEarlyPass()); +MPM.addPass(CoroEarlyPass()); FunctionPassManager EarlyFPM; EarlyFPM.addPass(EntryExitInstrumenterPass(/*PostInlining=*/false)); @@ -1293,7 +1290,7 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, // and argument promotion. 
MPM.addPass(DeadArgumentEliminationPass()); - if (!isLTOPreLink(Phase)) + if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink) MPM.addPass(CoroCleanupPass()); // Optimize globals now that functions are fully simplified. @@ -1958,6 +1955,9 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, return MPM; } + // TODO: Skip to match buildCoroWrapper. + MPM.addPass(CoroEarlyPass()); + // Optimize globals to try and fold them into constants. MPM.addPass(GlobalOptPass()); diff --git a/llvm/test/LTO/X86/coro.ll b/llvm/test/LTO/X86/coro.ll index f9830d964bc69..cde398dd76d85 100644 --- a/llvm/test/LTO/X86/coro.ll +++ b/llvm/test/LTO/X86/coro.ll @@ -1,6 +1,4 @@ -; RUN: opt %s -passes='lto-pre-link' -S -o %t1.ll -; RUN: FileCheck %s --check-prefixes=CHECK,PRELINK --implicit-check-not="call void @llvm.coro" --input-file=%t1.ll -; RUN: llvm-as %t1.ll -o %t1.bc +; RUN: llvm-as %s -o %t1.bc ; RUN: llvm-lto2 run %t1.bc -o %t2.o -r=%t1.bc,test,plx -r=%t1.bc,extern_func,plx -save-temps ; RUN: llvm-dis %t2.o.0.5.precodegen.bc -o - | FileCheck %s --implicit-check-not="call void @llvm.coro" @@ -9,9 +7,7 @@ target triple = "x86_64-unknown-fuchsia" declare void @extern_func() -; CHECK: define{{.*}} void @test( -; PRELINK: call ptr @llvm.coro.subfn.addr -; PRELINK: call ptr @llvm.coro.subfn.addr +; CHECK: define {{.*}} void @test( define void @test(ptr %hdl) { call void @llvm.coro.resume(ptr %hdl) call void @llvm.coro.destroy(ptr %hdl) diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll index 30ff1a5879df2..c554fdbf4c799 100644 --- a/llvm/test/Other/new-pm-defaults.ll +++ b/llvm/test/Other/new-pm-defaults.ll @@ -230,13 +230,13 @@ ;
[llvm-branch-commits] [llvm] [InstCombine] Enable select freeze poison folding when storing value (PR #129776)
@@ -3124,6 +3124,19 @@ inline auto m_c_LogicalOp(const LHS &L, const RHS &R) { return m_LogicalOp(L, R); } +struct GuaranteedNotToBeUndefOrPoison_match { + template <typename ITy> bool match(ITy *V) { +if (auto *AsValue = dyn_cast<Value>(V)) + return isGuaranteedNotToBeUndefOrPoison(AsValue); jmciver wrote: I'll fix the helper to take caller context. https://github.com/llvm/llvm-project/pull/129776 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [InstCombine] Enable select freeze poison folding when storing value (PR #129776)
@@ -4813,15 +4813,22 @@ Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) { // TODO: This could use getBinopAbsorber() / getBinopIdentity() to avoid // duplicating logic for binops at least. auto getUndefReplacement = [&I](Type *Ty) { -Constant *BestValue = nullptr; -Constant *NullValue = Constant::getNullValue(Ty); +Value *BestValue = nullptr; +Value *NullValue = Constant::getNullValue(Ty); for (const auto *U : I.users()) { - Constant *C = NullValue; + Value *C = NullValue; if (match(U, m_Or(m_Value(), m_Value()))) C = ConstantInt::getAllOnesValue(Ty); else if (match(U, m_Select(m_Specific(&I), m_Constant(), m_Value()))) C = ConstantInt::getTrue(Ty); - + else if (I.hasOneUse() && jmciver wrote: Good point! I'll fix this as well. https://github.com/llvm/llvm-project/pull/129776 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [InstCombine] Enable select freeze poison folding when storing value (PR #129776)
@@ -3124,6 +3124,19 @@ inline auto m_c_LogicalOp(const LHS &L, const RHS &R) { return m_LogicalOp(L, R); } +struct GuaranteedNotToBeUndefOrPoison_match { + template <typename ITy> bool match(ITy *V) { +if (auto *AsValue = dyn_cast<Value>(V)) + return isGuaranteedNotToBeUndefOrPoison(AsValue); dtcxzyw wrote: I don't like this helper. Some context information (e.g., AC/DT/CxtI) is available in InstCombine. They are useful to get a more precise analysis result. https://github.com/llvm/llvm-project/pull/129776 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [InstCombine] Enable select freeze poison folding when storing value (PR #129776)
@@ -4813,15 +4813,22 @@ Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) { // TODO: This could use getBinopAbsorber() / getBinopIdentity() to avoid // duplicating logic for binops at least. auto getUndefReplacement = [&I](Type *Ty) { -Constant *BestValue = nullptr; -Constant *NullValue = Constant::getNullValue(Ty); +Value *BestValue = nullptr; +Value *NullValue = Constant::getNullValue(Ty); for (const auto *U : I.users()) { - Constant *C = NullValue; + Value *C = NullValue; if (match(U, m_Or(m_Value(), m_Value()))) C = ConstantInt::getAllOnesValue(Ty); else if (match(U, m_Select(m_Specific(&I), m_Constant(), m_Value()))) C = ConstantInt::getTrue(Ty); - + else if (I.hasOneUse() && dtcxzyw wrote: If `I` has only one use, it must have only one user. I would like to hoist this logic out of the loop. ``` if (I.hasOneUse() && match(I.user_back(), m_c_Select(m_Specific(&I), m_Value(Arm))) && isGuaranteedNotToBeUndefOrPoison(Arm, &AC, &DT, &I)) return Arm; // existing code Constant *BestValue = nullptr; Constant *NullValue = Constant::getNullValue(Ty); ... ``` https://github.com/llvm/llvm-project/pull/129776 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Replace amdgpu-no-agpr with amdgpu-agpr-alloc (PR #129893)
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/129893 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Replace amdgpu-no-agpr with amdgpu-agpr-alloc (PR #129893)
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/129893 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [InstCombine] Enable select freeze poison folding when storing value (PR #129776)
dtcxzyw wrote: > I would like to incorporate this and then once freeze poison -> null > canonicalization is removed from InstCombine refactor appropriately. Would > this be acceptable? I don't mean to block this patch. I just worry that these patches may not be well tested (fuzzers/compile-time tracker/llvm-opt-benchmark) until we remove the canonicalization. https://github.com/llvm/llvm-project/pull/129776 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Replace amdgpu-no-agpr with amdgpu-agpr-alloc (PR #129893)
@@ -603,11 +601,7 @@ SIRegisterInfo::getMaxNumVectorRegs(const MachineFunction &MF) const { if (MinNumAGPRs == DefaultNumAGPR.first) { // Default to splitting half the registers if AGPRs are required. - - if (MFI->mayNeedAGPRs()) -MinNumAGPRs = MaxNumAGPRs = MaxVectorRegs / 2; - else -MinNumAGPRs = 0; arsenm wrote: mayNeedAGPRs is now a wrapper around the attribute, so this check is redundant. https://github.com/llvm/llvm-project/pull/129893 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Replace amdgpu-no-agpr with amdgpu-agpr-alloc (PR #129893)
arsenm wrote: ### Merge activity * **Mar 5, 9:11 PM EST**: A user started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/129893). https://github.com/llvm/llvm-project/pull/129893 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Replace amdgpu-no-agpr with amdgpu-num-agpr (PR #129893)
@@ -603,11 +601,7 @@ SIRegisterInfo::getMaxNumVectorRegs(const MachineFunction &MF) const { if (MinNumAGPRs == DefaultNumAGPR.first) { // Default to splitting half the registers if AGPRs are required. - - if (MFI->mayNeedAGPRs()) -MinNumAGPRs = MaxNumAGPRs = MaxVectorRegs / 2; - else -MinNumAGPRs = 0; perlfu wrote: I guess the removal of the forced minima yields no functional change? https://github.com/llvm/llvm-project/pull/129893 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [InstCombine] Enable select freeze poison folding when storing value (PR #129776)
jmciver wrote: @dtcxzyw I was not aware I should be using a fuzzer. What tool would you recommend? From a correctness standpoint the match is relatively narrow and I have discussed it with @nlopes extensively. The patch has been tested with a bootstrap build of LLVM running all regression tests. Additionally, I have used the llvm-test-suite for correctness checks. I have sent a request to @nikita asking for compile-time tracker access and can report results once available. I have also used the patch in conjunction with 15+ Phoronix benchmarks. The optimization involving the fold of a global is something I have seen in our memory semantics work when testing FFTW. https://github.com/llvm/llvm-project/pull/129776 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Port FEntryInserter to NPM (PR #129857)
optimisan wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack [on Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/129857?utm_source=stack-comment-downstack-mergeability-warning). > [Learn more](https://graphite.dev/docs/merge-pull-requests) * **#129857** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/129857?utm_source=stack-comment-icon) 👈 [(View in Graphite)](https://app.graphite.dev/github/pr/llvm/llvm-project/129857?utm_source=stack-comment-view-in-graphite) * **#129853** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/129853?utm_source=stack-comment-icon) * **#129828** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/129828?utm_source=stack-comment-icon) * `main` This stack of pull requests is managed by [Graphite](https://graphite.dev?utm-source=stack-comment). Learn more about [stacking](https://stacking.dev/?utm_source=stack-comment). https://github.com/llvm/llvm-project/pull/129857 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [Clang] Treat constexpr-unknown value as invalid in `EvaluateAsInitializer` (#128409) (PR #129836)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/129836 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Port XRayInstrumentation to NPM (PR #129865)
optimisan wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack [on Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/129865?utm_source=stack-comment-downstack-mergeability-warning). > [Learn more](https://graphite.dev/docs/merge-pull-requests) * **#129866** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/129866?utm_source=stack-comment-icon) * **#129865** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/129865?utm_source=stack-comment-icon) 👈 [(View in Graphite)](https://app.graphite.dev/github/pr/llvm/llvm-project/129865?utm_source=stack-comment-view-in-graphite) * **#129857** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/129857?utm_source=stack-comment-icon) * **#129853** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/129853?utm_source=stack-comment-icon) * **#129828** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/129828?utm_source=stack-comment-icon) * `main` This stack of pull requests is managed by [Graphite](https://graphite.dev?utm-source=stack-comment). Learn more about [stacking](https://stacking.dev/?utm_source=stack-comment). https://github.com/llvm/llvm-project/pull/129865 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [Clang] Treat constexpr-unknown value as invalid in `EvaluateAsInitializer` (#128409) (PR #129836)
https://github.com/cor3ntin approved this pull request. https://github.com/llvm/llvm-project/pull/129836 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Port FEntryInserter to NPM (PR #129857)
https://github.com/optimisan created https://github.com/llvm/llvm-project/pull/129857 None >From 2a78a73afa6bd50b0c9a71da41993917eba14587 Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Wed, 5 Mar 2025 09:19:08 + Subject: [PATCH] [CodeGen][NPM] Port FEntryInserter to NPM --- llvm/include/llvm/InitializePasses.h | 2 +- llvm/include/llvm/Passes/CodeGenPassBuilder.h | 1 + .../llvm/Passes/MachinePassRegistry.def | 2 +- llvm/lib/CodeGen/CodeGen.cpp | 2 +- llvm/lib/CodeGen/FEntryInserter.cpp | 25 +++ llvm/lib/Passes/PassBuilder.cpp | 1 + 6 files changed, 25 insertions(+), 8 deletions(-) diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 5f8e55d783161..63917b2b7f729 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -112,7 +112,7 @@ void initializeExpandPostRALegacyPass(PassRegistry &); void initializeExpandReductionsPass(PassRegistry &); void initializeExpandVariadicsPass(PassRegistry &); void initializeExternalAAWrapperPassPass(PassRegistry &); -void initializeFEntryInserterPass(PassRegistry &); +void initializeFEntryInserterLegacyPass(PassRegistry &); void initializeFinalizeISelPass(PassRegistry &); void initializeFinalizeMachineBundlesPass(PassRegistry &); void initializeFixIrreduciblePass(PassRegistry &); diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index 4db489d804013..bab475d740467 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -33,6 +33,7 @@ #include "llvm/CodeGen/ExpandMemCmp.h" #include "llvm/CodeGen/ExpandPostRAPseudos.h" #include "llvm/CodeGen/ExpandReductions.h" +#include "llvm/CodeGen/FEntryInserter.h" #include "llvm/CodeGen/FinalizeISel.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GlobalMerge.h" diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index 
d032087fa7073..667a7352930ea 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -142,6 +142,7 @@ MACHINE_FUNCTION_PASS("dead-mi-elimination", DeadMachineInstructionElimPass()) MACHINE_FUNCTION_PASS("early-ifcvt", EarlyIfConverterPass()) MACHINE_FUNCTION_PASS("early-machinelicm", EarlyMachineLICMPass()) MACHINE_FUNCTION_PASS("early-tailduplication", EarlyTailDuplicatePass()) +MACHINE_FUNCTION_PASS("fentry-insert", FEntryInserterPass()) MACHINE_FUNCTION_PASS("finalize-isel", FinalizeISelPass()) MACHINE_FUNCTION_PASS("localstackalloc", LocalStackSlotAllocationPass()) MACHINE_FUNCTION_PASS("machine-cp", MachineCopyPropagationPass()) @@ -258,7 +259,6 @@ DUMMY_MACHINE_FUNCTION_PASS("cfi-fixup", CFIFixupPass) DUMMY_MACHINE_FUNCTION_PASS("cfi-instr-inserter", CFIInstrInserterPass) DUMMY_MACHINE_FUNCTION_PASS("detect-dead-lanes", DetectDeadLanesPass) DUMMY_MACHINE_FUNCTION_PASS("dot-machine-cfg", MachineCFGPrinter) -DUMMY_MACHINE_FUNCTION_PASS("fentry-insert", FEntryInserterPass) DUMMY_MACHINE_FUNCTION_PASS("fixup-statepoint-caller-saved", FixupStatepointCallerSavedPass) DUMMY_MACHINE_FUNCTION_PASS("fs-profile-loader", MIRProfileLoaderNewPass) DUMMY_MACHINE_FUNCTION_PASS("funclet-layout", FuncletLayoutPass) diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index 2cc4bf14e9804..effb556e63435 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -43,7 +43,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeExpandLargeFpConvertLegacyPassPass(Registry); initializeExpandMemCmpLegacyPassPass(Registry); initializeExpandPostRALegacyPass(Registry); - initializeFEntryInserterPass(Registry); + initializeFEntryInserterLegacyPass(Registry); initializeFinalizeISelPass(Registry); initializeFinalizeMachineBundlesPass(Registry); initializeFixupStatepointCallerSavedPass(Registry); diff --git a/llvm/lib/CodeGen/FEntryInserter.cpp 
b/llvm/lib/CodeGen/FEntryInserter.cpp index 68304dd41db04..4f1bd7df6a204 100644 --- a/llvm/lib/CodeGen/FEntryInserter.cpp +++ b/llvm/lib/CodeGen/FEntryInserter.cpp @@ -10,9 +10,11 @@ // //===--===// +#include "llvm/CodeGen/FEntryInserter.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachinePassManager.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Function.h" @@ -21,17 +23,30 @@ using namespace llvm; namespace { -struct FEntryInserter : public MachineFunctionPass { +struct FEntryInserter { + bool run(MachineFunction &MF); +}; + +struct FEntryInserterLegacy : public MachineFunctionPass { static char ID; // Pass identification,
[llvm-branch-commits] [llvm] [CodeGen][NPM] Port FEntryInserter to NPM (PR #129857)
https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/129857 >From 0ba6ca6ef0172f61f23cfee8d20a59e1138d5dfc Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Wed, 5 Mar 2025 09:19:08 + Subject: [PATCH] [CodeGen][NPM] Port FEntryInserter to NPM --- llvm/include/llvm/CodeGen/FEntryInserter.h| 24 ++ llvm/include/llvm/InitializePasses.h | 2 +- llvm/include/llvm/Passes/CodeGenPassBuilder.h | 1 + .../llvm/Passes/MachinePassRegistry.def | 2 +- llvm/lib/CodeGen/CodeGen.cpp | 2 +- llvm/lib/CodeGen/FEntryInserter.cpp | 25 +++ llvm/lib/Passes/PassBuilder.cpp | 1 + 7 files changed, 49 insertions(+), 8 deletions(-) create mode 100644 llvm/include/llvm/CodeGen/FEntryInserter.h diff --git a/llvm/include/llvm/CodeGen/FEntryInserter.h b/llvm/include/llvm/CodeGen/FEntryInserter.h new file mode 100644 index 0..16c5372d049fa --- /dev/null +++ b/llvm/include/llvm/CodeGen/FEntryInserter.h @@ -0,0 +1,24 @@ +//===- llvm/CodeGen/FEntryInserter.h *- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#ifndef LLVM_CODEGEN_FENTRYINSERTER_H +#define LLVM_CODEGEN_FENTRYINSERTER_H + +#include "llvm/CodeGen/MachinePassManager.h" + +namespace llvm { + +class FEntryInserterPass : public PassInfoMixin { +public: + PreservedAnalyses run(MachineFunction &MF, +MachineFunctionAnalysisManager &MFAM); +}; + +} // namespace llvm + +#endif // LLVM_CODEGEN_FENTRYINSERTER_H diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 5f8e55d783161..63917b2b7f729 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -112,7 +112,7 @@ void initializeExpandPostRALegacyPass(PassRegistry &); void initializeExpandReductionsPass(PassRegistry &); void initializeExpandVariadicsPass(PassRegistry &); void initializeExternalAAWrapperPassPass(PassRegistry &); -void initializeFEntryInserterPass(PassRegistry &); +void initializeFEntryInserterLegacyPass(PassRegistry &); void initializeFinalizeISelPass(PassRegistry &); void initializeFinalizeMachineBundlesPass(PassRegistry &); void initializeFixIrreduciblePass(PassRegistry &); diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index 4db489d804013..bab475d740467 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -33,6 +33,7 @@ #include "llvm/CodeGen/ExpandMemCmp.h" #include "llvm/CodeGen/ExpandPostRAPseudos.h" #include "llvm/CodeGen/ExpandReductions.h" +#include "llvm/CodeGen/FEntryInserter.h" #include "llvm/CodeGen/FinalizeISel.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GlobalMerge.h" diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index d032087fa7073..667a7352930ea 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -142,6 
+142,7 @@ MACHINE_FUNCTION_PASS("dead-mi-elimination", DeadMachineInstructionElimPass()) MACHINE_FUNCTION_PASS("early-ifcvt", EarlyIfConverterPass()) MACHINE_FUNCTION_PASS("early-machinelicm", EarlyMachineLICMPass()) MACHINE_FUNCTION_PASS("early-tailduplication", EarlyTailDuplicatePass()) +MACHINE_FUNCTION_PASS("fentry-insert", FEntryInserterPass()) MACHINE_FUNCTION_PASS("finalize-isel", FinalizeISelPass()) MACHINE_FUNCTION_PASS("localstackalloc", LocalStackSlotAllocationPass()) MACHINE_FUNCTION_PASS("machine-cp", MachineCopyPropagationPass()) @@ -258,7 +259,6 @@ DUMMY_MACHINE_FUNCTION_PASS("cfi-fixup", CFIFixupPass) DUMMY_MACHINE_FUNCTION_PASS("cfi-instr-inserter", CFIInstrInserterPass) DUMMY_MACHINE_FUNCTION_PASS("detect-dead-lanes", DetectDeadLanesPass) DUMMY_MACHINE_FUNCTION_PASS("dot-machine-cfg", MachineCFGPrinter) -DUMMY_MACHINE_FUNCTION_PASS("fentry-insert", FEntryInserterPass) DUMMY_MACHINE_FUNCTION_PASS("fixup-statepoint-caller-saved", FixupStatepointCallerSavedPass) DUMMY_MACHINE_FUNCTION_PASS("fs-profile-loader", MIRProfileLoaderNewPass) DUMMY_MACHINE_FUNCTION_PASS("funclet-layout", FuncletLayoutPass) diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index 2cc4bf14e9804..effb556e63435 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -43,7 +43,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeExpandLargeFpConvertLegacyPassPass(Registry); initializeExpandMemCmpLegacyPassPass(Registry); initializeExpandPostRALegacyPass(Registry); - initializeFEntryInserterPass(Registry); + initial
[llvm-branch-commits] [llvm] [CodeGen][NPM] Port PatchableFunction to NPM (PR #129866)
https://github.com/optimisan created https://github.com/llvm/llvm-project/pull/129866 None >From c9386f19d4a87f9fd88bb96aa0c23eba638e96da Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Wed, 5 Mar 2025 10:34:25 + Subject: [PATCH] [CodeGen][NPM] Port PatchableFunction to NPM --- llvm/include/llvm/CodeGen/PatchableFunction.h | 29 +++ llvm/include/llvm/InitializePasses.h | 2 +- llvm/include/llvm/Passes/CodeGenPassBuilder.h | 1 + .../llvm/Passes/MachinePassRegistry.def | 2 +- llvm/lib/CodeGen/CodeGen.cpp | 2 +- llvm/lib/CodeGen/PatchableFunction.cpp| 37 ++- llvm/lib/Passes/PassBuilder.cpp | 1 + 7 files changed, 61 insertions(+), 13 deletions(-) create mode 100644 llvm/include/llvm/CodeGen/PatchableFunction.h diff --git a/llvm/include/llvm/CodeGen/PatchableFunction.h b/llvm/include/llvm/CodeGen/PatchableFunction.h new file mode 100644 index 0..d10dcfbc1f015 --- /dev/null +++ b/llvm/include/llvm/CodeGen/PatchableFunction.h @@ -0,0 +1,29 @@ +//===- llvm/CodeGen/PatchableFunction.h -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#ifndef LLVM_CODEGEN_PATCHABLEFUNCTION_H +#define LLVM_CODEGEN_PATCHABLEFUNCTION_H + +#include "llvm/CodeGen/MachinePassManager.h" + +namespace llvm { + +class PatchableFunctionPass : public PassInfoMixin { +public: + PreservedAnalyses run(MachineFunction &MF, +MachineFunctionAnalysisManager &MFAM); + + MachineFunctionProperties getRequiredProperties() const { +return MachineFunctionProperties().set( +MachineFunctionProperties::Property::NoVRegs); + } +}; + +} // namespace llvm + +#endif // LLVM_CODEGEN_PATCHABLEFUNCTION_H diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index dcfd9fc6a86b9..f1c16e3b1cb40 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -225,7 +225,7 @@ void initializeOptimizePHIsLegacyPass(PassRegistry &); void initializePEIPass(PassRegistry &); void initializePHIEliminationPass(PassRegistry &); void initializePartiallyInlineLibCallsLegacyPassPass(PassRegistry &); -void initializePatchableFunctionPass(PassRegistry &); +void initializePatchableFunctionLegacyPass(PassRegistry &); void initializePeepholeOptimizerLegacyPass(PassRegistry &); void initializePhiValuesWrapperPassPass(PassRegistry &); void initializePhysicalRegisterUsageInfoWrapperLegacyPass(PassRegistry &); diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index 426dc6c7eacfd..aab2c58ac0f78 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -59,6 +59,7 @@ #include "llvm/CodeGen/MachineVerifier.h" #include "llvm/CodeGen/OptimizePHIs.h" #include "llvm/CodeGen/PHIElimination.h" +#include "llvm/CodeGen/PatchableFunction.h" #include "llvm/CodeGen/PeepholeOptimizer.h" #include "llvm/CodeGen/PostRASchedulerList.h" #include "llvm/CodeGen/PreISelIntrinsicLowering.h" diff --git 
a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index 8b1373c0ffefd..bedbc3e88a7ce 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -152,6 +152,7 @@ MACHINE_FUNCTION_PASS("machine-scheduler", MachineSchedulerPass(TM)) MACHINE_FUNCTION_PASS("machinelicm", MachineLICMPass()) MACHINE_FUNCTION_PASS("no-op-machine-function", NoOpMachineFunctionPass()) MACHINE_FUNCTION_PASS("opt-phis", OptimizePHIsPass()) +MACHINE_FUNCTION_PASS("patchable-function", PatchableFunctionPass()) MACHINE_FUNCTION_PASS("peephole-opt", PeepholeOptimizerPass()) MACHINE_FUNCTION_PASS("phi-node-elimination", PHIEliminationPass()) MACHINE_FUNCTION_PASS("post-RA-sched", PostRASchedulerPass(TM)) @@ -279,7 +280,6 @@ DUMMY_MACHINE_FUNCTION_PASS("machine-sanmd", MachineSanitizerBinaryMetadata) DUMMY_MACHINE_FUNCTION_PASS("machine-uniformity", MachineUniformityInfoWrapperPass) DUMMY_MACHINE_FUNCTION_PASS("machineinstr-printer", MachineFunctionPrinterPass) DUMMY_MACHINE_FUNCTION_PASS("mirfs-discriminators", MIRAddFSDiscriminatorsPass) -DUMMY_MACHINE_FUNCTION_PASS("patchable-function", PatchableFunctionPass) DUMMY_MACHINE_FUNCTION_PASS("postra-machine-sink", PostRAMachineSinkingPass) DUMMY_MACHINE_FUNCTION_PASS("print-machine-uniformity", MachineUniformityInfoPrinterPass) DUMMY_MACHINE_FUNCTION_PASS("processimpdefs", ProcessImplicitDefsPass) diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index b299983503232..375176ed4b1ce 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++
[llvm-branch-commits] [llvm] [CodeGen][NPM] Port PatchableFunction to NPM (PR #129866)
optimisan wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/129866?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>. > <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a> * **#129866** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/129866?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/129866?utm_source=stack-comment-view-in-graphite" target="_blank">(View in Graphite)</a> * **#129865** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/129865?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#129857** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/129857?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#129853** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/129853?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#129828** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/129828?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * `main` This stack of pull requests is managed by <a href="https://graphite.dev?utm-source=stack-comment">Graphite</a>. Learn more about <a href="https://stacking.dev/?utm_source=stack-comment">stacking</a>. https://github.com/llvm/llvm-project/pull/129866 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Port XRayInstrumentation to NPM (PR #129865)
https://github.com/optimisan created https://github.com/llvm/llvm-project/pull/129865 None >From 32a8bd59f64750fb3c2e72a7e26ba7a81ff86210 Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Wed, 5 Mar 2025 10:11:27 + Subject: [PATCH] [CodeGen][NPM] Port XRayInstrumentation to NPM --- .../llvm/CodeGen/XRayInstrumentation.h| 24 + llvm/include/llvm/InitializePasses.h | 2 +- llvm/include/llvm/Passes/CodeGenPassBuilder.h | 1 + .../llvm/Passes/MachinePassRegistry.def | 2 +- llvm/lib/CodeGen/CodeGen.cpp | 2 +- llvm/lib/CodeGen/FEntryInserter.cpp | 8 +- llvm/lib/CodeGen/XRayInstrumentation.cpp | 90 +++ llvm/lib/Passes/PassBuilder.cpp | 1 + llvm/test/CodeGen/X86/xray-empty-firstmbb.mir | 1 + .../X86/xray-multiplerets-in-blocks.mir | 1 + 10 files changed, 110 insertions(+), 22 deletions(-) create mode 100644 llvm/include/llvm/CodeGen/XRayInstrumentation.h diff --git a/llvm/include/llvm/CodeGen/XRayInstrumentation.h b/llvm/include/llvm/CodeGen/XRayInstrumentation.h new file mode 100644 index 0..ed39a7f3c1654 --- /dev/null +++ b/llvm/include/llvm/CodeGen/XRayInstrumentation.h @@ -0,0 +1,24 @@ +//===- llvm/CodeGen/XRayInstrumentation.h *- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#ifndef LLVM_CODEGEN_XRAYINSTRUMENTATION_H +#define LLVM_CODEGEN_XRAYINSTRUMENTATION_H + +#include "llvm/CodeGen/MachinePassManager.h" + +namespace llvm { + +class XRayInstrumentationPass : public PassInfoMixin { +public: + PreservedAnalyses run(MachineFunction &MF, +MachineFunctionAnalysisManager &MFAM); +}; + +} // namespace llvm + +#endif // LLVM_CODEGEN_XRAYINSTRUMENTATION_H diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 63917b2b7f729..dcfd9fc6a86b9 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -320,7 +320,7 @@ void initializeVirtRegRewriterPass(PassRegistry &); void initializeWasmEHPreparePass(PassRegistry &); void initializeWinEHPreparePass(PassRegistry &); void initializeWriteBitcodePassPass(PassRegistry &); -void initializeXRayInstrumentationPass(PassRegistry &); +void initializeXRayInstrumentationLegacyPass(PassRegistry &); } // end namespace llvm diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index bab475d740467..426dc6c7eacfd 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -85,6 +85,7 @@ #include "llvm/CodeGen/UnreachableBlockElim.h" #include "llvm/CodeGen/WasmEHPrepare.h" #include "llvm/CodeGen/WinEHPrepare.h" +#include "llvm/CodeGen/XRayInstrumentation.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/Verifier.h" #include "llvm/IRPrinter/IRPrintingPasses.h" diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index 667a7352930ea..8b1373c0ffefd 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -188,6 +188,7 @@ MACHINE_FUNCTION_PASS("trigger-verifier-error", TriggerVerifierErrorPass()) 
MACHINE_FUNCTION_PASS("two-address-instruction", TwoAddressInstructionPass()) MACHINE_FUNCTION_PASS("verify", MachineVerifierPass()) MACHINE_FUNCTION_PASS("verify", MachineTraceMetricsVerifierPass()) +MACHINE_FUNCTION_PASS("xray-instrumentation", XRayInstrumentationPass()) #undef MACHINE_FUNCTION_PASS #ifndef MACHINE_FUNCTION_PASS_WITH_PARAMS @@ -296,5 +297,4 @@ DUMMY_MACHINE_FUNCTION_PASS("stack-frame-layout", StackFrameLayoutAnalysisPass) DUMMY_MACHINE_FUNCTION_PASS("stackmap-liveness", StackMapLivenessPass) DUMMY_MACHINE_FUNCTION_PASS("unpack-mi-bundles", UnpackMachineBundlesPass) DUMMY_MACHINE_FUNCTION_PASS("virtregrewriter", VirtRegRewriterPass) -DUMMY_MACHINE_FUNCTION_PASS("xray-instrumentation", XRayInstrumentationPass) #undef DUMMY_MACHINE_FUNCTION_PASS diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index effb556e63435..b299983503232 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -144,5 +144,5 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeVirtRegRewriterPass(Registry); initializeWasmEHPreparePass(Registry); initializeWinEHPreparePass(Registry); - initializeXRayInstrumentationPass(Registry); + initializeXRayInstrumentationLegacyPass(Registry); } diff --git a/llvm/lib/CodeGen/FEntryInserter.cpp b/llvm/lib/CodeGen/FEntryInserter.cpp index 4f1bd7df6a204..79949dac51448 100644 --- a/llvm/lib/CodeGen/FEntryInserter.cpp +++ b/llvm/lib/Co
[llvm-branch-commits] [llvm] [CodeGen][NPM] Port MachineBlockPlacementStats to NPM (PR #129853)
https://github.com/optimisan created https://github.com/llvm/llvm-project/pull/129853 None >From a01bc11d08290eab70e8e48858791cfbeb4123e6 Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Wed, 5 Mar 2025 08:59:23 + Subject: [PATCH] [CodeGen][NPM] Port MachineBlockPlacementStats to NPM --- .../llvm/CodeGen/MachineBlockPlacement.h | 8 llvm/include/llvm/InitializePasses.h | 2 +- .../llvm/Passes/MachinePassRegistry.def | 2 +- llvm/lib/CodeGen/CodeGen.cpp | 2 +- llvm/lib/CodeGen/MachineBlockPlacement.cpp| 45 ++- 5 files changed, 44 insertions(+), 15 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachineBlockPlacement.h b/llvm/include/llvm/CodeGen/MachineBlockPlacement.h index 8003b52fa6a3c..3d23ce7001071 100644 --- a/llvm/include/llvm/CodeGen/MachineBlockPlacement.h +++ b/llvm/include/llvm/CodeGen/MachineBlockPlacement.h @@ -25,6 +25,14 @@ class MachineBlockPlacementPass MachineFunctionAnalysisManager &MFAM); }; +class MachineBlockPlacementStatsPass +: public PassInfoMixin { + +public: + PreservedAnalyses run(MachineFunction &MF, +MachineFunctionAnalysisManager &MFAM); +}; + } // namespace llvm #endif // LLVM_CODEGEN_MACHINEBLOCKPLACEMENT_H diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 1ed7cbe976b9b..5f8e55d783161 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -185,7 +185,7 @@ void initializeMIRNamerPass(PassRegistry &); void initializeMIRPrintingPassPass(PassRegistry &); void initializeMachineBlockFrequencyInfoWrapperPassPass(PassRegistry &); void initializeMachineBlockPlacementLegacyPass(PassRegistry &); -void initializeMachineBlockPlacementStatsPass(PassRegistry &); +void initializeMachineBlockPlacementStatsLegacyPass(PassRegistry &); void initializeMachineBranchProbabilityInfoWrapperPassPass(PassRegistry &); void initializeMachineCFGPrinterPass(PassRegistry &); void initializeMachineCSELegacyPass(PassRegistry &); diff --git 
a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index 8957011ca948c..d032087fa7073 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -137,6 +137,7 @@ MACHINE_FUNCTION_ANALYSIS("virtregmap", VirtRegMapAnalysis()) #ifndef MACHINE_FUNCTION_PASS #define MACHINE_FUNCTION_PASS(NAME, CREATE_PASS) #endif +MACHINE_FUNCTION_PASS("block-placement-stats", MachineBlockPlacementStatsPass()) MACHINE_FUNCTION_PASS("dead-mi-elimination", DeadMachineInstructionElimPass()) MACHINE_FUNCTION_PASS("early-ifcvt", EarlyIfConverterPass()) MACHINE_FUNCTION_PASS("early-machinelicm", EarlyMachineLICMPass()) @@ -250,7 +251,6 @@ DUMMY_MACHINE_MODULE_PASS("mir-strip-debug", StripDebugMachineModulePass) #endif DUMMY_MACHINE_FUNCTION_PASS("bbsections-prepare", BasicBlockSectionsPass) DUMMY_MACHINE_FUNCTION_PASS("bbsections-profile-reader", BasicBlockSectionsProfileReaderPass) -DUMMY_MACHINE_FUNCTION_PASS("block-placement-stats", MachineBlockPlacementStatsPass) DUMMY_MACHINE_FUNCTION_PASS("branch-folder", BranchFolderPass) DUMMY_MACHINE_FUNCTION_PASS("break-false-deps", BreakFalseDepsPass) DUMMY_MACHINE_FUNCTION_PASS("cfguard-longjmp", CFGuardLongjmpPass) diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index 93729f08a8721..2cc4bf14e9804 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -73,7 +73,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeMIRProfileLoaderPassPass(Registry); initializeMachineBlockFrequencyInfoWrapperPassPass(Registry); initializeMachineBlockPlacementLegacyPass(Registry); - initializeMachineBlockPlacementStatsPass(Registry); + initializeMachineBlockPlacementStatsLegacyPass(Registry); initializeMachineCFGPrinterPass(Registry); initializeMachineCSELegacyPass(Registry); initializeMachineCombinerPass(Registry); diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp 
b/llvm/lib/CodeGen/MachineBlockPlacement.cpp index 322655c0c998b..b6dd374cf1b31 100644 --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -3829,21 +3829,35 @@ namespace { /// placement. This is separate from the actual placement pass so that they can /// be computed in the absence of any placement transformations or when using /// alternative placement strategies. -class MachineBlockPlacementStats : public MachineFunctionPass { +class MachineBlockPlacementStats { /// A handle to the branch probability pass. const MachineBranchProbabilityInfo *MBPI; /// A handle to the function-wide block frequency pass. const MachineBlockFrequencyInfo *MBFI; +public: + MachineBlockPlacementStats(const MachineBranchProbabilityInfo *MBPI, + const MachineBlockFrequencyInfo *MBFI) +
[llvm-branch-commits] [llvm] [CodeGen][NPM] Port MachineBlockPlacementStats to NPM (PR #129853)
https://github.com/optimisan ready_for_review https://github.com/llvm/llvm-project/pull/129853 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: Reland "[LV]: Teach LV to recursively (de)interleave." (#125094) (PR #128389)
https://github.com/hassnaaHamdi closed https://github.com/llvm/llvm-project/pull/128389 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang] [sanitizer] add pseudofunction to indicate array-bounds check (PR #128977)
Michael137 wrote: Could you elaborate on how this will be used/the motivation for this? https://github.com/llvm/llvm-project/pull/128977 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [InstCombine] Enable select freeze poison folding when storing value (PR #129776)
nikic wrote: > We can do this fold in InstSimplify: https://alive2.llvm.org/ce/z/Dm53TP The transform is only valid if the freeze(poison) is one-use. And I don't think that InstSimplify should be doing any use-based checks. So I think InstCombine is the right place for it. https://github.com/llvm/llvm-project/pull/129776 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Add Metadata generation of Root Signatures for Attr (PR #125131)
@@ -0,0 +1,108 @@ +//===- HLSLRootSignature.cpp - HLSL Root Signature helper objects +//--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +/// +/// \file This file contains helpers for working with HLSL Root Signatures. +/// +//===--===// + +#include "llvm/Frontend/HLSL/HLSLRootSignature.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" + +namespace llvm { +namespace hlsl { +namespace rootsig { + +// Static helper functions + +static MDString *ClauseTypeToName(LLVMContext &Ctx, ClauseType Type) { + StringRef Name; + switch (Type) { + case ClauseType::CBuffer: +Name = "CBV"; +break; + case ClauseType::SRV: +Name = "SRV"; +break; + case ClauseType::UAV: +Name = "UAV"; +break; + case ClauseType::Sampler: +Name = "Sampler"; +break; + } + return MDString::get(Ctx, Name); +} + +// Helper struct so that we can use the overloaded notation of std::visit +template struct OverloadBuilds : Ts... { + using Ts::operator()...; +}; +template OverloadBuilds(Ts...) 
-> OverloadBuilds; + +MDNode *MetadataBuilder::BuildRootSignature() { + for (const RootElement &Element : Elements) { +MDNode *ElementMD = +std::visit(OverloadBuilds{ + [&](DescriptorTable Table) -> MDNode * { + return BuildDescriptorTable(Table); + }, + [&](DescriptorTableClause Clause) -> MDNode * { + return BuildDescriptorTableClause(Clause); + }, + }, + Element); +GeneratedMetadata.push_back(ElementMD); + } + + return MDNode::get(Ctx, GeneratedMetadata); +} + +MDNode *MetadataBuilder::BuildDescriptorTable(const DescriptorTable &Table) { + IRBuilder<> B(Ctx); + SmallVector TableOperands; + // Set the mandatory arguments + TableOperands.push_back(MDString::get(Ctx, "DescriptorTable")); + TableOperands.push_back(ConstantAsMetadata::get( + B.getInt32(llvm::to_underlying(Table.Visibility; + + // Remaining operands are references to the table's clauses. The in-memory bob80905 wrote: Very helpful 👍 https://github.com/llvm/llvm-project/pull/125131 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Add Metadata generation of Root Signatures for Attr (PR #125131)
https://github.com/bob80905 approved this pull request. Just a nit but this looks good to me https://github.com/llvm/llvm-project/pull/125131 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Add Metadata generation of Root Signatures for Attr (PR #125131)
@@ -0,0 +1,29 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -o - %s | FileCheck %s + +// CHECK-DAG: ![[#EMPTY:]] = !{} +[shader("compute"), RootSignature("")] +[numthreads(1,1,1)] +void FirstEntry() {} + +// CHECK-DAG: ![[#CBV:]] = !{!"CBV", i32 1, i32 0, i32 0, i32 -1, i32 4} inbelic wrote: These are in a deterministic order, I had only used DAG to help with readability of the testcase. But I can see that affects correctness. Updated to remove use of CHECK-DAG https://github.com/llvm/llvm-project/pull/125131 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Add Metadata generation of Root Signatures for Attr (PR #125131)
https://github.com/inbelic edited https://github.com/llvm/llvm-project/pull/125131 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Add Metadata generation of Root Signatures for Attr (PR #125131)
https://github.com/inbelic edited https://github.com/llvm/llvm-project/pull/125131 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Add Metadata generation of Root Signatures for Attr (PR #125131)
https://github.com/inbelic updated https://github.com/llvm/llvm-project/pull/125131 >From abe7e6703a008608e19ce3f9bdcbd1b613fab60d Mon Sep 17 00:00:00 2001 From: Finn Plummer Date: Wed, 29 Jan 2025 19:40:08 + Subject: [PATCH 1/7] add basic empty root signature --- clang/lib/CodeGen/CGHLSLRuntime.cpp | 21 + clang/test/CodeGenHLSL/RootSignature.hlsl | 19 +++ 2 files changed, 40 insertions(+) create mode 100644 clang/test/CodeGenHLSL/RootSignature.hlsl diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp index c354e58e15f4b..ff608323e9ac3 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.cpp +++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp @@ -119,6 +119,20 @@ GlobalVariable *replaceBuffer(CGHLSLRuntime::Buffer &Buf) { return CBGV; } +void addRootSignature(llvm::Function *Fn, llvm::Module &M) { + auto &Ctx = M.getContext(); + IRBuilder<> B(M.getContext()); + + MDNode *ExampleRootSignature = MDNode::get(Ctx, {}); + + MDNode *ExamplePairing = MDNode::get(Ctx, {ValueAsMetadata::get(Fn), + ExampleRootSignature}); + + StringRef RootSignatureValKey = "dx.rootsignatures"; + auto *RootSignatureValMD = M.getOrInsertNamedMetadata(RootSignatureValKey); + RootSignatureValMD->addOperand(ExamplePairing); +} + } // namespace llvm::Type *CGHLSLRuntime::convertHLSLSpecificType(const Type *T) { @@ -453,6 +467,13 @@ void CGHLSLRuntime::emitEntryFunction(const FunctionDecl *FD, // FIXME: Handle codegen for return type semantics. 
// See: https://github.com/llvm/llvm-project/issues/57875 B.CreateRetVoid(); + + // Add and identify root signature to function, if applicable + const AttrVec &Attrs = FD->getAttrs(); + for (const Attr *Attr : Attrs) { +if (isa(Attr)) + addRootSignature(EntryFn, M); + } } void CGHLSLRuntime::setHLSLFunctionAttributes(const FunctionDecl *FD, diff --git a/clang/test/CodeGenHLSL/RootSignature.hlsl b/clang/test/CodeGenHLSL/RootSignature.hlsl new file mode 100644 index 0..1ea9ab7aaa2c3 --- /dev/null +++ b/clang/test/CodeGenHLSL/RootSignature.hlsl @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -o - %s | FileCheck %s + +// CHECK: !dx.rootsignatures = !{![[#FIRST_ENTRY:]], ![[#SECOND_ENTRY:]]} +// CHECK-DAG: ![[#FIRST_ENTRY]] = !{ptr @FirstEntry, ![[#RS:]]} +// CHECK-DAG: ![[#SECOND_ENTRY]] = !{ptr @SecondEntry, ![[#RS:]]} +// CHECK-DAG: ![[#RS]] = !{} + +[shader("compute"), RootSignature("")] +[numthreads(1,1,1)] +void FirstEntry() {} + +[shader("compute"), RootSignature("DescriptorTable()")] +[numthreads(1,1,1)] +void SecondEntry() {} + +// Sanity test to ensure to root is added for this function +[shader("compute")] +[numthreads(1,1,1)] +void ThirdEntry() {} >From 671f099d3d58995677c47b4226481b72295e525d Mon Sep 17 00:00:00 2001 From: Finn Plummer Date: Wed, 29 Jan 2025 19:57:48 + Subject: [PATCH 2/7] pass down the actual root elements - test that we have the correct number of elements --- clang/lib/CodeGen/CGHLSLRuntime.cpp | 17 - clang/test/CodeGenHLSL/RootSignature.hlsl | 9 + 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp index ff608323e9ac3..4c9adcd8a9053 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.cpp +++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp @@ -119,11 +119,18 @@ GlobalVariable *replaceBuffer(CGHLSLRuntime::Buffer &Buf) { return CBGV; } -void addRootSignature(llvm::Function *Fn, llvm::Module &M) { +void addRootSignature( +ArrayRef 
Elements, +llvm::Function *Fn, llvm::Module &M) { auto &Ctx = M.getContext(); - IRBuilder<> B(M.getContext()); - MDNode *ExampleRootSignature = MDNode::get(Ctx, {}); + SmallVector GeneratedMetadata; + for (auto Element : Elements) { +MDNode *ExampleRootElement = MDNode::get(Ctx, {}); +GeneratedMetadata.push_back(ExampleRootElement); + } + + MDNode *ExampleRootSignature = MDNode::get(Ctx, GeneratedMetadata); MDNode *ExamplePairing = MDNode::get(Ctx, {ValueAsMetadata::get(Fn), ExampleRootSignature}); @@ -471,8 +478,8 @@ void CGHLSLRuntime::emitEntryFunction(const FunctionDecl *FD, // Add and identify root signature to function, if applicable const AttrVec &Attrs = FD->getAttrs(); for (const Attr *Attr : Attrs) { -if (isa(Attr)) - addRootSignature(EntryFn, M); +if (const auto *RSAttr = dyn_cast(Attr)) + addRootSignature(RSAttr->getElements(), EntryFn, M); } } diff --git a/clang/test/CodeGenHLSL/RootSignature.hlsl b/clang/test/CodeGenHLSL/RootSignature.hlsl index 1ea9ab7aaa2c3..63c0505e224f0 100644 --- a/clang/test/CodeGenHLSL/RootSignature.hlsl +++ b/clang/test/CodeGenHLSL/RootSignature.hlsl @@ -1,9 +1,10 @@ // RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -o - %s | FileCheck %s -// CHECK:
[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Add Metadata generation of Root Signatures for Attr (PR #125131)
@@ -0,0 +1,108 @@ +//===- HLSLRootSignature.cpp - HLSL Root Signature helper objects +//--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +/// +/// \file This file contains helpers for working with HLSL Root Signatures. +/// +//===--===// + +#include "llvm/Frontend/HLSL/HLSLRootSignature.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" + +namespace llvm { +namespace hlsl { +namespace rootsig { + +// Static helper functions + +static MDString *ClauseTypeToName(LLVMContext &Ctx, ClauseType Type) { + StringRef Name; + switch (Type) { + case ClauseType::CBuffer: +Name = "CBV"; +break; + case ClauseType::SRV: +Name = "SRV"; +break; + case ClauseType::UAV: +Name = "UAV"; +break; + case ClauseType::Sampler: +Name = "Sampler"; +break; + } + return MDString::get(Ctx, Name); +} + +// Helper struct so that we can use the overloaded notation of std::visit +template struct OverloadBuilds : Ts... { + using Ts::operator()...; +}; +template OverloadBuilds(Ts...) -> OverloadBuilds; + +MDNode *MetadataBuilder::BuildRootSignature() { + for (const RootElement &Element : Elements) { +MDNode *ElementMD = +std::visit(OverloadBuilds{ bob80905 wrote: Could you add a comment here for clarity? Something like "for each element, if it's a desctable, run this, if it's a desctableclause, run this". I had to look up what std::visit does to understand this. https://github.com/llvm/llvm-project/pull/125131 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Replace amdgpu-no-agpr with amdgpu-num-agpr (PR #129893)
https://github.com/rampitec approved this pull request. https://github.com/llvm/llvm-project/pull/129893 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] f414ee4 - Revert "Revert "[clang] Lower modf builtin using `llvm.modf` intrinsic" (#127…"
Author: Benjamin Maxwell Date: 2025-03-05T13:48:07Z New Revision: f414ee456de732da99ae6a4c88304a6d82a7ff18 URL: https://github.com/llvm/llvm-project/commit/f414ee456de732da99ae6a4c88304a6d82a7ff18 DIFF: https://github.com/llvm/llvm-project/commit/f414ee456de732da99ae6a4c88304a6d82a7ff18.diff LOG: Revert "Revert "[clang] Lower modf builtin using `llvm.modf` intrinsic" (#127…" This reverts commit d595fc91aeb35cb7fad8ad37fa84a70863b09f69. Added: Modified: clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/X86/math-builtins.c clang/test/CodeGen/aix-builtin-mapping.c clang/test/CodeGen/builtin-attributes.c clang/test/CodeGen/math-builtins-long.c clang/test/CodeGen/math-libcalls.c Removed: diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index ab8f19b25fa66..bd559a96d3182 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -859,6 +859,24 @@ static void emitSincosBuiltin(CodeGenFunction &CGF, const CallExpr *E, StoreCos->setMetadata(LLVMContext::MD_noalias, AliasScopeList); } +static llvm::Value *emitModfBuiltin(CodeGenFunction &CGF, const CallExpr *E, +llvm::Intrinsic::ID IntrinsicID) { + llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(0)); + llvm::Value *IntPartDest = CGF.EmitScalarExpr(E->getArg(1)); + + llvm::Value *Call = + CGF.Builder.CreateIntrinsic(IntrinsicID, {Val->getType()}, Val); + + llvm::Value *FractionalResult = CGF.Builder.CreateExtractValue(Call, 0); + llvm::Value *IntegralResult = CGF.Builder.CreateExtractValue(Call, 1); + + QualType DestPtrType = E->getArg(1)->getType()->getPointeeType(); + LValue IntegralLV = CGF.MakeNaturalAlignAddrLValue(IntPartDest, DestPtrType); + CGF.EmitStoreOfScalar(IntegralResult, IntegralLV); + + return FractionalResult; +} + /// EmitFAbs - Emit a call to @llvm.fabs(). 
static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) { Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType()); @@ -4120,6 +4138,15 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_frexpf128: case Builtin::BI__builtin_frexpf16: return RValue::get(emitFrexpBuiltin(*this, E, Intrinsic::frexp)); + case Builtin::BImodf: + case Builtin::BImodff: + case Builtin::BImodfl: + case Builtin::BI__builtin_modf: + case Builtin::BI__builtin_modff: + case Builtin::BI__builtin_modfl: +if (Builder.getIsFPConstrained()) + break; // TODO: Emit constrained modf intrinsic once one exists. +return RValue::get(emitModfBuiltin(*this, E, Intrinsic::modf)); case Builtin::BI__builtin_isgreater: case Builtin::BI__builtin_isgreaterequal: case Builtin::BI__builtin_isless: diff --git a/clang/test/CodeGen/X86/math-builtins.c b/clang/test/CodeGen/X86/math-builtins.c index 481d3c043683e..8a85d1f6c3a76 100644 --- a/clang/test/CodeGen/X86/math-builtins.c +++ b/clang/test/CodeGen/X86/math-builtins.c @@ -38,6 +38,24 @@ void foo(double *d, float f, float *fp, long double *l, int *i, const char *c) { // NO__ERRNO-NEXT: [[FREXP_F128_0:%.+]] = extractvalue { fp128, i32 } [[FREXP_F128]], 0 +// NO__ERRNO: [[MODF_F64:%.+]] = call { double, double } @llvm.modf.f64(double %{{.+}}) +// NO__ERRNO-NEXT: [[MODF_F64_FP:%.+]] = extractvalue { double, double } [[MODF_F64]], 0 +// NO__ERRNO-NEXT: [[MODF_F64_IP:%.+]] = extractvalue { double, double } [[MODF_F64]], 1 +// NO__ERRNO-NEXT: store double [[MODF_F64_IP]], ptr %{{.+}}, align 8 + +// NO__ERRNO: [[MODF_F32:%.+]] = call { float, float } @llvm.modf.f32(float %{{.+}}) +// NO__ERRNO-NEXT: [[MODF_F32_FP:%.+]] = extractvalue { float, float } [[MODF_F32]], 0 +// NO__ERRNO-NEXT: [[MODF_F32_IP:%.+]] = extractvalue { float, float } [[MODF_F32]], 1 +// NO__ERRNO-NEXT: store float [[MODF_F32_IP]], ptr %{{.+}}, align 4 + +// NO__ERRNO: [[MODF_F80:%.+]] = call { x86_fp80, x86_fp80 } @llvm.modf.f80(x86_fp80 
%{{.+}}) +// NO__ERRNO-NEXT: [[MODF_F80_FP:%.+]] = extractvalue { x86_fp80, x86_fp80 } [[MODF_F80]], 0 +// NO__ERRNO-NEXT: [[MODF_F80_IP:%.+]] = extractvalue { x86_fp80, x86_fp80 } [[MODF_F80]], 1 +// NO__ERRNO-NEXT: store x86_fp80 [[MODF_F80_IP]], ptr %{{.+}}, align 16 + +// NO__ERRNO: call fp128 @modff128(fp128 noundef %{{.+}}, ptr noundef %{{.+}}) + + // NO__ERRNO: [[SINCOS_F64:%.+]] = call { double, double } @llvm.sincos.f64(double %{{.+}}) // NO__ERRNO-NEXT: [[SINCOS_F64_0:%.+]] = extractvalue { double, double } [[SINCOS_F64]], 0 // NO__ERRNO-NEXT: [[SINCOS_F64_1:%.+]] = extractvalue { double, double } [[SINCOS_F64]], 1 @@ -158,13 +176,13 @@ void foo(double *d, float f, float *fp, long double *l, int *i, const char *c) { __builtin_modf(f,d); __builtin_modff(f,fp); __builtin_modfl(f,l); __builtin_modff128(f,l); -// NO__ERRNO: declare double @modf(double noundef, ptr noundef) [[NOT_READNONE:#[0-9]+]] -
[llvm-branch-commits] [llvm] [CodeGen][NPM] Port MachineBlockPlacementStats to NPM (PR #129853)
optimisan wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack href="https://app.graphite.dev/github/pr/llvm/llvm-project/129853?utm_source=stack-comment-downstack-mergeability-warning"; > >on Graphite. > https://graphite.dev/docs/merge-pull-requests";>Learn more * **#129853** https://app.graphite.dev/github/pr/llvm/llvm-project/129853?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> 👈 https://app.graphite.dev/github/pr/llvm/llvm-project/129853?utm_source=stack-comment-view-in-graphite"; target="_blank">(View in Graphite) * **#129828** https://app.graphite.dev/github/pr/llvm/llvm-project/129828?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * `main` This stack of pull requests is managed by https://graphite.dev?utm-source=stack-comment";>Graphite. Learn more about https://stacking.dev/?utm_source=stack-comment";>stacking. https://github.com/llvm/llvm-project/pull/129853 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Start considering new atomicrmw metadata on integer operations (PR #122138)
arsenm wrote: psdb fails on some atomic tests with these, so they need some debugging https://github.com/llvm/llvm-project/pull/122138 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Replace amdgpu-no-agpr with amdgpu-num-agpr (PR #129893)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes This performs the minimal replacment of amdgpu-no-agpr to amdgpu-num-agpr=0. Most of the test diffs are due to the new attribute sorting later alphabetically. We could do better by trying to perform range merging in the attributor, and trying to pick non-0 values. --- Patch is 168.24 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/129893.diff 45 Files Affected: - (modified) llvm/docs/AMDGPUUsage.rst (+1-6) - (modified) llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp (+7-2) - (modified) llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp (+4-1) - (modified) llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp (+1-7) - (modified) llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers-assertion-after-ra-failure.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll (+3-3) - (modified) llvm/test/CodeGen/AMDGPU/amdgpu-no-agprs-violations.ll (+7-6) - (modified) llvm/test/CodeGen/AMDGPU/amdgpu-num-agpr.ll (+6-6) - (modified) llvm/test/CodeGen/AMDGPU/amdhsa-kernarg-preload-num-sgprs.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll (+21-21) - (modified) llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll (+13-13) - (modified) llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll (+9-9) - (modified) llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit.ll (+6-6) - (modified) llvm/test/CodeGen/AMDGPU/captured-frame-index.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/copy-vgpr-clobber-spill-vgpr.mir (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll (+13-13) - (modified) 
llvm/test/CodeGen/AMDGPU/indirect-call-set-from-other-function.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/invalid-hidden-kernarg-in-kernel-signature.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/issue120256-annotate-constexpr-addrspacecast.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/mfma-bf16-vgpr-cd-select.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/mfma-vgpr-cd-select-gfx942.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/mfma-vgpr-cd-select.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/preload-kernargs.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll (+9-9) - (modified) llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll (+21-21) - (modified) llvm/test/CodeGen/AMDGPU/recursive_global_initializer.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll (+5-5) - (modified) llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll (+3-3) - (modified) llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/smfmac_no_agprs.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/spill-regpressure-less.mir (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll (+3-3) - (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll (+3-3) - (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/vgpr-agpr-limit-gfx90a.ll (+6-6) ``diff diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index 
c317223f49d7c..def6addd595e8 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -1698,11 +1698,6 @@ The AMDGPU backend supports the following LLVM IR attributes. ``amdgpu_max_num_work_groups`` CLANG attribute [CLANG-ATTR]_. Clang only emits this attribute when all the three numbers are >= 1. - "amdgpu-no-agpr" Indicates the function will not require allocating AGPRs. This is only - relevant on subtargets with AGPRs. The behavior is undefined if a - function which requires AGPRs is reached through any function marked - with this attribute. - "amdgpu-hidden-argument"
[llvm-branch-commits] [llvm] AMDGPU: Replace amdgpu-no-agpr with amdgpu-num-agpr (PR #129893)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/129893 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Replace amdgpu-no-agpr with amdgpu-num-agpr (PR #129893)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack href="https://app.graphite.dev/github/pr/llvm/llvm-project/129893?utm_source=stack-comment-downstack-mergeability-warning"; > >on Graphite. > https://graphite.dev/docs/merge-pull-requests";>Learn more * **#129893** https://app.graphite.dev/github/pr/llvm/llvm-project/129893?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> 👈 https://app.graphite.dev/github/pr/llvm/llvm-project/129893?utm_source=stack-comment-view-in-graphite"; target="_blank">(View in Graphite) * **#128034** https://app.graphite.dev/github/pr/llvm/llvm-project/128034?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * `main` This stack of pull requests is managed by https://graphite.dev?utm-source=stack-comment";>Graphite. Learn more about https://stacking.dev/?utm_source=stack-comment";>stacking. https://github.com/llvm/llvm-project/pull/129893 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [Clang] Treat constexpr-unknown value as invalid in `EvaluateAsInitializer` (#128409) (PR #129836)
llvmbot wrote: @shafik What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/129836 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang] [sanitizer] add pseudofunction to indicate array-bounds check (PR #128977)
delcypher wrote: @fmayer The usual approach for indicating instrumentation in Clang is to use opt-remarks. This is the approach we use for `-fbounds-safety`. In `-fbounds-safety` we embed "trap reasons" in debug info so that debuggers and symbolication tools can better understand the reason for trapping. What's the reason for using debug info, instead of opt-remarks here? https://github.com/llvm/llvm-project/pull/128977 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang] [sanitizer] add pseudofunction to indicate array-bounds check (PR #128977)
fmayer wrote: > @fmayer The usual approach for indicating instrumentation in Clang is to use > opt-remarks. This is the approach we use for `-fbounds-safety`. > > In `-fbounds-safety` we embed "trap reasons" in debug info so that debuggers > and symbolication tools can better understand the reason for trapping. > > What's the reason for using debug info, instead of opt-remarks here? The commit description is maybe not very clear. This is not for _compile time_, as opt-remarks are, but for run time. By doing this, we can 1) (more importantly) use profilers to estimate how many cycles we spend on these checks (subject to caveats), 2) more easily see why we crashed. https://github.com/llvm/llvm-project/pull/128977 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [DirectX] Updating DXContainer documentation to add Root Descriptors (PR #129759)
https://github.com/joaosaffran updated https://github.com/llvm/llvm-project/pull/129759 >From b390cd27d2b32f0e3b3d13c8ef3020cbd6af1fa9 Mon Sep 17 00:00:00 2001 From: joaosaffran <126493771+joaosaff...@users.noreply.github.com> Date: Tue, 4 Mar 2025 10:30:07 -0800 Subject: [PATCH 1/5] Adding root descriptor subsection --- llvm/docs/DirectX/DXContainer.rst | 50 ++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/llvm/docs/DirectX/DXContainer.rst b/llvm/docs/DirectX/DXContainer.rst index 0e7026b03a606..14bc802ff6b97 100644 --- a/llvm/docs/DirectX/DXContainer.rst +++ b/llvm/docs/DirectX/DXContainer.rst @@ -496,4 +496,52 @@ signature and passed to the shader without requiring a constant buffer resource: #. **RegisterSpace**: The register space used for the binding. #. **Num32BitValues**: The number of 32-bit values included in this constant buffer. -Root constants provide a fast way to pass small amounts of data directly to the shader without the overhead of creating and binding a constant buffer resource. +Root constants provide a fast way to pass small amounts of data directly to the shader without the overhead +of creating and binding a constant buffer resource. + +Root Descriptor +~~ + +Root descriptors provide a direct mechanism for binding individual resources to shader stages in the Direct3D 12 +rendering pipeline. They represent a critical interface for efficient resource management, allowing applications +to specify how shader stages access specific GPU resources. + +.. 
code-block:: cpp + + // Version 1.0 Root Descriptor + struct RootDescriptor_V1_0 { + uint32_t ShaderRegister; + uint32_t RegisterSpace; + }; + + // Version 1.1 Root Descriptor + struct RootDescriptor_V1_1 { + uint32_t ShaderRegister; + uint32_t RegisterSpace; + // New flags for Version 1.1 + enum Flags { +None= 0x0, +DATA_STATIC = 0x1, +DATA_STATIC_WHILE_SET_AT_EXECUTE = 0x2, +DATA_VOLATILE = 0x4 + }; + + // Bitfield of flags from the Flags enum + uint32_t Flags; + }; + +The Root Descriptor structure has evolved to support two versions, providing enhanced flexibility and +performance optimization capabilities. + +Version 1.0 Root Descriptor +''' +The Version 1.0 RootDescriptor_V1_0 provides basic resource binding: + +#. **ShaderRegister**: The shader register where the descriptor is bound. +#. **RegisterSpace**: The register space used for the binding. + +Version 1.1 Root Descriptor +''' +The Version 1.1 RootDescriptor_V1_1 extends the base structure with the following additional fields: + +#. **Flags**: Provides additional metadata about the descriptor's usage pattern. >From 46face18e140b1313dfdc437e6f4ee03904d245a Mon Sep 17 00:00:00 2001 From: joaosaffran <126493771+joaosaff...@users.noreply.github.com> Date: Tue, 4 Mar 2025 10:32:54 -0800 Subject: [PATCH 2/5] Fix git error --- llvm/docs/DirectX/DXContainer.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/docs/DirectX/DXContainer.rst b/llvm/docs/DirectX/DXContainer.rst index 14bc802ff6b97..93ed9afe42b50 100644 --- a/llvm/docs/DirectX/DXContainer.rst +++ b/llvm/docs/DirectX/DXContainer.rst @@ -496,8 +496,7 @@ signature and passed to the shader without requiring a constant buffer resource: #. **RegisterSpace**: The register space used for the binding. #. **Num32BitValues**: The number of 32-bit values included in this constant buffer. 
-Root constants provide a fast way to pass small amounts of data directly to the shader without the overhead -of creating and binding a constant buffer resource. +Root constants provide a fast way to pass small amounts of data directly to the shader without the overhead of creating and binding a constant buffer resource. Root Descriptor ~~ >From 6a260b3fe40c05b4f159a5d26345229ae221f593 Mon Sep 17 00:00:00 2001 From: joaosaffran <126493771+joaosaff...@users.noreply.github.com> Date: Tue, 4 Mar 2025 10:53:11 -0800 Subject: [PATCH 3/5] Try fix test --- llvm/docs/DirectX/DXContainer.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/docs/DirectX/DXContainer.rst b/llvm/docs/DirectX/DXContainer.rst index 93ed9afe42b50..b9a2067368e0f 100644 --- a/llvm/docs/DirectX/DXContainer.rst +++ b/llvm/docs/DirectX/DXContainer.rst @@ -499,7 +499,7 @@ signature and passed to the shader without requiring a constant buffer resource: Root constants provide a fast way to pass small amounts of data directly to the shader without the overhead of creating and binding a constant buffer resource. Root Descriptor -~~ +~~~ Root descriptors provide a direct mechanism for binding individual resources to shader stages in the Direct3D 12 rendering pipeline. They represent a critical interface for efficient resource
[llvm-branch-commits] [clang] [clang] [sanitizer] add pseudofunction to indicate array-bounds check (PR #128977)
https://github.com/fmayer edited https://github.com/llvm/llvm-project/pull/128977 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang] [sanitizer] add pseudofunction to indicate array-bounds check (PR #128977)
https://github.com/delcypher edited https://github.com/llvm/llvm-project/pull/128977 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang] [sanitizer] add pseudofunction to indicate array-bounds check (PR #128977)
https://github.com/delcypher approved this pull request. Thanks for explaining the purpose. Regarding the "more easily see why we crashed." please be aware [I have a GSoC proposal to basically do this](https://discourse.llvm.org/t/clang-gsoc-2025-usability-improvements-for-trapping-undefined-behavior-sanitizer/84568) using the `CreateTrapFailureMessageFor`. So if possible please don't tackle what I describe in the proposal before a GSoC student has had a chance to do this. To be clear, what you've done in this PR is different from what I'm proposing, so they don't conflict. Also * Please give a chance for Clang Debug Info contributors to look over this (CC @adrian-prantl) before landing this. * When possible add as reviewers previous people who worked on the code. In this particular case I believe this was @ahatanak https://github.com/llvm/llvm-project/pull/128977 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang] [sanitizer] add pseudofunction to indicate array-bounds check (PR #128977)
@@ -3598,6 +3598,14 @@ llvm::DIMacroFile *CGDebugInfo::CreateTempMacroFile(llvm::DIMacroFile *Parent, return DBuilder.createTempMacroFile(Parent, Line, FName); } +llvm::DILocation *CGDebugInfo::CreateSyntheticInline(llvm::DebugLoc Location, + StringRef FuncName) { + llvm::DISubprogram *TrapSP = delcypher wrote: Nit. The name `TrapSP` doesn't make much sense here given this function isn't specifically for traps. https://github.com/llvm/llvm-project/pull/128977 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang] [sanitizer] add pseudofunction to indicate array-bounds check (PR #128977)
@@ -635,6 +635,13 @@ class CGDebugInfo { llvm::DILocation *CreateTrapFailureMessageFor(llvm::DebugLoc TrapLocation, StringRef Category, StringRef FailureMsg); + /// Create a debug location from `Location` that adds an artificial inline + /// frame where the frame name is FuncName + /// + /// This is used to indiciate instructions that come from compiler + /// instrumentation. + llvm::DILocation *CreateSyntheticInline(llvm::DebugLoc Location, delcypher wrote: Nit. Maybe call it `CreateSyntheticInlineAt` ? Those who know more about Clang debug info generation (e.g. @adrian-prantl @felipepiovezan @Michael137 ) might have ideas on a better name though. https://github.com/llvm/llvm-project/pull/128977 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang] [sanitizer] add pseudofunction to indicate array-bounds check (PR #128977)
https://github.com/delcypher edited https://github.com/llvm/llvm-project/pull/128977 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][StaticDataSplitter]Support constant pool partitioning (PR #129781)
@@ -2791,8 +2791,26 @@ void AsmPrinter::emitConstantPool() { if (!CPE.isMachineConstantPoolEntry()) C = CPE.Val.ConstVal; -MCSection *S = getObjFileLowering().getSectionForConstant( -getDataLayout(), Kind, C, Alignment); +MCSection *S = nullptr; mingmingl-llvm wrote: done. https://github.com/llvm/llvm-project/pull/129781 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][StaticDataSplitter]Support constant pool partitioning (PR #129781)
@@ -1072,6 +1072,41 @@ MCSection *TargetLoweringObjectFileELF::getSectionForConstant( return DataRelROSection; } +MCSection *TargetLoweringObjectFileELF::getSectionForConstant( +const DataLayout &DL, SectionKind Kind, const Constant *C, Align &Alignment, +StringRef SectionPrefix) const { mingmingl-llvm wrote: done. https://github.com/llvm/llvm-project/pull/129781 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][StaticDataSplitter]Support constant pool partitioning (PR #129781)
@@ -203,17 +218,34 @@ void StaticDataSplitter::updateStatsWithProfiles(const MachineFunction &MF) { void StaticDataSplitter::annotateStaticDataWithoutProfiles( const MachineFunction &MF) { + const MachineConstantPool *MCP = MF.getConstantPool(); for (const auto &MBB : MF) { for (const MachineInstr &I : MBB) { for (const MachineOperand &Op : I.operands()) { -if (!Op.isGlobal()) - continue; -const GlobalVariable *GV = -getLocalLinkageGlobalVariable(Op.getGlobal()); -if (!GV || GV->getName().starts_with("llvm.") || -!inStaticDataSection(GV, MF.getTarget())) +if (!Op.isGlobal() && !Op.isCPI()) continue; -SDPI->addConstantProfileCount(GV, std::nullopt); +if (Op.isGlobal()) { + const GlobalVariable *GV = + getLocalLinkageGlobalVariable(Op.getGlobal()); + if (!GV || GV->getName().starts_with("llvm.") || + !inStaticDataSection(GV, MF.getTarget())) +continue; + SDPI->addConstantProfileCount(GV, std::nullopt); +} else { + assert(Op.isCPI() && "Op must be constant pool index in this branch"); mingmingl-llvm wrote: Added `getConstant` helper function to share code between profile and non-profile path. https://github.com/llvm/llvm-project/pull/129781 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][StaticDataSplitter]Support constant pool partitioning (PR #129781)
@@ -386,6 +386,16 @@ MCSection *TargetLoweringObjectFile::getSectionForConstant( return DataSection; } +MCSection *TargetLoweringObjectFile::getSectionForConstant( +const DataLayout &DL, SectionKind Kind, const Constant *C, Align &Alignment, +StringRef SectionPrefix) const { + // Fallback to `getSectionForConstant` without `SectionPrefix` parameter if it + // is empty. + if (SectionPrefix.empty()) mingmingl-llvm wrote: Do you mean something like `assert(!SectionPrefix.empty() && "Call another method if section prefix is empty")` here? I think with the refactor suggested above (https://github.com/llvm/llvm-project/pull/129781/files#r1980454779), we can allow the new interface to handle an empty section prefix by falling back to the original interface. What do you think about it? https://github.com/llvm/llvm-project/pull/129781 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][StaticDataSplitter]Support constant pool partitioning (PR #129781)
https://github.com/mingmingl-llvm updated https://github.com/llvm/llvm-project/pull/129781 >From 072c44f0f9272682480cc2837196a906bd694276 Mon Sep 17 00:00:00 2001 From: mingmingl Date: Fri, 28 Feb 2025 14:41:56 -0800 Subject: [PATCH 1/2] [CodeGen][StaticDataSplitter]Support constant pool partitioning --- llvm/include/llvm/CodeGen/AsmPrinter.h| 8 + .../CodeGen/TargetLoweringObjectFileImpl.h| 6 + .../llvm/Target/TargetLoweringObjectFile.h| 7 + llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp| 22 ++- llvm/lib/CodeGen/StaticDataSplitter.cpp | 56 +-- .../CodeGen/TargetLoweringObjectFileImpl.cpp | 35 + llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 10 ++ llvm/lib/Target/TargetLoweringObjectFile.cpp | 10 ++ llvm/lib/Target/X86/X86AsmPrinter.cpp | 10 ++ .../AArch64/constant-pool-partition.ll| 141 ++ .../CodeGen/X86/constant-pool-partition.ll| 131 11 files changed, 422 insertions(+), 14 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/constant-pool-partition.ll create mode 100644 llvm/test/CodeGen/X86/constant-pool-partition.ll diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h index 3da63af5ba571..2018f411be796 100644 --- a/llvm/include/llvm/CodeGen/AsmPrinter.h +++ b/llvm/include/llvm/CodeGen/AsmPrinter.h @@ -18,6 +18,8 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/StaticDataProfileInfo.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/DwarfStringPoolEntry.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -132,6 +134,12 @@ class AsmPrinter : public MachineFunctionPass { /// default, this is equal to CurrentFnSym. MCSymbol *CurrentFnSymForSize = nullptr; + /// Provides the profile information for constants. + const StaticDataProfileInfo *SDPI = nullptr; + + /// The profile summary information. 
+ const ProfileSummaryInfo *PSI = nullptr; + /// Map a basic block section ID to the begin and end symbols of that section /// which determine the section's range. struct MBBSectionRange { diff --git a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h index 10f0594c267ae..563980fb24ab8 100644 --- a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h +++ b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h @@ -68,6 +68,12 @@ class TargetLoweringObjectFileELF : public TargetLoweringObjectFile { const Constant *C, Align &Alignment) const override; + /// Similar to the function above, but append \p SectionSuffix to the section + /// name. + MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind, + const Constant *C, Align &Alignment, + StringRef SectionSuffix) const override; + MCSection *getExplicitSectionGlobal(const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const override; diff --git a/llvm/include/llvm/Target/TargetLoweringObjectFile.h b/llvm/include/llvm/Target/TargetLoweringObjectFile.h index a5ed1b29dc1bc..1956748b8058b 100644 --- a/llvm/include/llvm/Target/TargetLoweringObjectFile.h +++ b/llvm/include/llvm/Target/TargetLoweringObjectFile.h @@ -104,6 +104,13 @@ class TargetLoweringObjectFile : public MCObjectFileInfo { SectionKind Kind, const Constant *C, Align &Alignment) const; + /// Similar to the function above, but append \p SectionSuffix to the section + /// name. 
+ virtual MCSection *getSectionForConstant(const DataLayout &DL, + SectionKind Kind, const Constant *C, + Align &Alignment, + StringRef SectionSuffix) const; + virtual MCSection * getSectionForMachineBasicBlock(const Function &F, const MachineBasicBlock &MBB, diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 3c4280333e76d..60018afe2f8a7 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -2791,8 +2791,26 @@ void AsmPrinter::emitConstantPool() { if (!CPE.isMachineConstantPoolEntry()) C = CPE.Val.ConstVal; -MCSection *S = getObjFileLowering().getSectionForConstant( -getDataLayout(), Kind, C, Alignment); +MCSection *S = nullptr; +if (TM.Options.EnableStaticDataPartitioning) { + SmallString<8> SectionNameSuffix; + if (C && SDPI && PSI) { +auto Count = SDPI->getConstantProfileCount(C); +if (Count) { +
[llvm-branch-commits] [llvm] [InstCombine] Enable select freeze poison folding when storing value (PR #129776)
jmciver wrote: @nikic and @dtcxzyw thanks for the feedback. This patch does bypass the need for freeze poison -> null canonicalization removal in InstCombine. To provide context, I am seeing the lack of store-of-select freeze poison folding in the uninitialized memory semantics work that I am doing with @nlopes. I would like to incorporate this and then, once freeze poison -> null canonicalization is removed from InstCombine, refactor appropriately. Would this be acceptable? https://github.com/llvm/llvm-project/pull/129776 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits