[llvm-branch-commits] [lldb] release/20.x: [lldb] Use correct path for lldb-server executable (#131519) (PR #134072)
https://github.com/labath approved this pull request. https://github.com/llvm/llvm-project/pull/134072 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] release/20.x: [wasm-ld] Refactor WasmSym from static globals to per-link context (#134970) (PR #137620)
https://github.com/anutosh491 created https://github.com/llvm/llvm-project/pull/137620 Backport : https://github.com/llvm/llvm-project/commit/9cbbb74d370c09e13b8412f21dccb7d2c4afc6a4 >From 8efbc116707fd482ddb2d3d890bbd93c0b01427b Mon Sep 17 00:00:00 2001 From: Anutosh Bhat Date: Fri, 25 Apr 2025 20:05:00 +0530 Subject: [PATCH] [wasm-ld] Refactor WasmSym from static globals to per-link context (#134970) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Towards This change moves WasmSym from a static global struct to an instance owned by Ctx, allowing it to be reset cleanly between linker runs. This enables safe support for multiple invocations of wasm-ld within the same process Changes done - Converted WasmSym from a static struct to a regular struct with instance members. - Added a std::unique_ptr wasmSym field inside Ctx. - Reset wasmSym in Ctx::reset() to clear state between links. - Replaced all WasmSym:: references with ctx.wasmSym->. - Removed global symbol definitions from Symbols.cpp that are no longer needed. Clearing wasmSym in ctx.reset() ensures a clean slate for each link invocation, preventing symbol leakage across runs—critical when using wasm-ld/lld as a reentrant library where global state can cause subtle, hard-to-debug errors. 
- Co-authored-by: Vassil Vassilev (cherry picked from commit 9cbbb74d370c09e13b8412f21dccb7d2c4afc6a4) --- lld/wasm/Config.h | 110 +++ lld/wasm/Driver.cpp| 64 +-- lld/wasm/InputChunks.cpp | 10 +- lld/wasm/MarkLive.cpp | 6 +- lld/wasm/OutputSections.cpp| 4 +- lld/wasm/Symbols.cpp | 25 - lld/wasm/Symbols.h | 99 - lld/wasm/SyntheticSections.cpp | 32 +++--- lld/wasm/Writer.cpp| 187 + 9 files changed, 262 insertions(+), 275 deletions(-) diff --git a/lld/wasm/Config.h b/lld/wasm/Config.h index 1fa6c42d9cd86..71dabaedb8a8c 100644 --- a/lld/wasm/Config.h +++ b/lld/wasm/Config.h @@ -32,6 +32,11 @@ class InputTable; class InputGlobal; class InputFunction; class Symbol; +class DefinedData; +class GlobalSymbol; +class DefinedFunction; +class UndefinedGlobal; +class TableSymbol; // For --unresolved-symbols. enum class UnresolvedPolicy { ReportError, Warn, Ignore, ImportDynamic }; @@ -139,6 +144,111 @@ struct Ctx { llvm::SmallVector syntheticGlobals; llvm::SmallVector syntheticTables; + // linker-generated symbols + struct WasmSym { +// __global_base +// Symbol marking the start of the global section. +DefinedData *globalBase; + +// __stack_pointer/__stack_low/__stack_high +// Global that holds current value of stack pointer and data symbols marking +// the start and end of the stack region. stackPointer is initialized to +// stackHigh and grows downwards towards stackLow +GlobalSymbol *stackPointer; +DefinedData *stackLow; +DefinedData *stackHigh; + +// __tls_base +// Global that holds the address of the base of the current thread's +// TLS block. +GlobalSymbol *tlsBase; + +// __tls_size +// Symbol whose value is the size of the TLS block. +GlobalSymbol *tlsSize; + +// __tls_size +// Symbol whose value is the alignment of the TLS block. +GlobalSymbol *tlsAlign; + +// __data_end +// Symbol marking the end of the data and bss. +DefinedData *dataEnd; + +// __heap_base/__heap_end +// Symbols marking the beginning and end of the "heap". 
It starts at the end +// of the data, bss and explicit stack, and extends to the end of the linear +// memory allocated by wasm-ld. This region of memory is not used by the +// linked code, so it may be used as a backing store for `sbrk` or `malloc` +// implementations. +DefinedData *heapBase; +DefinedData *heapEnd; + +// __wasm_first_page_end +// A symbol whose address is the end of the first page in memory (if any). +DefinedData *firstPageEnd; + +// __wasm_init_memory_flag +// Symbol whose contents are nonzero iff memory has already been +// initialized. +DefinedData *initMemoryFlag; + +// __wasm_init_memory +// Function that initializes passive data segments during instantiation. +DefinedFunction *initMemory; + +// __wasm_call_ctors +// Function that directly calls all ctors in priority order. +DefinedFunction *callCtors; + +// __wasm_call_dtors +// Function that calls the libc/etc. cleanup function. +DefinedFunction *callDtors; + +// __wasm_apply_global_relocs +// Function that applies relocations to wasm globals post-instantiation. +// Unlike __wasm_apply_data_relocs this needs to run on every thread. +DefinedFunction *applyGlobalRelocs; + +// __wasm_apply_tls_relocs +// Like __wasm_apply_data_relocs but for TLS section. These must be +// delayed until __wasm_init_tls. +De
[llvm-branch-commits] [lld] release/20.x: [wasm-ld] Refactor WasmSym from static globals to per-link context (#134970) (PR #137620)
llvmbot wrote: @llvm/pr-subscribers-lld-wasm Author: Anutosh Bhat (anutosh491) Changes Backport : https://github.com/llvm/llvm-project/commit/9cbbb74d370c09e13b8412f21dccb7d2c4afc6a4 --- Patch is 44.21 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/137620.diff 9 Files Affected: - (modified) lld/wasm/Config.h (+110) - (modified) lld/wasm/Driver.cpp (+34-30) - (modified) lld/wasm/InputChunks.cpp (+5-5) - (modified) lld/wasm/MarkLive.cpp (+3-3) - (modified) lld/wasm/OutputSections.cpp (+2-2) - (modified) lld/wasm/Symbols.cpp (-25) - (modified) lld/wasm/Symbols.h (-99) - (modified) lld/wasm/SyntheticSections.cpp (+14-18) - (modified) lld/wasm/Writer.cpp (+94-93) ``diff diff --git a/lld/wasm/Config.h b/lld/wasm/Config.h index 1fa6c42d9cd86..71dabaedb8a8c 100644 --- a/lld/wasm/Config.h +++ b/lld/wasm/Config.h @@ -32,6 +32,11 @@ class InputTable; class InputGlobal; class InputFunction; class Symbol; +class DefinedData; +class GlobalSymbol; +class DefinedFunction; +class UndefinedGlobal; +class TableSymbol; // For --unresolved-symbols. enum class UnresolvedPolicy { ReportError, Warn, Ignore, ImportDynamic }; @@ -139,6 +144,111 @@ struct Ctx { llvm::SmallVector syntheticGlobals; llvm::SmallVector syntheticTables; + // linker-generated symbols + struct WasmSym { +// __global_base +// Symbol marking the start of the global section. +DefinedData *globalBase; + +// __stack_pointer/__stack_low/__stack_high +// Global that holds current value of stack pointer and data symbols marking +// the start and end of the stack region. stackPointer is initialized to +// stackHigh and grows downwards towards stackLow +GlobalSymbol *stackPointer; +DefinedData *stackLow; +DefinedData *stackHigh; + +// __tls_base +// Global that holds the address of the base of the current thread's +// TLS block. +GlobalSymbol *tlsBase; + +// __tls_size +// Symbol whose value is the size of the TLS block. 
+GlobalSymbol *tlsSize; + +// __tls_size +// Symbol whose value is the alignment of the TLS block. +GlobalSymbol *tlsAlign; + +// __data_end +// Symbol marking the end of the data and bss. +DefinedData *dataEnd; + +// __heap_base/__heap_end +// Symbols marking the beginning and end of the "heap". It starts at the end +// of the data, bss and explicit stack, and extends to the end of the linear +// memory allocated by wasm-ld. This region of memory is not used by the +// linked code, so it may be used as a backing store for `sbrk` or `malloc` +// implementations. +DefinedData *heapBase; +DefinedData *heapEnd; + +// __wasm_first_page_end +// A symbol whose address is the end of the first page in memory (if any). +DefinedData *firstPageEnd; + +// __wasm_init_memory_flag +// Symbol whose contents are nonzero iff memory has already been +// initialized. +DefinedData *initMemoryFlag; + +// __wasm_init_memory +// Function that initializes passive data segments during instantiation. +DefinedFunction *initMemory; + +// __wasm_call_ctors +// Function that directly calls all ctors in priority order. +DefinedFunction *callCtors; + +// __wasm_call_dtors +// Function that calls the libc/etc. cleanup function. +DefinedFunction *callDtors; + +// __wasm_apply_global_relocs +// Function that applies relocations to wasm globals post-instantiation. +// Unlike __wasm_apply_data_relocs this needs to run on every thread. +DefinedFunction *applyGlobalRelocs; + +// __wasm_apply_tls_relocs +// Like __wasm_apply_data_relocs but for TLS section. These must be +// delayed until __wasm_init_tls. +DefinedFunction *applyTLSRelocs; + +// __wasm_apply_global_tls_relocs +// Like applyGlobalRelocs but for globals that hold TLS addresses. These +// must be delayed until __wasm_init_tls. +DefinedFunction *applyGlobalTLSRelocs; + +// __wasm_init_tls +// Function that allocates thread-local storage and initializes it. 
+DefinedFunction *initTLS; + +// Pointer to the function that is to be used in the start section. +// (normally an alias of initMemory, or applyGlobalRelocs). +DefinedFunction *startFunction; + +// __dso_handle +// Symbol used in calls to __cxa_atexit to determine current DLL +DefinedData *dsoHandle; + +// __table_base +// Used in PIC code for offset of indirect function table +UndefinedGlobal *tableBase; +DefinedData *definedTableBase; + +// __memory_base +// Used in PIC code for offset of global data +UndefinedGlobal *memoryBase; +DefinedData *definedMemoryBase; + +// __indirect_function_table +// Used as an address space for function pointers, with each function that +// is used as a function pointer being allocated a slot. +TableSymbol *indirectFunctionTable
[llvm-branch-commits] [lld] release/20.x: [wasm-ld] Refactor WasmSym from static globals to per-link context (#134970) (PR #137620)
llvmbot wrote: @llvm/pr-subscribers-lld Author: Anutosh Bhat (anutosh491) Changes Backport : https://github.com/llvm/llvm-project/commit/9cbbb74d370c09e13b8412f21dccb7d2c4afc6a4 --- Patch is 44.21 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/137620.diff 9 Files Affected: - (modified) lld/wasm/Config.h (+110) - (modified) lld/wasm/Driver.cpp (+34-30) - (modified) lld/wasm/InputChunks.cpp (+5-5) - (modified) lld/wasm/MarkLive.cpp (+3-3) - (modified) lld/wasm/OutputSections.cpp (+2-2) - (modified) lld/wasm/Symbols.cpp (-25) - (modified) lld/wasm/Symbols.h (-99) - (modified) lld/wasm/SyntheticSections.cpp (+14-18) - (modified) lld/wasm/Writer.cpp (+94-93) ``diff diff --git a/lld/wasm/Config.h b/lld/wasm/Config.h index 1fa6c42d9cd86..71dabaedb8a8c 100644 --- a/lld/wasm/Config.h +++ b/lld/wasm/Config.h @@ -32,6 +32,11 @@ class InputTable; class InputGlobal; class InputFunction; class Symbol; +class DefinedData; +class GlobalSymbol; +class DefinedFunction; +class UndefinedGlobal; +class TableSymbol; // For --unresolved-symbols. enum class UnresolvedPolicy { ReportError, Warn, Ignore, ImportDynamic }; @@ -139,6 +144,111 @@ struct Ctx { llvm::SmallVector syntheticGlobals; llvm::SmallVector syntheticTables; + // linker-generated symbols + struct WasmSym { +// __global_base +// Symbol marking the start of the global section. +DefinedData *globalBase; + +// __stack_pointer/__stack_low/__stack_high +// Global that holds current value of stack pointer and data symbols marking +// the start and end of the stack region. stackPointer is initialized to +// stackHigh and grows downwards towards stackLow +GlobalSymbol *stackPointer; +DefinedData *stackLow; +DefinedData *stackHigh; + +// __tls_base +// Global that holds the address of the base of the current thread's +// TLS block. +GlobalSymbol *tlsBase; + +// __tls_size +// Symbol whose value is the size of the TLS block. 
+GlobalSymbol *tlsSize; + +// __tls_size +// Symbol whose value is the alignment of the TLS block. +GlobalSymbol *tlsAlign; + +// __data_end +// Symbol marking the end of the data and bss. +DefinedData *dataEnd; + +// __heap_base/__heap_end +// Symbols marking the beginning and end of the "heap". It starts at the end +// of the data, bss and explicit stack, and extends to the end of the linear +// memory allocated by wasm-ld. This region of memory is not used by the +// linked code, so it may be used as a backing store for `sbrk` or `malloc` +// implementations. +DefinedData *heapBase; +DefinedData *heapEnd; + +// __wasm_first_page_end +// A symbol whose address is the end of the first page in memory (if any). +DefinedData *firstPageEnd; + +// __wasm_init_memory_flag +// Symbol whose contents are nonzero iff memory has already been +// initialized. +DefinedData *initMemoryFlag; + +// __wasm_init_memory +// Function that initializes passive data segments during instantiation. +DefinedFunction *initMemory; + +// __wasm_call_ctors +// Function that directly calls all ctors in priority order. +DefinedFunction *callCtors; + +// __wasm_call_dtors +// Function that calls the libc/etc. cleanup function. +DefinedFunction *callDtors; + +// __wasm_apply_global_relocs +// Function that applies relocations to wasm globals post-instantiation. +// Unlike __wasm_apply_data_relocs this needs to run on every thread. +DefinedFunction *applyGlobalRelocs; + +// __wasm_apply_tls_relocs +// Like __wasm_apply_data_relocs but for TLS section. These must be +// delayed until __wasm_init_tls. +DefinedFunction *applyTLSRelocs; + +// __wasm_apply_global_tls_relocs +// Like applyGlobalRelocs but for globals that hold TLS addresses. These +// must be delayed until __wasm_init_tls. +DefinedFunction *applyGlobalTLSRelocs; + +// __wasm_init_tls +// Function that allocates thread-local storage and initializes it. 
+DefinedFunction *initTLS; + +// Pointer to the function that is to be used in the start section. +// (normally an alias of initMemory, or applyGlobalRelocs). +DefinedFunction *startFunction; + +// __dso_handle +// Symbol used in calls to __cxa_atexit to determine current DLL +DefinedData *dsoHandle; + +// __table_base +// Used in PIC code for offset of indirect function table +UndefinedGlobal *tableBase; +DefinedData *definedTableBase; + +// __memory_base +// Used in PIC code for offset of global data +UndefinedGlobal *memoryBase; +DefinedData *definedMemoryBase; + +// __indirect_function_table +// Used as an address space for function pointers, with each function that +// is used as a function pointer being allocated a slot. +TableSymbol *indirectFunctionTable; +
[llvm-branch-commits] [llvm] [AMDGPU] Remove the pass `AMDGPUPromoteKernelArguments` (PR #137655)
@@ -11,11 +10,9 @@ define amdgpu_kernel void @ptr_nest_3(ptr addrspace(1) nocapture readonly %Arg) ; CHECK-NEXT: entry: ; CHECK-NEXT:[[I:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() ; CHECK-NEXT:[[P1:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) [[ARG:%.*]], i32 [[I]] -; CHECK-NEXT:[[P2:%.*]] = load ptr, ptr addrspace(1) [[P1]], align 8, !amdgpu.noclobber [[META0:![0-9]+]] -; CHECK-NEXT:[[P2_GLOBAL:%.*]] = addrspacecast ptr [[P2]] to ptr addrspace(1) -; CHECK-NEXT:[[P3:%.*]] = load ptr, ptr addrspace(1) [[P2_GLOBAL]], align 8, !amdgpu.noclobber [[META0]] -; CHECK-NEXT:[[P3_GLOBAL:%.*]] = addrspacecast ptr [[P3]] to ptr addrspace(1) -; CHECK-NEXT:store float 0.00e+00, ptr addrspace(1) [[P3_GLOBAL]], align 4 +; CHECK-NEXT:[[P2:%.*]] = load ptr, ptr addrspace(1) [[P1]], align 8 +; CHECK-NEXT:[[P3:%.*]] = load ptr, ptr [[P2]], align 8 rampitec wrote: I think you can have an invalid pointer anywhere, but it is up to the program not to dereference an invalid pointer. In practice it cannot be anything but global, as it is passed from the host. Even if another kernel places some other pointer there, it is illegal to use it, and it is up to the developer not to do it. It should not prevent the optimization. https://github.com/llvm/llvm-project/pull/137655 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: detect signing oracles (PR #134146)
https://github.com/kbeyls approved this pull request. Thanks for your patience with my many questions! This looks good to merge to me now. https://github.com/llvm/llvm-project/pull/134146 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: detect signing oracles (PR #134146)
https://github.com/kbeyls edited https://github.com/llvm/llvm-project/pull/134146 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: detect signing oracles (PR #134146)
@@ -462,7 +563,22 @@ class DataflowSrcSafetyAnalysis return DFParent::getStateBefore(Inst); } - void run() override { DFParent::run(); } + void run() override { +for (BinaryBasicBlock &BB : Func) { + if (auto CheckerInfo = BC.MIB->getAuthCheckedReg(BB)) { +MCInst *LastInstOfChecker = BB.getLastNonPseudoInstr(); +LLVM_DEBUG({ + dbgs() << "Found pointer checking sequence in " << BB.getName() + << ":\n"; + traceReg(BC, "Checked register", CheckerInfo->first); + traceInst(BC, "First instruction", *CheckerInfo->second); + traceInst(BC, "Last instruction", *LastInstOfChecker); +}); +CheckerSequenceInfo[LastInstOfChecker] = *CheckerInfo; + } +} kbeyls wrote: Fair enough, let's leave it as it is. https://github.com/llvm/llvm-project/pull/134146 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: detect signing oracles (PR #134146)
@@ -591,7 +591,9 @@ obscure_indirect_call_arg_nocfg: .globl safe_lr_at_function_entry_nocfg .type safe_lr_at_function_entry_nocfg,@function safe_lr_at_function_entry_nocfg: -// CHECK-NOT: safe_lr_at_function_entry_nocfg +// Due to state being reset after a label, paciasp is reported as +// a signing oracle - this is a known false positive, ignore it. +// CHECK-NOT: non-protected call{{.*}}safe_lr_at_function_entry_nocfg cbz x0, 1f ret// LR is safe at the start of the function 1: kbeyls wrote: Thanks, that's useful info to know! FWIW, my experience on pac-ret is that most code generated by the compiler follows mostly the same very regular structure, so I'm not surprised that you're not getting many false positives on llvm-test-suite. In my experience with pac-ret scanning, you get most false positives when scanning across a full distribution, on pieces of code that were not generated by a popular compiler, such as hand-written assembly... https://github.com/llvm/llvm-project/pull/134146 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: detect signing oracles (PR #134146)
@@ -355,6 +389,46 @@ class SrcSafetyAnalysis { return Regs; } + // Returns all registers made trusted by this instruction. + SmallVector getRegsMadeTrusted(const MCInst &Point, +const SrcState &Cur) const { +SmallVector Regs; +const MCPhysReg NoReg = BC.MIB->getNoRegister(); + +// An authenticated pointer can be checked, or +MCPhysReg CheckedReg = +BC.MIB->getAuthCheckedReg(Point, /*MayOverwrite=*/false); +if (CheckedReg != NoReg && Cur.SafeToDerefRegs[CheckedReg]) + Regs.push_back(CheckedReg); + +if (CheckerSequenceInfo.contains(&Point)) { + MCPhysReg CheckedReg; + const MCInst *FirstCheckerInst; + std::tie(CheckedReg, FirstCheckerInst) = CheckerSequenceInfo.at(&Point); + + // FirstCheckerInst should belong to the same basic block, meaning + // it was deterministically processed a few steps before this instruction. + const SrcState &StateBeforeChecker = + getStateBefore(*FirstCheckerInst).get(); kbeyls wrote: Thanks, that all makes sense. https://github.com/llvm/llvm-project/pull/134146 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Remove the pass `AMDGPUPromoteKernelArguments` (PR #137655)
shiltian wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite (https://app.graphite.dev/github/pr/llvm/llvm-project/137655?utm_source=stack-comment-downstack-mergeability-warning). > Learn more: https://graphite.dev/docs/merge-pull-requests * **#137655** (https://app.graphite.dev/github/pr/llvm/llvm-project/137655?utm_source=stack-comment-icon) 👈 (View in Graphite: https://app.graphite.dev/github/pr/llvm/llvm-project/137655?utm_source=stack-comment-view-in-graphite) * **#137488** (https://app.graphite.dev/github/pr/llvm/llvm-project/137488?utm_source=stack-comment-icon) * `main` This stack of pull requests is managed by Graphite (https://graphite.dev?utm-source=stack-comment). Learn more about stacking: https://stacking.dev/?utm_source=stack-comment https://github.com/llvm/llvm-project/pull/137655 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Remove the pass `AMDGPUPromoteKernelArguments` (PR #137655)
https://github.com/shiltian ready_for_review https://github.com/llvm/llvm-project/pull/137655 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Remove the pass `AMDGPUPromoteKernelArguments` (PR #137655)
@@ -11,11 +10,9 @@ define amdgpu_kernel void @ptr_nest_3(ptr addrspace(1) nocapture readonly %Arg) ; CHECK-NEXT: entry: ; CHECK-NEXT:[[I:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() ; CHECK-NEXT:[[P1:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) [[ARG:%.*]], i32 [[I]] -; CHECK-NEXT:[[P2:%.*]] = load ptr, ptr addrspace(1) [[P1]], align 8, !amdgpu.noclobber [[META0:![0-9]+]] -; CHECK-NEXT:[[P2_GLOBAL:%.*]] = addrspacecast ptr [[P2]] to ptr addrspace(1) -; CHECK-NEXT:[[P3:%.*]] = load ptr, ptr addrspace(1) [[P2_GLOBAL]], align 8, !amdgpu.noclobber [[META0]] -; CHECK-NEXT:[[P3_GLOBAL:%.*]] = addrspacecast ptr [[P3]] to ptr addrspace(1) -; CHECK-NEXT:store float 0.00e+00, ptr addrspace(1) [[P3_GLOBAL]], align 4 +; CHECK-NEXT:[[P2:%.*]] = load ptr, ptr addrspace(1) [[P1]], align 8 +; CHECK-NEXT:[[P3:%.*]] = load ptr, ptr [[P2]], align 8 shiltian wrote: IIUC, `AMDGPUPromoteKernelArgumentsPass` assumes that if a struct is in AS1, then a pointer inside that struct is also in AS1. Technically, this isn't strictly correct, but in practice it might be "fine" to assume so, because sharing a non-AS1 pointer through global memory doesn't seem to have any practical use. That said, this case can't really be handled by `infer-address-space` or anything else at the moment. https://github.com/llvm/llvm-project/pull/137655 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Remove the pass `AMDGPUPromoteKernelArguments` (PR #137655)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Shilei Tian (shiltian) Changes --- Patch is 30.33 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/137655.diff 7 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPU.h (-9) - (modified) llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def (-2) - (removed) llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp (-219) - (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (-13) - (modified) llvm/lib/Target/AMDGPU/CMakeLists.txt (-1) - (modified) llvm/test/CodeGen/AMDGPU/promote-kernel-arguments.ll (+52-68) - (modified) llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn (-1) ``diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 4ff761ec19b3c..edbded03957dd 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -113,15 +113,6 @@ FunctionPass *createAMDGPULowerKernelArgumentsPass(); void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &); extern char &AMDGPULowerKernelArgumentsID; -FunctionPass *createAMDGPUPromoteKernelArgumentsPass(); -void initializeAMDGPUPromoteKernelArgumentsPass(PassRegistry &); -extern char &AMDGPUPromoteKernelArgumentsID; - -struct AMDGPUPromoteKernelArgumentsPass -: PassInfoMixin { - PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); -}; - ModulePass *createAMDGPULowerKernelAttributesPass(); void initializeAMDGPULowerKernelAttributesPass(PassRegistry &); extern char &AMDGPULowerKernelAttributesID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def index 98a1147ef6d66..30cf06d3b3dd0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def @@ -59,8 +59,6 @@ FUNCTION_PASS("amdgpu-lower-kernel-attributes", FUNCTION_PASS("amdgpu-promote-alloca", AMDGPUPromoteAllocaPass(*this)) FUNCTION_PASS("amdgpu-promote-alloca-to-vector", 
AMDGPUPromoteAllocaToVectorPass(*this)) -FUNCTION_PASS("amdgpu-promote-kernel-arguments", - AMDGPUPromoteKernelArgumentsPass()) FUNCTION_PASS("amdgpu-rewrite-undef-for-phi", AMDGPURewriteUndefForPHIPass()) FUNCTION_PASS("amdgpu-simplifylib", AMDGPUSimplifyLibCallsPass()) FUNCTION_PASS("amdgpu-unify-divergent-exit-nodes", diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp deleted file mode 100644 index 06819d05b4be6..0 --- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp +++ /dev/null @@ -1,219 +0,0 @@ -//===-- AMDGPUPromoteKernelArguments.cpp --===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===--===// -// -/// \file This pass recursively promotes generic pointer arguments of a kernel -/// into the global address space. -/// -/// The pass walks kernel's pointer arguments, then loads from them. If a loaded -/// value is a pointer and loaded pointer is unmodified in the kernel before the -/// load, then promote loaded pointer to global. Then recursively continue. 
-// -//===--===// - -#include "AMDGPU.h" -#include "AMDGPUMemoryUtils.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/MemorySSA.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/InitializePasses.h" - -#define DEBUG_TYPE "amdgpu-promote-kernel-arguments" - -using namespace llvm; - -namespace { - -class AMDGPUPromoteKernelArguments : public FunctionPass { - MemorySSA *MSSA; - - AliasAnalysis *AA; - - Instruction *ArgCastInsertPt; - - SmallVector Ptrs; - - void enqueueUsers(Value *Ptr); - - bool promotePointer(Value *Ptr); - - bool promoteLoad(LoadInst *LI); - -public: - static char ID; - - AMDGPUPromoteKernelArguments() : FunctionPass(ID) {} - - bool run(Function &F, MemorySSA &MSSA, AliasAnalysis &AA); - - bool runOnFunction(Function &F) override; - - void getAnalysisUsage(AnalysisUsage &AU) const override { -AU.addRequired(); -AU.addRequired(); -AU.setPreservesAll(); - } -}; - -} // end anonymous namespace - -void AMDGPUPromoteKernelArguments::enqueueUsers(Value *Ptr) { - SmallVector PtrUsers(Ptr->users()); - - while (!PtrUsers.empty()) { -Instruction *U = dyn_cast(PtrUsers.pop_back_val()); -if (!U) - continue; - -switch (U->getOpcode()) { -default: - break; -case Instruction::Load: { - LoadInst *LD = cast(U); - if (LD->getPointerOperand()->stripInBoundsOffsets() == Ptr && - !AMDGPU::isClobberedInFunction(LD, MSSA, AA)) -Ptrs.push_back(LD); - - break; -
[llvm-branch-commits] [llvm] [AMDGPU] Remove the pass `AMDGPUPromoteKernelArguments` (PR #137655)
@@ -11,11 +10,9 @@ define amdgpu_kernel void @ptr_nest_3(ptr addrspace(1) nocapture readonly %Arg) ; CHECK-NEXT: entry: ; CHECK-NEXT:[[I:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() ; CHECK-NEXT:[[P1:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) [[ARG:%.*]], i32 [[I]] -; CHECK-NEXT:[[P2:%.*]] = load ptr, ptr addrspace(1) [[P1]], align 8, !amdgpu.noclobber [[META0:![0-9]+]] -; CHECK-NEXT:[[P2_GLOBAL:%.*]] = addrspacecast ptr [[P2]] to ptr addrspace(1) -; CHECK-NEXT:[[P3:%.*]] = load ptr, ptr addrspace(1) [[P2_GLOBAL]], align 8, !amdgpu.noclobber [[META0]] -; CHECK-NEXT:[[P3_GLOBAL:%.*]] = addrspacecast ptr [[P3]] to ptr addrspace(1) -; CHECK-NEXT:store float 0.00e+00, ptr addrspace(1) [[P3_GLOBAL]], align 4 +; CHECK-NEXT:[[P2:%.*]] = load ptr, ptr addrspace(1) [[P1]], align 8 +; CHECK-NEXT:[[P3:%.*]] = load ptr, ptr [[P2]], align 8 rampitec wrote: The pass is important for performance, especially for HIP. A pointer passed from host cannot be anything but global and be valid. So, this is a surprising change. https://github.com/llvm/llvm-project/pull/137655 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Remove the pass `AMDGPUPromoteKernelArguments` (PR #137655)
@@ -11,11 +10,9 @@ define amdgpu_kernel void @ptr_nest_3(ptr addrspace(1) nocapture readonly %Arg) ; CHECK-NEXT: entry: ; CHECK-NEXT:[[I:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() ; CHECK-NEXT:[[P1:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) [[ARG:%.*]], i32 [[I]] -; CHECK-NEXT:[[P2:%.*]] = load ptr, ptr addrspace(1) [[P1]], align 8, !amdgpu.noclobber [[META0:![0-9]+]] -; CHECK-NEXT:[[P2_GLOBAL:%.*]] = addrspacecast ptr [[P2]] to ptr addrspace(1) -; CHECK-NEXT:[[P3:%.*]] = load ptr, ptr addrspace(1) [[P2_GLOBAL]], align 8, !amdgpu.noclobber [[META0]] -; CHECK-NEXT:[[P3_GLOBAL:%.*]] = addrspacecast ptr [[P3]] to ptr addrspace(1) -; CHECK-NEXT:store float 0.00e+00, ptr addrspace(1) [[P3_GLOBAL]], align 4 +; CHECK-NEXT:[[P2:%.*]] = load ptr, ptr addrspace(1) [[P1]], align 8 +; CHECK-NEXT:[[P3:%.*]] = load ptr, ptr [[P2]], align 8 shiltian wrote: The parent PR in the stack (i.e. https://github.com/llvm/llvm-project/pull/137488) can cover the case of a direct use of pointer kernel argument, as shown in the `P2` check line, but it can't do a good job for indirection. The case under discussion here is something like: ``` struct S { int a; void *b; }; ``` The direct load/store of `S *` would be cast to the correct AS1, which is fine. However, the load/store of `S::b` is not, as shown in the `P3` of the check line. > A pointer passed from host cannot be anything but global and be valid. So, > this is a surprising change. That's why I said it is not 100% right but probably 100% practically meaningful. For example, I can do something like: ``` struct S { int a; void *b; }; __global__ void kernel1(S *p) { __shared__ ss[2]; p->b = (void *)ss; } __global__ void kernel2(S *p) { int *p = (int *)p->b; *p = 1; } int foo() { S *p; hipMalloc(&p, ...); kernel1<<<...>>>(p); kernel2<<<...>>>(p); } ``` Is this practically correct? No. Is this legal code? I think it is. It will just cause a runtime crash. 
https://github.com/llvm/llvm-project/pull/137655 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 7721052 - Revert "[llvm] Add support for llvm IR atomicrmw fminimum/fmaximum instructio…"
Author: Jonathan Thackray Date: 2025-04-28T16:52:51+01:00 New Revision: 772105259bce16d640af6ea7f6e9fed61116d0ce URL: https://github.com/llvm/llvm-project/commit/772105259bce16d640af6ea7f6e9fed61116d0ce DIFF: https://github.com/llvm/llvm-project/commit/772105259bce16d640af6ea7f6e9fed61116d0ce.diff LOG: Revert "[llvm] Add support for llvm IR atomicrmw fminimum/fmaximum instructio…" This reverts commit ba420d8122239592a1fb7ad6efd2c186aecfdef5. Added: Modified: llvm/docs/GlobalISel/GenericOpcode.rst llvm/docs/LangRef.rst llvm/docs/ReleaseNotes.md llvm/include/llvm-c/Core.h llvm/include/llvm/AsmParser/LLToken.h llvm/include/llvm/Bitcode/LLVMBitCodes.h llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h llvm/include/llvm/CodeGen/ISDOpcodes.h llvm/include/llvm/CodeGen/SelectionDAGNodes.h llvm/include/llvm/IR/Instructions.h llvm/include/llvm/Support/TargetOpcodes.def llvm/include/llvm/Target/GenericOpcodes.td llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td llvm/include/llvm/Target/TargetSelectionDAG.td llvm/lib/AsmParser/LLLexer.cpp llvm/lib/AsmParser/LLParser.cpp llvm/lib/Bitcode/Reader/BitcodeReader.cpp llvm/lib/Bitcode/Writer/BitcodeWriter.cpp llvm/lib/CodeGen/AtomicExpandPass.cpp llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp llvm/lib/IR/Core.cpp llvm/lib/IR/Instructions.cpp llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp llvm/lib/Transforms/Utils/LowerAtomic.cpp llvm/test/Assembler/atomic.ll llvm/test/Bitcode/compatibility.ll llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir llvm/test/TableGen/GlobalISelEmitter/GlobalISelEmitter.td llvm/test/Transforms/AtomicExpand/AArch64/atomicrmw-fp.ll 
llvm/test/Transforms/InstCombine/atomicrmw.ll llvm/test/Transforms/LowerAtomic/atomic-load.ll Removed: diff --git a/llvm/docs/GlobalISel/GenericOpcode.rst b/llvm/docs/GlobalISel/GenericOpcode.rst index 987d19e2f6ce1..5291b42d80870 100644 --- a/llvm/docs/GlobalISel/GenericOpcode.rst +++ b/llvm/docs/GlobalISel/GenericOpcode.rst @@ -922,8 +922,7 @@ operands. G_ATOMICRMW_MIN, G_ATOMICRMW_UMAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_FADD, G_ATOMICRMW_FSUB, G_ATOMICRMW_FMAX, - G_ATOMICRMW_FMIN, G_ATOMICRMW_FMAXIMUM, - G_ATOMICRMW_FMINIMUM, G_ATOMICRMW_UINC_WRAP, + G_ATOMICRMW_FMIN, G_ATOMICRMW_UINC_WRAP, G_ATOMICRMW_UDEC_WRAP, G_ATOMICRMW_USUB_COND, G_ATOMICRMW_USUB_SAT diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 70c8c4d68443e..5bd1d29487139 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -11598,8 +11598,6 @@ operation. The operation must be one of the following keywords: - fsub - fmax - fmin -- fmaximum -- fminimum - uinc_wrap - udec_wrap - usub_cond @@ -11609,7 +11607,7 @@ For most of these operations, the type of '' must be an integer type whose bit width is a power of two greater than or equal to eight and less than or equal to a target-specific size limit. For xchg, this may also be a floating point or a pointer type with the same size constraints -as integers. For fadd/fsub/fmax/fmin/fmaximum/fminimum, this must be a floating-point +as integers. For fadd/fsub/fmax/fmin, this must be a floating-point or fixed vector of floating-point type. The type of the '' operand must be a pointer to that type. If the ``atomicrmw`` is marked as ``volatile``, then the optimizer is not allowed to modify the @@ -11650,10 +11648,8 @@ operation argument: - umin: ``*ptr = *ptr < val ? 
*ptr : val`` (using an unsigned comparison) - fadd: ``*ptr = *ptr + val`` (using floating point arithmetic) - fsub: ``*ptr = *ptr - val`` (using floating point arithmetic) -- fmax: ``*ptr = maxnum(*ptr, val)`` (match the `llvm.maxnum.*` intrinsic) -- fmin: ``*ptr = minnum(*ptr, val)`` (match the `llvm.minnum.*` intrinsic) -- fmaximum: ``*ptr = maximum(*ptr, val)`` (match the `llvm.maximum.*` intrinsic) -- fminimum: ``*ptr = minimum(*ptr, val)`` (match the `llvm.minimum.*` intrinsic) +- fmax: ``*ptr = maxnum(*ptr, val)`` (match the `llvm.maxnum.*`` intrinsic) +- fmin: ``*ptr = minnum(*ptr, val)`` (match the `llvm.minnum.*`` intrinsic) - uinc_wrap: ``*ptr = (*ptr u>= val) ? 0 : (*ptr + 1)`` (increment value with wraparound to zero when incremented above
[llvm-branch-commits] [clang] [llvm] release/20.x: [RISCV] Allow `Zicsr`/`Zifencei` to duplicate with `g` (#136842) (PR #137490)
https://github.com/topperc approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/137490 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LoopVectorizer] Bundle partial reductions inside VPMulAccumulateReductionRecipe (PR #136173)
@@ -219,6 +219,8 @@ class TargetTransformInfo { /// Get the kind of extension that an instruction represents. static PartialReductionExtendKind getPartialReductionExtendKind(Instruction *I); + static PartialReductionExtendKind + getPartialReductionExtendKind(Instruction::CastOps ExtOpcode); SamTebbs33 wrote: Using the `CastOps` one in the other is a good idea. Done. https://github.com/llvm/llvm-project/pull/136173 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LoopVectorizer] Bundle partial reductions inside VPMulAccumulateReductionRecipe (PR #136173)
@@ -2056,55 +2056,6 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe, } }; -/// A recipe for forming partial reductions. In the loop, an accumulator and SamTebbs33 wrote: I don't think I could make it an NFC change, since to conform to `VPReductionRecipe`, the accumulator and binop have to be swapped around. https://github.com/llvm/llvm-project/pull/136173 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Remove the pass `AMDGPUPromoteKernelArguments` (PR #137655)
https://github.com/shiltian created https://github.com/llvm/llvm-project/pull/137655 None >From 531195729a62694205763accce085b46d9a5bc10 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Sun, 27 Apr 2025 13:38:11 -0400 Subject: [PATCH] [AMDGPU] Remove the pass `AMDGPUPromoteKernelArguments` --- llvm/lib/Target/AMDGPU/AMDGPU.h | 9 - llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def | 2 - .../AMDGPU/AMDGPUPromoteKernelArguments.cpp | 219 -- .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 13 -- llvm/lib/Target/AMDGPU/CMakeLists.txt | 1 - .../AMDGPU/promote-kernel-arguments.ll| 120 +- .../secondary/llvm/lib/Target/AMDGPU/BUILD.gn | 1 - 7 files changed, 52 insertions(+), 313 deletions(-) delete mode 100644 llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 4ff761ec19b3c..edbded03957dd 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -113,15 +113,6 @@ FunctionPass *createAMDGPULowerKernelArgumentsPass(); void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &); extern char &AMDGPULowerKernelArgumentsID; -FunctionPass *createAMDGPUPromoteKernelArgumentsPass(); -void initializeAMDGPUPromoteKernelArgumentsPass(PassRegistry &); -extern char &AMDGPUPromoteKernelArgumentsID; - -struct AMDGPUPromoteKernelArgumentsPass -: PassInfoMixin { - PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); -}; - ModulePass *createAMDGPULowerKernelAttributesPass(); void initializeAMDGPULowerKernelAttributesPass(PassRegistry &); extern char &AMDGPULowerKernelAttributesID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def index 98a1147ef6d66..30cf06d3b3dd0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def @@ -59,8 +59,6 @@ FUNCTION_PASS("amdgpu-lower-kernel-attributes", FUNCTION_PASS("amdgpu-promote-alloca", AMDGPUPromoteAllocaPass(*this)) 
FUNCTION_PASS("amdgpu-promote-alloca-to-vector", AMDGPUPromoteAllocaToVectorPass(*this)) -FUNCTION_PASS("amdgpu-promote-kernel-arguments", - AMDGPUPromoteKernelArgumentsPass()) FUNCTION_PASS("amdgpu-rewrite-undef-for-phi", AMDGPURewriteUndefForPHIPass()) FUNCTION_PASS("amdgpu-simplifylib", AMDGPUSimplifyLibCallsPass()) FUNCTION_PASS("amdgpu-unify-divergent-exit-nodes", diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp deleted file mode 100644 index 06819d05b4be6..0 --- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp +++ /dev/null @@ -1,219 +0,0 @@ -//===-- AMDGPUPromoteKernelArguments.cpp --===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===--===// -// -/// \file This pass recursively promotes generic pointer arguments of a kernel -/// into the global address space. -/// -/// The pass walks kernel's pointer arguments, then loads from them. If a loaded -/// value is a pointer and loaded pointer is unmodified in the kernel before the -/// load, then promote loaded pointer to global. Then recursively continue. 
-// -//===--===// - -#include "AMDGPU.h" -#include "AMDGPUMemoryUtils.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/MemorySSA.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/InitializePasses.h" - -#define DEBUG_TYPE "amdgpu-promote-kernel-arguments" - -using namespace llvm; - -namespace { - -class AMDGPUPromoteKernelArguments : public FunctionPass { - MemorySSA *MSSA; - - AliasAnalysis *AA; - - Instruction *ArgCastInsertPt; - - SmallVector Ptrs; - - void enqueueUsers(Value *Ptr); - - bool promotePointer(Value *Ptr); - - bool promoteLoad(LoadInst *LI); - -public: - static char ID; - - AMDGPUPromoteKernelArguments() : FunctionPass(ID) {} - - bool run(Function &F, MemorySSA &MSSA, AliasAnalysis &AA); - - bool runOnFunction(Function &F) override; - - void getAnalysisUsage(AnalysisUsage &AU) const override { -AU.addRequired(); -AU.addRequired(); -AU.setPreservesAll(); - } -}; - -} // end anonymous namespace - -void AMDGPUPromoteKernelArguments::enqueueUsers(Value *Ptr) { - SmallVector PtrUsers(Ptr->users()); - - while (!PtrUsers.empty()) { -Instruction *U = dyn_cast(PtrUsers.pop_back_val()); -if (!U) - continue; - -switch (U->getOpcode()) { -default: - break; -case Instruction::Load: { - LoadInst *LD = cast(U); - if (LD->getPointerOperand()->stripInBoundsOffsets()
[llvm-branch-commits] [clang] 8da1b21 - Revert "[clang] Remove FEM_Indeterminable (#137247)"
Author: Oliver Hunt Date: 2025-04-28T08:50:35-07:00 New Revision: 8da1b21e8c45980741fc6f5d8f290ac9ac80d878 URL: https://github.com/llvm/llvm-project/commit/8da1b21e8c45980741fc6f5d8f290ac9ac80d878 DIFF: https://github.com/llvm/llvm-project/commit/8da1b21e8c45980741fc6f5d8f290ac9ac80d878.diff LOG: Revert "[clang] Remove FEM_Indeterminable (#137247)" This reverts commit 6b973f2baf66b05397d6caff3f29dbefabd5a214. Added: Modified: clang/include/clang/Basic/FPOptions.def clang/include/clang/Basic/LangOptions.def clang/include/clang/Basic/LangOptions.h Removed: diff --git a/clang/include/clang/Basic/FPOptions.def b/clang/include/clang/Basic/FPOptions.def index 90428c3c73c8b..85986b4ff0b9c 100644 --- a/clang/include/clang/Basic/FPOptions.def +++ b/clang/include/clang/Basic/FPOptions.def @@ -24,7 +24,7 @@ OPTION(NoHonorInfs, bool, 1, NoHonorNaNs) OPTION(NoSignedZero, bool, 1, NoHonorInfs) OPTION(AllowReciprocal, bool, 1, NoSignedZero) OPTION(AllowApproxFunc, bool, 1, AllowReciprocal) -OPTION(FPEvalMethod, LangOptions::FPEvalMethodKind, 2, AllowApproxFunc) +OPTION(FPEvalMethod, LangOptions::FPEvalMethodKind, 3, AllowApproxFunc) OPTION(Float16ExcessPrecision, LangOptions::ExcessPrecisionKind, 2, FPEvalMethod) OPTION(BFloat16ExcessPrecision, LangOptions::ExcessPrecisionKind, 2, Float16ExcessPrecision) OPTION(MathErrno, bool, 1, BFloat16ExcessPrecision) diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index 930c1c06d1a76..85ca523c44157 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -347,7 +347,7 @@ BENIGN_ENUM_LANGOPT(DefaultFPContractMode, FPModeKind, 2, FPM_Off, "FP contracti COMPATIBLE_LANGOPT(ExpStrictFP, 1, false, "Enable experimental strict floating point") BENIGN_LANGOPT(RoundingMath, 1, false, "Do not assume default floating-point rounding behavior") BENIGN_ENUM_LANGOPT(FPExceptionMode, FPExceptionModeKind, 2, FPE_Default, "FP Exception Behavior Mode type") 
-BENIGN_ENUM_LANGOPT(FPEvalMethod, FPEvalMethodKind, 2, FEM_UnsetOnCommandLine, "FP type used for floating point arithmetic") +BENIGN_ENUM_LANGOPT(FPEvalMethod, FPEvalMethodKind, 3, FEM_UnsetOnCommandLine, "FP type used for floating point arithmetic") ENUM_LANGOPT(Float16ExcessPrecision, ExcessPrecisionKind, 2, FPP_Standard, "Intermediate truncation behavior for Float16 arithmetic") ENUM_LANGOPT(BFloat16ExcessPrecision, ExcessPrecisionKind, 2, FPP_Standard, "Intermediate truncation behavior for BFloat16 arithmetic") LANGOPT(NoBitFieldTypeAlign , 1, 0, "bit-field type alignment") diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h index 1bfc0d8e88556..bbebf7af9ede3 100644 --- a/clang/include/clang/Basic/LangOptions.h +++ b/clang/include/clang/Basic/LangOptions.h @@ -304,7 +304,10 @@ class LangOptionsBase { }; /// Possible float expression evaluation method choices. - enum FPEvalMethodKind : unsigned { + enum FPEvalMethodKind { +/// The evaluation method cannot be determined or is inconsistent for this +/// target. +FEM_Indeterminable = -1, /// Use the declared type for fp arithmetic. FEM_Source = 0, /// Use the type double for fp arithmetic. ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Remove the pass `AMDGPUPromoteKernelArguments` (PR #137655)
@@ -11,11 +10,9 @@ define amdgpu_kernel void @ptr_nest_3(ptr addrspace(1) nocapture readonly %Arg) ; CHECK-NEXT: entry: ; CHECK-NEXT:[[I:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() ; CHECK-NEXT:[[P1:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) [[ARG:%.*]], i32 [[I]] -; CHECK-NEXT:[[P2:%.*]] = load ptr, ptr addrspace(1) [[P1]], align 8, !amdgpu.noclobber [[META0:![0-9]+]] -; CHECK-NEXT:[[P2_GLOBAL:%.*]] = addrspacecast ptr [[P2]] to ptr addrspace(1) -; CHECK-NEXT:[[P3:%.*]] = load ptr, ptr addrspace(1) [[P2_GLOBAL]], align 8, !amdgpu.noclobber [[META0]] -; CHECK-NEXT:[[P3_GLOBAL:%.*]] = addrspacecast ptr [[P3]] to ptr addrspace(1) -; CHECK-NEXT:store float 0.00e+00, ptr addrspace(1) [[P3_GLOBAL]], align 4 +; CHECK-NEXT:[[P2:%.*]] = load ptr, ptr addrspace(1) [[P1]], align 8 +; CHECK-NEXT:[[P3:%.*]] = load ptr, ptr [[P2]], align 8 shiltian wrote: Agreed. Effectively for a target independent pass, we can say a pointer kernel argument has to be in AS X w/o having a dedicated pass for that. This is currently done in https://github.com/llvm/llvm-project/pull/137488. However, when it comes to an indirection, it can't be handled in a target independent way. We can't really fully remove this pass at the moment. https://github.com/llvm/llvm-project/pull/137655 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [AArch64][SME] Prevent spills of ZT0 when ZA is not enabled (PR #137683)
https://github.com/MacDue created https://github.com/llvm/llvm-project/pull/137683 This cherry-picks https://github.com/llvm/llvm-project/pull/132722 and https://github.com/llvm/llvm-project/pull/136726 (the latter is based on the former). These patches are needed to prevent invalid codegen as attempting to store ZT0 without ZA enabled results in a SIGILL. >From c2e81b014aebc262b4db59eb7fbdde2b1376a39a Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Tue, 25 Mar 2025 10:09:25 + Subject: [PATCH 1/2] [AArch64][SME2] Don't preserve ZT0 around SME ABI routines (#132722) This caused ZT0 to be preserved around `__arm_tpidr2_save` in functions with "aarch64_new_zt0". The block in which `__arm_tpidr2_save` is called is added by the SMEABIPass and may be reachable in cases where ZA has not been enabled* (so using `str zt0` is invalid). * (when za_save_buffer is null and num_za_save_slices is zero) --- .../AArch64/Utils/AArch64SMEAttributes.h | 3 +- .../AArch64/sme-disable-gisel-fisel.ll| 9 +-- llvm/test/CodeGen/AArch64/sme-zt0-state.ll| 61 +-- 3 files changed, 46 insertions(+), 27 deletions(-) diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h index fb093da70c46b..a3ebf764a6e0c 100644 --- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h +++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h @@ -133,7 +133,8 @@ class SMEAttrs { bool hasZT0State() const { return isNewZT0() || sharesZT0(); } bool requiresPreservingZT0(const SMEAttrs &Callee) const { return hasZT0State() && !Callee.sharesZT0() && - !Callee.hasAgnosticZAInterface(); + !Callee.hasAgnosticZAInterface() && + !(Callee.Bitmask & SME_ABI_Routine); } bool requiresDisablingZABeforeCall(const SMEAttrs &Callee) const { return hasZT0State() && !hasZAState() && Callee.hasPrivateZAInterface() && diff --git a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll index 
33d08beae2ca7..4a52bf27a7591 100644 --- a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll +++ b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll @@ -475,16 +475,12 @@ declare double @zt0_shared_callee(double) "aarch64_inout_zt0" define double @zt0_new_caller_to_zt0_shared_callee(double %x) nounwind noinline optnone "aarch64_new_zt0" { ; CHECK-COMMON-LABEL: zt0_new_caller_to_zt0_shared_callee: ; CHECK-COMMON: // %bb.0: // %prelude -; CHECK-COMMON-NEXT:sub sp, sp, #80 -; CHECK-COMMON-NEXT:str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-COMMON-NEXT:str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-COMMON-NEXT:mrs x8, TPIDR2_EL0 ; CHECK-COMMON-NEXT:cbz x8, .LBB13_2 ; CHECK-COMMON-NEXT:b .LBB13_1 ; CHECK-COMMON-NEXT: .LBB13_1: // %save.za -; CHECK-COMMON-NEXT:mov x8, sp -; CHECK-COMMON-NEXT:str zt0, [x8] ; CHECK-COMMON-NEXT:bl __arm_tpidr2_save -; CHECK-COMMON-NEXT:ldr zt0, [x8] ; CHECK-COMMON-NEXT:msr TPIDR2_EL0, xzr ; CHECK-COMMON-NEXT:b .LBB13_2 ; CHECK-COMMON-NEXT: .LBB13_2: // %entry @@ -495,8 +491,7 @@ define double @zt0_new_caller_to_zt0_shared_callee(double %x) nounwind noinline ; CHECK-COMMON-NEXT:fmov d1, x8 ; CHECK-COMMON-NEXT:fadd d0, d0, d1 ; CHECK-COMMON-NEXT:smstop za -; CHECK-COMMON-NEXT:ldr x30, [sp, #64] // 8-byte Folded Reload -; CHECK-COMMON-NEXT:add sp, sp, #80 +; CHECK-COMMON-NEXT:ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-COMMON-NEXT:ret entry: %call = call double @zt0_shared_callee(double %x) diff --git a/llvm/test/CodeGen/AArch64/sme-zt0-state.ll b/llvm/test/CodeGen/AArch64/sme-zt0-state.ll index 312537630e77a..500fff4eb20db 100644 --- a/llvm/test/CodeGen/AArch64/sme-zt0-state.ll +++ b/llvm/test/CodeGen/AArch64/sme-zt0-state.ll @@ -112,7 +112,7 @@ define void @za_zt0_shared_caller_za_zt0_shared_callee() "aarch64_inout_za" "aar ret void; } -; New-ZA Callee +; New-ZT0 Callee ; Expect spill & fill of ZT0 around call ; Expect smstop/smstart za around call @@ -134,6 +134,39 @@ define void @zt0_in_caller_zt0_new_callee() 
"aarch64_in_zt0" nounwind { ret void; } +; New-ZT0 Callee + +; Expect commit of lazy-save if ZA is dormant +; Expect smstart ZA & clear ZT0 +; Expect spill & fill of ZT0 around call +; Before return, expect smstop ZA +define void @zt0_new_caller_zt0_new_callee() "aarch64_new_zt0" nounwind { +; CHECK-LABEL: zt0_new_caller_zt0_new_callee: +; CHECK: // %bb.0: // %prelude +; CHECK-NEXT:sub sp, sp, #80 +; CHECK-NEXT:stp x30, x19, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT:mrs x8, TPIDR2_EL0 +; CHECK-NEXT:cbz x8, .LBB6_2 +; CHECK-NEXT: // %bb.1: // %save.za +; CHECK-NEXT:bl __arm_tpidr2_save +; CHECK-NEXT:msr TPIDR2_EL0, xzr +; CHECK-NEXT: .LBB6_2: +; CHECK-NEXT:smstart za +; CHECK-NEXT:zero { zt0 } +; CHECK-NEXT:mov x19, sp +; CHECK-NEXT:
[llvm-branch-commits] [llvm] release/20.x: [AArch64][SME] Prevent spills of ZT0 when ZA is not enabled (PR #137683)
llvmbot wrote: @llvm/pr-subscribers-backend-aarch64 Author: Benjamin Maxwell (MacDue) Changes This cherry-picks https://github.com/llvm/llvm-project/pull/132722 and https://github.com/llvm/llvm-project/pull/136726 (the latter is based on the former). These patches are needed to prevent invalid codegen as attempting to store ZT0 without ZA enabled results in a SIGILL. --- Full diff: https://github.com/llvm/llvm-project/pull/137683.diff 9 Files Affected: - (modified) llvm/lib/IR/Verifier.cpp (+3) - (modified) llvm/lib/Target/AArch64/SMEABIPass.cpp (+12-4) - (modified) llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp (+2) - (modified) llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h (+5-3) - (modified) llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll (+2-7) - (added) llvm/test/CodeGen/AArch64/sme-new-zt0-function.ll (+14) - (modified) llvm/test/CodeGen/AArch64/sme-zt0-state.ll (+75-19) - (modified) llvm/test/Verifier/sme-attributes.ll (+3) - (modified) llvm/unittests/Target/AArch64/SMEAttributesTest.cpp (+30) ``diff diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 8432779c107de..551c00a518b8f 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -2818,6 +2818,9 @@ void Verifier::visitFunction(const Function &F) { Check(!Attrs.hasAttrSomewhere(Attribute::ElementType), "Attribute 'elementtype' can only be applied to a callsite.", &F); + Check(!Attrs.hasFnAttr("aarch64_zt0_undef"), +"Attribute 'aarch64_zt0_undef' can only be applied to a callsite."); + if (Attrs.hasFnAttr(Attribute::Naked)) for (const Argument &Arg : F.args()) Check(Arg.use_empty(), "cannot use argument of naked function", &Arg); diff --git a/llvm/lib/Target/AArch64/SMEABIPass.cpp b/llvm/lib/Target/AArch64/SMEABIPass.cpp index bb885d86392fe..b6685497e1fd1 100644 --- a/llvm/lib/Target/AArch64/SMEABIPass.cpp +++ b/llvm/lib/Target/AArch64/SMEABIPass.cpp @@ -54,14 +54,22 @@ FunctionPass *llvm::createSMEABIPass() { return new SMEABI(); } //===--===// // 
Utility function to emit a call to __arm_tpidr2_save and clear TPIDR2_EL0. -void emitTPIDR2Save(Module *M, IRBuilder<> &Builder) { +void emitTPIDR2Save(Module *M, IRBuilder<> &Builder, bool ZT0IsUndef = false) { + auto &Ctx = M->getContext(); auto *TPIDR2SaveTy = FunctionType::get(Builder.getVoidTy(), {}, /*IsVarArgs=*/false); - auto Attrs = AttributeList().addFnAttribute(M->getContext(), - "aarch64_pstate_sm_compatible"); + auto Attrs = + AttributeList().addFnAttribute(Ctx, "aarch64_pstate_sm_compatible"); FunctionCallee Callee = M->getOrInsertFunction("__arm_tpidr2_save", TPIDR2SaveTy, Attrs); CallInst *Call = Builder.CreateCall(Callee); + + // If ZT0 is undefined (i.e. we're at the entry of a "new_zt0" function), mark + // that on the __arm_tpidr2_save call. This prevents an unnecessary spill of + // ZT0 that can occur before ZA is enabled. + if (ZT0IsUndef) +Call->addFnAttr(Attribute::get(Ctx, "aarch64_zt0_undef")); + Call->setCallingConv( CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0); @@ -119,7 +127,7 @@ bool SMEABI::updateNewStateFunctions(Module *M, Function *F, // Create a call __arm_tpidr2_save, which commits the lazy save. Builder.SetInsertPoint(&SaveBB->back()); -emitTPIDR2Save(M, Builder); +emitTPIDR2Save(M, Builder, /*ZT0IsUndef=*/FnAttrs.isNewZT0()); // Enable pstate.za at the start of the function. 
Builder.SetInsertPoint(&OrigBB->front()); diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp index bf16acd7f8f7e..76d2ac6a601e5 100644 --- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp +++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp @@ -75,6 +75,8 @@ SMEAttrs::SMEAttrs(const AttributeList &Attrs) { Bitmask |= SM_Body; if (Attrs.hasFnAttr("aarch64_za_state_agnostic")) Bitmask |= ZA_State_Agnostic; + if (Attrs.hasFnAttr("aarch64_zt0_undef")) +Bitmask |= ZT0_Undef; if (Attrs.hasFnAttr("aarch64_in_za")) Bitmask |= encodeZAState(StateValue::In); if (Attrs.hasFnAttr("aarch64_out_za")) diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h index fb093da70c46b..1691d4fec8b68 100644 --- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h +++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h @@ -43,9 +43,10 @@ class SMEAttrs { SM_Body = 1 << 2, // aarch64_pstate_sm_body SME_ABI_Routine = 1 << 3, // Used for SME ABI routines to avoid lazy saves ZA_State_Agnostic = 1 << 4, -ZA_Shift = 5, +ZT0_Undef = 1 << 5, // Use to mark ZT0 as undef to avoid spills +ZA_Shift = 6, ZA_Mask = 0b111 << ZA_Shift, -ZT0_Shift =
[llvm-branch-commits] [llvm] release/20.x: [AArch64][SME] Prevent spills of ZT0 when ZA is not enabled (PR #137683)
https://github.com/MacDue milestoned https://github.com/llvm/llvm-project/pull/137683 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] ad7ed85 - Revert "[CodeGen] Use OwningArrayRef in NodeMetadata (NFC) (#137539)"
Author: Florian Mayer Date: 2025-04-28T11:34:31-07:00 New Revision: ad7ed8586a1ab0548bd307b2bead3039e5cbb236 URL: https://github.com/llvm/llvm-project/commit/ad7ed8586a1ab0548bd307b2bead3039e5cbb236 DIFF: https://github.com/llvm/llvm-project/commit/ad7ed8586a1ab0548bd307b2bead3039e5cbb236.diff LOG: Revert "[CodeGen] Use OwningArrayRef in NodeMetadata (NFC) (#137539)" This reverts commit 08beaa868ecc4846755f8679a68e79f4642b268b. Added: Modified: llvm/include/llvm/CodeGen/RegAllocPBQP.h Removed: diff --git a/llvm/include/llvm/CodeGen/RegAllocPBQP.h b/llvm/include/llvm/CodeGen/RegAllocPBQP.h index 9c74cd2ebc0b1..234f1c6ff115a 100644 --- a/llvm/include/llvm/CodeGen/RegAllocPBQP.h +++ b/llvm/include/llvm/CodeGen/RegAllocPBQP.h @@ -15,7 +15,6 @@ #ifndef LLVM_CODEGEN_REGALLOCPBQP_H #define LLVM_CODEGEN_REGALLOCPBQP_H -#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Hashing.h" #include "llvm/CodeGen/PBQP/CostAllocator.h" @@ -184,14 +183,18 @@ class NodeMetadata { NodeMetadata() = default; NodeMetadata(const NodeMetadata &Other) - : RS(Other.RS), DeniedOpts(Other.DeniedOpts), -OptUnsafeEdges(ArrayRef(Other.OptUnsafeEdges)), -VReg(Other.VReg), AllowedRegs(Other.AllowedRegs) + : RS(Other.RS), NumOpts(Other.NumOpts), DeniedOpts(Other.DeniedOpts), +OptUnsafeEdges(new unsigned[NumOpts]), VReg(Other.VReg), +AllowedRegs(Other.AllowedRegs) #if LLVM_ENABLE_ABI_BREAKING_CHECKS , everConservativelyAllocatable(Other.everConservativelyAllocatable) #endif { +if (NumOpts > 0) { + std::copy(&Other.OptUnsafeEdges[0], &Other.OptUnsafeEdges[NumOpts], +&OptUnsafeEdges[0]); +} } NodeMetadata(NodeMetadata &&) = default; @@ -206,7 +209,8 @@ class NodeMetadata { const AllowedRegVector& getAllowedRegs() const { return *AllowedRegs; } void setup(const Vector& Costs) { -OptUnsafeEdges = OwningArrayRef(Costs.getLength() - 1); +NumOpts = Costs.getLength() - 1; +OptUnsafeEdges = std::unique_ptr(new unsigned[NumOpts]()); } ReductionState getReductionState() const { return 
RS; } @@ -226,7 +230,7 @@ class NodeMetadata { DeniedOpts += Transpose ? MD.getWorstRow() : MD.getWorstCol(); const bool* UnsafeOpts = Transpose ? MD.getUnsafeCols() : MD.getUnsafeRows(); -for (unsigned i = 0; i < OptUnsafeEdges.size(); ++i) +for (unsigned i = 0; i < NumOpts; ++i) OptUnsafeEdges[i] += UnsafeOpts[i]; } @@ -234,13 +238,14 @@ class NodeMetadata { DeniedOpts -= Transpose ? MD.getWorstRow() : MD.getWorstCol(); const bool* UnsafeOpts = Transpose ? MD.getUnsafeCols() : MD.getUnsafeRows(); -for (unsigned i = 0; i < OptUnsafeEdges.size(); ++i) +for (unsigned i = 0; i < NumOpts; ++i) OptUnsafeEdges[i] -= UnsafeOpts[i]; } bool isConservativelyAllocatable() const { -return (DeniedOpts < OptUnsafeEdges.size()) || - llvm::is_contained(OptUnsafeEdges, 0); +return (DeniedOpts < NumOpts) || + (std::find(&OptUnsafeEdges[0], &OptUnsafeEdges[NumOpts], 0) != + &OptUnsafeEdges[NumOpts]); } #if LLVM_ENABLE_ABI_BREAKING_CHECKS @@ -251,8 +256,9 @@ class NodeMetadata { private: ReductionState RS = Unprocessed; + unsigned NumOpts = 0; unsigned DeniedOpts = 0; - OwningArrayRef OptUnsafeEdges; + std::unique_ptr OptUnsafeEdges; Register VReg; GraphMetadata::AllowedRegVecRef AllowedRegs; ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [mlir] [mlir][OpenMP] Convert omp.cancellation_point to LLVMIR (PR #137205)
https://github.com/Meinersbur edited https://github.com/llvm/llvm-project/pull/137205 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [mlir] [mlir][OpenMP] Convert omp.cancellation_point to LLVMIR (PR #137205)
https://github.com/Meinersbur approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/137205 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [mlir] [mlir][OpenMP] Convert omp.cancellation_point to LLVMIR (PR #137205)
@@ -1589,11 +1592,12 @@ cleanupPrivateVars(llvm::IRBuilderBase &builder, /// Returns true if the construct contains omp.cancel or omp.cancellation_point static bool constructIsCancellable(Operation *op) { - // omp.cancel must be "closely nested" so it will be visible and not inside of - // funcion calls. This is enforced by the verifier. + // omp.cancel and omp.cancellation_point must be "closely nested" so they will + // be visible and not inside of funcion calls. This is enforced by the Meinersbur wrote: ```suggestion // be visible and not inside of function calls. This is enforced by the ``` https://github.com/llvm/llvm-project/pull/137205 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [mlir] [mlir][OpenMP] Convert omp.cancellation_point to LLVMIR (PR #137205)
Meinersbur wrote: The Windows CI failure looks related: ``` C:\ws\src\clang\test\OpenMP\cancel_codegen.cpp -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | c:\ws\src\build\bin\filecheck.exe --allow-unused-prefixes C:\ws\src\clang\test\OpenMP\cancel_codegen.cpp --check-prefix=CHECK1 # executed command: 'c:\ws\src\build\bin\clang.exe' -cc1 -internal-isystem 'C:\ws\src\build\lib\clang\21\include' -nostdsysteminc -fopenmp -fopenmp-version=45 -std=c++11 -include-pch 'C:\ws\src\build\tools\clang\test\OpenMP\Output\cancel_codegen.cpp.tmp.0' -verify 'C:\ws\src\clang\test\OpenMP\cancel_codegen.cpp' -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - # note: command had no output on stdout or stderr # executed command: 'c:\ws\src\build\bin\filecheck.exe' --allow-unused-prefixes 'C:\ws\src\clang\test\OpenMP\cancel_codegen.cpp' --check-prefix=CHECK1 # note: command had no output on stdout or stderr # RUN: at line 6 c:\ws\src\build\bin\clang.exe -cc1 -internal-isystem C:\ws\src\build\lib\clang\21\include -nostdsysteminc -verify -fopenmp -fopenmp-version=45 -fopenmp-enable-irbuilder -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - C:\ws\src\clang\test\OpenMP\cancel_codegen.cpp | c:\ws\src\build\bin\filecheck.exe --allow-unused-prefixes C:\ws\src\clang\test\OpenMP\cancel_codegen.cpp --check-prefix=CHECK3 # executed command: 'c:\ws\src\build\bin\clang.exe' -cc1 -internal-isystem 'C:\ws\src\build\lib\clang\21\include' -nostdsysteminc -verify -fopenmp -fopenmp-version=45 -fopenmp-enable-irbuilder -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - 'C:\ws\src\clang\test\OpenMP\cancel_codegen.cpp' # .---command stderr # | While deleting: label %omp_section_loop.exit # | Use still stuck around after Def is destroyed: br # | Uses remain when a value is destroyed! # | UNREACHABLE executed at C:\ws\src\llvm\lib\IR\Value.cpp:102! # | PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace, preprocessed source, and associated run script. 
# | Stack dump: # | 0. Program arguments: c:\\ws\\src\\build\\bin\\clang.exe -cc1 -internal-isystem C:\\ws\\src\\build\\lib\\clang\\21\\include -nostdsysteminc -verify -fopenmp -fopenmp-version=45 -fopenmp-enable-irbuilder -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - C:\\ws\\src\\clang\\test\\OpenMP\\cancel_codegen.cpp ``` https://github.com/llvm/llvm-project/pull/137205 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [InstCombine] Fix ninf propagation for fcmp+sel -> minmax (#136433) (PR #137605)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/137605 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [InstCombine] Fix ninf propagation for fcmp+sel -> minmax (#136433) (PR #137605)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/137605 Backport a0c4876eede8e68de22dc5296b037556c7cca981 Requested by: @dtcxzyw >From 3e865307db75052a0e317e71fe0526bfc2545fbd Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Mon, 28 Apr 2025 17:24:46 +0800 Subject: [PATCH] [InstCombine] Fix ninf propagation for fcmp+sel -> minmax (#136433) Proof: https://alive2.llvm.org/ce/z/nCrvfr Closes https://github.com/llvm/llvm-project/issues/136430 (cherry picked from commit a0c4876eede8e68de22dc5296b037556c7cca981) --- .../Transforms/InstCombine/InstCombineSelect.cpp | 8 ++-- .../Transforms/InstCombine/fcmp-fadd-select.ll | 2 +- llvm/test/Transforms/InstCombine/minmax-fp.ll | 14 -- .../InstCombine/unordered-fcmp-select.ll | 2 +- 4 files changed, 20 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 29c5cef84ccdb..932628be84846 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -3898,16 +3898,20 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { if (match(&SI, m_OrdOrUnordFMax(m_Value(X), m_Value(Y { Value *BinIntr = Builder.CreateBinaryIntrinsic(Intrinsic::maxnum, X, Y, &SI); -if (auto *BinIntrInst = dyn_cast(BinIntr)) +if (auto *BinIntrInst = dyn_cast(BinIntr)) { BinIntrInst->setHasNoNaNs(FCmp->hasNoNaNs()); + BinIntrInst->setHasNoInfs(FCmp->hasNoInfs()); +} return replaceInstUsesWith(SI, BinIntr); } if (match(&SI, m_OrdOrUnordFMin(m_Value(X), m_Value(Y { Value *BinIntr = Builder.CreateBinaryIntrinsic(Intrinsic::minnum, X, Y, &SI); -if (auto *BinIntrInst = dyn_cast(BinIntr)) +if (auto *BinIntrInst = dyn_cast(BinIntr)) { BinIntrInst->setHasNoNaNs(FCmp->hasNoNaNs()); + BinIntrInst->setHasNoInfs(FCmp->hasNoInfs()); +} return replaceInstUsesWith(SI, BinIntr); } } diff --git a/llvm/test/Transforms/InstCombine/fcmp-fadd-select.ll 
b/llvm/test/Transforms/InstCombine/fcmp-fadd-select.ll index 15fad55db8df1..e05ef6df1d41b 100644 --- a/llvm/test/Transforms/InstCombine/fcmp-fadd-select.ll +++ b/llvm/test/Transforms/InstCombine/fcmp-fadd-select.ll @@ -663,7 +663,7 @@ define float @test_fcmp_ogt_fadd_select_rewrite_flags2(float %in) { define float @test_fcmp_ogt_fadd_select_rewrite_and_fastmath(float %in) { ; CHECK-LABEL: define float @test_fcmp_ogt_fadd_select_rewrite_and_fastmath( ; CHECK-SAME: float [[IN:%.*]]) { -; CHECK-NEXT:[[SEL_NEW:%.*]] = call fast float @llvm.maxnum.f32(float [[IN]], float 0.00e+00) +; CHECK-NEXT:[[SEL_NEW:%.*]] = call reassoc nnan nsz arcp contract afn float @llvm.maxnum.f32(float [[IN]], float 0.00e+00) ; CHECK-NEXT:[[ADD_NEW:%.*]] = fadd fast float [[SEL_NEW]], 1.00e+00 ; CHECK-NEXT:ret float [[ADD_NEW]] ; diff --git a/llvm/test/Transforms/InstCombine/minmax-fp.ll b/llvm/test/Transforms/InstCombine/minmax-fp.ll index 4fe8cf374344e..a8470a20365e9 100644 --- a/llvm/test/Transforms/InstCombine/minmax-fp.ll +++ b/llvm/test/Transforms/InstCombine/minmax-fp.ll @@ -331,7 +331,7 @@ define float @maxnum_ogt_fmf_on_select(float %a, float %b) { define <2 x float> @maxnum_oge_fmf_on_select(<2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: @maxnum_oge_fmf_on_select( -; CHECK-NEXT:[[F:%.*]] = call ninf nsz <2 x float> @llvm.maxnum.v2f32(<2 x float> [[A:%.*]], <2 x float> [[B:%.*]]) +; CHECK-NEXT:[[F:%.*]] = call nsz <2 x float> @llvm.maxnum.v2f32(<2 x float> [[A:%.*]], <2 x float> [[B:%.*]]) ; CHECK-NEXT:ret <2 x float> [[F]] ; %cond = fcmp oge <2 x float> %a, %b @@ -383,6 +383,16 @@ define float @maxnum_no_nnan(float %a, float %b) { ret float %f } +define float @minnum_olt_fmf_on_select_both_ninf(float %a, float %b) { +; CHECK-LABEL: @minnum_olt_fmf_on_select_both_ninf( +; CHECK-NEXT:[[F:%.*]] = call ninf nsz float @llvm.minnum.f32(float [[A:%.*]], float [[B:%.*]]) +; CHECK-NEXT:ret float [[F]] +; + %cond = fcmp ninf olt float %a, %b + %f = select nnan ninf nsz i1 %cond, float %a, 
float %b + ret float %f +} + define float @minnum_olt_fmf_on_select(float %a, float %b) { ; CHECK-LABEL: @minnum_olt_fmf_on_select( ; CHECK-NEXT:[[F:%.*]] = call nsz float @llvm.minnum.f32(float [[A:%.*]], float [[B:%.*]]) @@ -395,7 +405,7 @@ define float @minnum_olt_fmf_on_select(float %a, float %b) { define <2 x float> @minnum_ole_fmf_on_select(<2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: @minnum_ole_fmf_on_select( -; CHECK-NEXT:[[F:%.*]] = call ninf nsz <2 x float> @llvm.minnum.v2f32(<2 x float> [[A:%.*]], <2 x float> [[B:%.*]]) +; CHECK-NEXT:[[F:%.*]] = call nsz <2 x float> @llvm.minnum.v2f32(<2 x float> [[A:%.*]], <2 x float> [[B:%.*]]) ; CHECK-NEXT:
[llvm-branch-commits] [llvm] release/20.x: [InstCombine] Fix ninf propagation for fcmp+sel -> minmax (#136433) (PR #137605)
llvmbot wrote: @nikic What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/137605 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [InstCombine] Do not fold logical is_finite test (#136851) (PR #137606)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/137606 Backport 8abc917fe04140b6c6088a67e0398f637efde808 Requested by: @dtcxzyw >From 42d442a1b0eb00df2d5cf8d5ce1dede550fd3d3f Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Thu, 24 Apr 2025 00:12:30 +0800 Subject: [PATCH] [InstCombine] Do not fold logical is_finite test (#136851) This patch disables the fold for logical is_finite test (i.e., `and (fcmp ord x, 0), (fcmp u* x, inf) -> fcmp o* x, inf`). It is still possible to allow this fold for several logical cases (e.g., `stripSignOnlyFPOps(RHS0)` does not strip any operations). Since this patch has no real-world impact, I decided to disable this fold for all logical cases. Alive2: https://alive2.llvm.org/ce/z/aH4LC7 Closes https://github.com/llvm/llvm-project/issues/136650. (cherry picked from commit 8abc917fe04140b6c6088a67e0398f637efde808) --- .../InstCombine/InstCombineAndOrXor.cpp | 4 ++- llvm/test/Transforms/InstCombine/and-fcmp.ll | 28 +++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index ca8a20b4b7312..ebb84d177a832 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1475,7 +1475,9 @@ Value *InstCombinerImpl::foldLogicOfFCmps(FCmpInst *LHS, FCmpInst *RHS, } } - if (IsAnd && stripSignOnlyFPOps(LHS0) == stripSignOnlyFPOps(RHS0)) { + // This transform is not valid for a logical select. 
+ if (!IsLogicalSelect && IsAnd && + stripSignOnlyFPOps(LHS0) == stripSignOnlyFPOps(RHS0)) { // and (fcmp ord x, 0), (fcmp u* x, inf) -> fcmp o* x, inf // and (fcmp ord x, 0), (fcmp u* fabs(x), inf) -> fcmp o* x, inf if (Value *Left = matchIsFiniteTest(Builder, LHS, RHS)) diff --git a/llvm/test/Transforms/InstCombine/and-fcmp.ll b/llvm/test/Transforms/InstCombine/and-fcmp.ll index c7bbc8ab56f9a..ec1b6ad2ea168 100644 --- a/llvm/test/Transforms/InstCombine/and-fcmp.ll +++ b/llvm/test/Transforms/InstCombine/and-fcmp.ll @@ -4990,6 +4990,34 @@ define i1 @clang_builtin_isnormal_inf_check_copysign(half %x, half %y) { ret i1 %and } +define i1 @clang_builtin_isnormal_inf_check_copysign_logical_select(half %x, half %y) { +; CHECK-LABEL: @clang_builtin_isnormal_inf_check_copysign_logical_select( +; CHECK-NEXT:[[COPYSIGN_X:%.*]] = call half @llvm.copysign.f16(half [[X:%.*]], half [[Y:%.*]]) +; CHECK-NEXT:[[ORD:%.*]] = fcmp ord half [[X]], 0xH +; CHECK-NEXT:[[CMP:%.*]] = fcmp ueq half [[COPYSIGN_X]], 0xH7C00 +; CHECK-NEXT:[[AND:%.*]] = select i1 [[ORD]], i1 [[CMP]], i1 false +; CHECK-NEXT:ret i1 [[AND]] +; + %copysign.x = call half @llvm.copysign.f16(half %x, half %y) + %ord = fcmp ord half %x, 0.0 + %cmp = fcmp uge half %copysign.x, 0xH7C00 + %and = select i1 %ord, i1 %cmp, i1 false + ret i1 %and +} + +define i1 @clang_builtin_isnormal_inf_check_fabs_nnan_logical_select(half %x) { +; CHECK-LABEL: @clang_builtin_isnormal_inf_check_fabs_nnan_logical_select( +; CHECK-NEXT:[[COPYSIGN_X:%.*]] = call half @llvm.fabs.f16(half [[X:%.*]]) +; CHECK-NEXT:[[AND:%.*]] = fcmp oeq half [[COPYSIGN_X]], 0xH7C00 +; CHECK-NEXT:ret i1 [[AND]] +; + %copysign.x = call nnan half @llvm.fabs.f16(half %x) + %ord = fcmp ord half %x, 0.0 + %cmp = fcmp uge half %copysign.x, 0xH7C00 + %and = select i1 %ord, i1 %cmp, i1 false + ret i1 %and +} + define i1 @isnormal_logical_select_0(half %x) { ; CHECK-LABEL: @isnormal_logical_select_0( ; CHECK-NEXT:[[FABS_X:%.*]] = call half @llvm.fabs.f16(half [[X:%.*]]) ___ 
llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [InstCombine] Do not fold logical is_finite test (#136851) (PR #137606)
llvmbot wrote: @nikic What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/137606 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [InstCombine] Do not fold logical is_finite test (#136851) (PR #137606)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/137606 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [InstCombine] Fix ninf propagation for fcmp+sel -> minmax (#136433) (PR #137605)
llvmbot wrote: @llvm/pr-subscribers-llvm-transforms Author: None (llvmbot) Changes Backport a0c4876eede8e68de22dc5296b037556c7cca981 Requested by: @dtcxzyw --- Full diff: https://github.com/llvm/llvm-project/pull/137605.diff 4 Files Affected: - (modified) llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp (+6-2) - (modified) llvm/test/Transforms/InstCombine/fcmp-fadd-select.ll (+1-1) - (modified) llvm/test/Transforms/InstCombine/minmax-fp.ll (+12-2) - (modified) llvm/test/Transforms/InstCombine/unordered-fcmp-select.ll (+1-1) ``diff diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 29c5cef84ccdb..932628be84846 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -3898,16 +3898,20 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { if (match(&SI, m_OrdOrUnordFMax(m_Value(X), m_Value(Y { Value *BinIntr = Builder.CreateBinaryIntrinsic(Intrinsic::maxnum, X, Y, &SI); -if (auto *BinIntrInst = dyn_cast(BinIntr)) +if (auto *BinIntrInst = dyn_cast(BinIntr)) { BinIntrInst->setHasNoNaNs(FCmp->hasNoNaNs()); + BinIntrInst->setHasNoInfs(FCmp->hasNoInfs()); +} return replaceInstUsesWith(SI, BinIntr); } if (match(&SI, m_OrdOrUnordFMin(m_Value(X), m_Value(Y { Value *BinIntr = Builder.CreateBinaryIntrinsic(Intrinsic::minnum, X, Y, &SI); -if (auto *BinIntrInst = dyn_cast(BinIntr)) +if (auto *BinIntrInst = dyn_cast(BinIntr)) { BinIntrInst->setHasNoNaNs(FCmp->hasNoNaNs()); + BinIntrInst->setHasNoInfs(FCmp->hasNoInfs()); +} return replaceInstUsesWith(SI, BinIntr); } } diff --git a/llvm/test/Transforms/InstCombine/fcmp-fadd-select.ll b/llvm/test/Transforms/InstCombine/fcmp-fadd-select.ll index 15fad55db8df1..e05ef6df1d41b 100644 --- a/llvm/test/Transforms/InstCombine/fcmp-fadd-select.ll +++ b/llvm/test/Transforms/InstCombine/fcmp-fadd-select.ll @@ -663,7 +663,7 @@ define float 
@test_fcmp_ogt_fadd_select_rewrite_flags2(float %in) { define float @test_fcmp_ogt_fadd_select_rewrite_and_fastmath(float %in) { ; CHECK-LABEL: define float @test_fcmp_ogt_fadd_select_rewrite_and_fastmath( ; CHECK-SAME: float [[IN:%.*]]) { -; CHECK-NEXT:[[SEL_NEW:%.*]] = call fast float @llvm.maxnum.f32(float [[IN]], float 0.00e+00) +; CHECK-NEXT:[[SEL_NEW:%.*]] = call reassoc nnan nsz arcp contract afn float @llvm.maxnum.f32(float [[IN]], float 0.00e+00) ; CHECK-NEXT:[[ADD_NEW:%.*]] = fadd fast float [[SEL_NEW]], 1.00e+00 ; CHECK-NEXT:ret float [[ADD_NEW]] ; diff --git a/llvm/test/Transforms/InstCombine/minmax-fp.ll b/llvm/test/Transforms/InstCombine/minmax-fp.ll index 4fe8cf374344e..a8470a20365e9 100644 --- a/llvm/test/Transforms/InstCombine/minmax-fp.ll +++ b/llvm/test/Transforms/InstCombine/minmax-fp.ll @@ -331,7 +331,7 @@ define float @maxnum_ogt_fmf_on_select(float %a, float %b) { define <2 x float> @maxnum_oge_fmf_on_select(<2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: @maxnum_oge_fmf_on_select( -; CHECK-NEXT:[[F:%.*]] = call ninf nsz <2 x float> @llvm.maxnum.v2f32(<2 x float> [[A:%.*]], <2 x float> [[B:%.*]]) +; CHECK-NEXT:[[F:%.*]] = call nsz <2 x float> @llvm.maxnum.v2f32(<2 x float> [[A:%.*]], <2 x float> [[B:%.*]]) ; CHECK-NEXT:ret <2 x float> [[F]] ; %cond = fcmp oge <2 x float> %a, %b @@ -383,6 +383,16 @@ define float @maxnum_no_nnan(float %a, float %b) { ret float %f } +define float @minnum_olt_fmf_on_select_both_ninf(float %a, float %b) { +; CHECK-LABEL: @minnum_olt_fmf_on_select_both_ninf( +; CHECK-NEXT:[[F:%.*]] = call ninf nsz float @llvm.minnum.f32(float [[A:%.*]], float [[B:%.*]]) +; CHECK-NEXT:ret float [[F]] +; + %cond = fcmp ninf olt float %a, %b + %f = select nnan ninf nsz i1 %cond, float %a, float %b + ret float %f +} + define float @minnum_olt_fmf_on_select(float %a, float %b) { ; CHECK-LABEL: @minnum_olt_fmf_on_select( ; CHECK-NEXT:[[F:%.*]] = call nsz float @llvm.minnum.f32(float [[A:%.*]], float [[B:%.*]]) @@ -395,7 +405,7 @@ define 
float @minnum_olt_fmf_on_select(float %a, float %b) { define <2 x float> @minnum_ole_fmf_on_select(<2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: @minnum_ole_fmf_on_select( -; CHECK-NEXT:[[F:%.*]] = call ninf nsz <2 x float> @llvm.minnum.v2f32(<2 x float> [[A:%.*]], <2 x float> [[B:%.*]]) +; CHECK-NEXT:[[F:%.*]] = call nsz <2 x float> @llvm.minnum.v2f32(<2 x float> [[A:%.*]], <2 x float> [[B:%.*]]) ; CHECK-NEXT:ret <2 x float> [[F]] ; %cond = fcmp ole <2 x float> %a, %b diff --git a/llvm/test/Transforms/InstCombine/unordered-fcmp-select.ll b/llvm/test/Transforms/InstCombine/unordered-fcmp-select.ll index 178795f9f9a83..ab4c997014699 100644 --- a/llvm/test/Transforms/InstCombine/unordered-fcmp-
[llvm-branch-commits] [llvm] release/20.x: [InstCombine] Do not fold logical is_finite test (#136851) (PR #137606)
llvmbot wrote: @llvm/pr-subscribers-llvm-transforms Author: None (llvmbot) Changes Backport 8abc917fe04140b6c6088a67e0398f637efde808 Requested by: @dtcxzyw --- Full diff: https://github.com/llvm/llvm-project/pull/137606.diff 2 Files Affected: - (modified) llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp (+3-1) - (modified) llvm/test/Transforms/InstCombine/and-fcmp.ll (+28) ``diff diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index ca8a20b4b7312..ebb84d177a832 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1475,7 +1475,9 @@ Value *InstCombinerImpl::foldLogicOfFCmps(FCmpInst *LHS, FCmpInst *RHS, } } - if (IsAnd && stripSignOnlyFPOps(LHS0) == stripSignOnlyFPOps(RHS0)) { + // This transform is not valid for a logical select. + if (!IsLogicalSelect && IsAnd && + stripSignOnlyFPOps(LHS0) == stripSignOnlyFPOps(RHS0)) { // and (fcmp ord x, 0), (fcmp u* x, inf) -> fcmp o* x, inf // and (fcmp ord x, 0), (fcmp u* fabs(x), inf) -> fcmp o* x, inf if (Value *Left = matchIsFiniteTest(Builder, LHS, RHS)) diff --git a/llvm/test/Transforms/InstCombine/and-fcmp.ll b/llvm/test/Transforms/InstCombine/and-fcmp.ll index c7bbc8ab56f9a..ec1b6ad2ea168 100644 --- a/llvm/test/Transforms/InstCombine/and-fcmp.ll +++ b/llvm/test/Transforms/InstCombine/and-fcmp.ll @@ -4990,6 +4990,34 @@ define i1 @clang_builtin_isnormal_inf_check_copysign(half %x, half %y) { ret i1 %and } +define i1 @clang_builtin_isnormal_inf_check_copysign_logical_select(half %x, half %y) { +; CHECK-LABEL: @clang_builtin_isnormal_inf_check_copysign_logical_select( +; CHECK-NEXT:[[COPYSIGN_X:%.*]] = call half @llvm.copysign.f16(half [[X:%.*]], half [[Y:%.*]]) +; CHECK-NEXT:[[ORD:%.*]] = fcmp ord half [[X]], 0xH +; CHECK-NEXT:[[CMP:%.*]] = fcmp ueq half [[COPYSIGN_X]], 0xH7C00 +; CHECK-NEXT:[[AND:%.*]] = select i1 [[ORD]], i1 [[CMP]], i1 false +; 
CHECK-NEXT:ret i1 [[AND]] +; + %copysign.x = call half @llvm.copysign.f16(half %x, half %y) + %ord = fcmp ord half %x, 0.0 + %cmp = fcmp uge half %copysign.x, 0xH7C00 + %and = select i1 %ord, i1 %cmp, i1 false + ret i1 %and +} + +define i1 @clang_builtin_isnormal_inf_check_fabs_nnan_logical_select(half %x) { +; CHECK-LABEL: @clang_builtin_isnormal_inf_check_fabs_nnan_logical_select( +; CHECK-NEXT:[[COPYSIGN_X:%.*]] = call half @llvm.fabs.f16(half [[X:%.*]]) +; CHECK-NEXT:[[AND:%.*]] = fcmp oeq half [[COPYSIGN_X]], 0xH7C00 +; CHECK-NEXT:ret i1 [[AND]] +; + %copysign.x = call nnan half @llvm.fabs.f16(half %x) + %ord = fcmp ord half %x, 0.0 + %cmp = fcmp uge half %copysign.x, 0xH7C00 + %and = select i1 %ord, i1 %cmp, i1 false + ret i1 %and +} + define i1 @isnormal_logical_select_0(half %x) { ; CHECK-LABEL: @isnormal_logical_select_0( ; CHECK-NEXT:[[FABS_X:%.*]] = call half @llvm.fabs.f16(half [[X:%.*]]) `` https://github.com/llvm/llvm-project/pull/137606 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [InstCombine] Fix ninf propagation for fcmp+sel -> minmax (#136433) (PR #137605)
nikic wrote: > > I don't think there is a need to backport FMF propagation fixes. > > Is there a policy to judge whether or not to backport a miscompilation bug > fix? Actually, it is unlikely to trigger this bug in real-world projects. But > this fix is simple and safe to be backported. There is https://llvm.org/docs/HowToReleaseLLVM.html#release-patch-rules, but it's not very useful :) I don't think there is much value in backporting theoretical miscompilation fixes to the release branch, but I don't particularly care in this case, as the patch itself is simple and unlikely to significantly affect anything. https://github.com/llvm/llvm-project/pull/137605 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [InstCombine] Preserve signbit semantics of NaN with fold to fabs (#136648) (PR #137608)
llvmbot wrote: @arsenm What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/137608 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [InstCombine] Preserve signbit semantics of NaN with fold to fabs (#136648) (PR #137608)
llvmbot wrote: @llvm/pr-subscribers-llvm-transforms Author: None (llvmbot) Changes Backport 3e1e4062e1e95031c32c0ed9786647ef1a4141aa Requested by: @dtcxzyw --- Full diff: https://github.com/llvm/llvm-project/pull/137608.diff 2 Files Affected: - (modified) llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp (+8-1) - (modified) llvm/test/Transforms/InstCombine/fabs.ll (+46-11) ``diff diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 29c5cef84ccdb..9cd234dd3babf 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -2838,7 +2838,14 @@ static Instruction *foldSelectWithFCmpToFabs(SelectInst &SI, // fold (X <= +/-0.0) ? (0.0 - X) : X to fabs(X), when 'Swap' is false // fold (X > +/-0.0) ? X : (0.0 - X) to fabs(X), when 'Swap' is true -if (match(TrueVal, m_FSub(m_PosZeroFP(), m_Specific(X { +// Note: We require "nnan" for this fold because fcmp ignores the signbit +// of NAN, but IEEE-754 specifies the signbit of NAN values with +// fneg/fabs operations. 
+if (match(TrueVal, m_FSub(m_PosZeroFP(), m_Specific(X))) && +(cast(CondVal)->hasNoNaNs() || SI.hasNoNaNs() || + isKnownNeverNaN(X, /*Depth=*/0, + IC.getSimplifyQuery().getWithInstruction( + cast(CondVal) { if (!Swap && (Pred == FCmpInst::FCMP_OLE || Pred == FCmpInst::FCMP_ULE)) { Value *Fabs = IC.Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, &SI); return IC.replaceInstUsesWith(SI, Fabs); diff --git a/llvm/test/Transforms/InstCombine/fabs.ll b/llvm/test/Transforms/InstCombine/fabs.ll index 7b9a672f188ca..f449d4b8e6b37 100644 --- a/llvm/test/Transforms/InstCombine/fabs.ll +++ b/llvm/test/Transforms/InstCombine/fabs.ll @@ -256,6 +256,19 @@ define double @select_fcmp_ole_zero(double %x) { ; CHECK-LABEL: @select_fcmp_ole_zero( ; CHECK-NEXT:[[FABS:%.*]] = call double @llvm.fabs.f64(double [[X:%.*]]) ; CHECK-NEXT:ret double [[FABS]] +; + %lezero = fcmp nnan ole double %x, 0.0 + %negx = fsub double 0.0, %x + %fabs = select i1 %lezero, double %negx, double %x + ret double %fabs +} + +define double @select_fcmp_ole_zero_no_nnan(double %x) { +; CHECK-LABEL: @select_fcmp_ole_zero_no_nnan( +; CHECK-NEXT:[[LEZERO:%.*]] = fcmp ole double [[X:%.*]], 0.00e+00 +; CHECK-NEXT:[[NEGX:%.*]] = fsub double 0.00e+00, [[X]] +; CHECK-NEXT:[[FABS:%.*]] = select i1 [[LEZERO]], double [[NEGX]], double [[X]] +; CHECK-NEXT:ret double [[FABS]] ; %lezero = fcmp ole double %x, 0.0 %negx = fsub double 0.0, %x @@ -263,12 +276,34 @@ define double @select_fcmp_ole_zero(double %x) { ret double %fabs } +define double @select_fcmp_ole_zero_no_nnan_input_nofpclass_nan(double nofpclass(nan) %x) { +; CHECK-LABEL: @select_fcmp_ole_zero_no_nnan_input_nofpclass_nan( +; CHECK-NEXT:[[FABS:%.*]] = call double @llvm.fabs.f64(double [[X:%.*]]) +; CHECK-NEXT:ret double [[FABS]] +; + %lezero = fcmp ole double %x, 0.0 + %negx = fsub double 0.0, %x + %fabs = select i1 %lezero, double %negx, double %x + ret double %fabs +} + +define double @select_fcmp_ole_zero_select_nnan(double %x) { +; CHECK-LABEL: 
@select_fcmp_ole_zero_select_nnan( +; CHECK-NEXT:[[FABS:%.*]] = call nnan double @llvm.fabs.f64(double [[X:%.*]]) +; CHECK-NEXT:ret double [[FABS]] +; + %lezero = fcmp ole double %x, 0.0 + %negx = fsub double 0.0, %x + %fabs = select nnan i1 %lezero, double %negx, double %x + ret double %fabs +} + define double @select_fcmp_nnan_ole_zero(double %x) { ; CHECK-LABEL: @select_fcmp_nnan_ole_zero( ; CHECK-NEXT:[[FABS:%.*]] = call double @llvm.fabs.f64(double [[X:%.*]]) ; CHECK-NEXT:ret double [[FABS]] ; - %lezero = fcmp ole double %x, 0.0 + %lezero = fcmp nnan ole double %x, 0.0 %negx = fsub nnan double 0.0, %x %fabs = select i1 %lezero, double %negx, double %x ret double %fabs @@ -279,7 +314,7 @@ define double @select_nnan_fcmp_nnan_ole_zero(double %x) { ; CHECK-NEXT:[[FABS:%.*]] = call nnan double @llvm.fabs.f64(double [[X:%.*]]) ; CHECK-NEXT:ret double [[FABS]] ; - %lezero = fcmp ole double %x, 0.0 + %lezero = fcmp nnan ole double %x, 0.0 %negx = fsub nnan double 0.0, %x %fabs = select nnan i1 %lezero, double %negx, double %x ret double %fabs @@ -292,7 +327,7 @@ define double @select_fcmp_nnan_ule_zero(double %x) { ; CHECK-NEXT:[[FABS:%.*]] = call double @llvm.fabs.f64(double [[X:%.*]]) ; CHECK-NEXT:ret double [[FABS]] ; - %lezero = fcmp ule double %x, 0.0 + %lezero = fcmp nnan ule double %x, 0.0 %negx = fsub nnan double 0.0, %x %fabs = select i1 %lezero, double %negx, double %x ret double %fabs @@ -320,7 +355,7 @@ define <2 x float> @select_fcmp_nnan_ole_negzero(<2 x float> %x) { ; CHECK-NEXT:[[FABS:%.*]] = call <2 x float> @llvm.fabs.v2f32(
[llvm-branch-commits] [llvm] release/20.x: [InstCombine] Preserve signbit semantics of NaN with fold to fabs (#136648) (PR #137608)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/137608 Backport 3e1e4062e1e95031c32c0ed9786647ef1a4141aa Requested by: @dtcxzyw >From fbe3db8268cbb4f2a3e479520637a5bfd4e6a302 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Sat, 26 Apr 2025 14:03:12 +0800 Subject: [PATCH] [InstCombine] Preserve signbit semantics of NaN with fold to fabs (#136648) As per the LangRef and IEEE 754-2008 standard, the sign bit of NaN is preserved if there is no floating-point operation being performed. See also https://github.com/llvm/llvm-project/commit/862e35e25a68502433da0a8d0819448ff5745339 for reference. Alive2: https://alive2.llvm.org/ce/z/QYtEGj Closes https://github.com/llvm/llvm-project/issues/136646 (cherry picked from commit 3e1e4062e1e95031c32c0ed9786647ef1a4141aa) --- .../InstCombine/InstCombineSelect.cpp | 9 ++- llvm/test/Transforms/InstCombine/fabs.ll | 57 +++ 2 files changed, 54 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 29c5cef84ccdb..9cd234dd3babf 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -2838,7 +2838,14 @@ static Instruction *foldSelectWithFCmpToFabs(SelectInst &SI, // fold (X <= +/-0.0) ? (0.0 - X) : X to fabs(X), when 'Swap' is false // fold (X > +/-0.0) ? X : (0.0 - X) to fabs(X), when 'Swap' is true -if (match(TrueVal, m_FSub(m_PosZeroFP(), m_Specific(X { +// Note: We require "nnan" for this fold because fcmp ignores the signbit +// of NAN, but IEEE-754 specifies the signbit of NAN values with +// fneg/fabs operations. 
+if (match(TrueVal, m_FSub(m_PosZeroFP(), m_Specific(X))) && +(cast(CondVal)->hasNoNaNs() || SI.hasNoNaNs() || + isKnownNeverNaN(X, /*Depth=*/0, + IC.getSimplifyQuery().getWithInstruction( + cast(CondVal) { if (!Swap && (Pred == FCmpInst::FCMP_OLE || Pred == FCmpInst::FCMP_ULE)) { Value *Fabs = IC.Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, &SI); return IC.replaceInstUsesWith(SI, Fabs); diff --git a/llvm/test/Transforms/InstCombine/fabs.ll b/llvm/test/Transforms/InstCombine/fabs.ll index 7b9a672f188ca..f449d4b8e6b37 100644 --- a/llvm/test/Transforms/InstCombine/fabs.ll +++ b/llvm/test/Transforms/InstCombine/fabs.ll @@ -256,6 +256,19 @@ define double @select_fcmp_ole_zero(double %x) { ; CHECK-LABEL: @select_fcmp_ole_zero( ; CHECK-NEXT:[[FABS:%.*]] = call double @llvm.fabs.f64(double [[X:%.*]]) ; CHECK-NEXT:ret double [[FABS]] +; + %lezero = fcmp nnan ole double %x, 0.0 + %negx = fsub double 0.0, %x + %fabs = select i1 %lezero, double %negx, double %x + ret double %fabs +} + +define double @select_fcmp_ole_zero_no_nnan(double %x) { +; CHECK-LABEL: @select_fcmp_ole_zero_no_nnan( +; CHECK-NEXT:[[LEZERO:%.*]] = fcmp ole double [[X:%.*]], 0.00e+00 +; CHECK-NEXT:[[NEGX:%.*]] = fsub double 0.00e+00, [[X]] +; CHECK-NEXT:[[FABS:%.*]] = select i1 [[LEZERO]], double [[NEGX]], double [[X]] +; CHECK-NEXT:ret double [[FABS]] ; %lezero = fcmp ole double %x, 0.0 %negx = fsub double 0.0, %x @@ -263,12 +276,34 @@ define double @select_fcmp_ole_zero(double %x) { ret double %fabs } +define double @select_fcmp_ole_zero_no_nnan_input_nofpclass_nan(double nofpclass(nan) %x) { +; CHECK-LABEL: @select_fcmp_ole_zero_no_nnan_input_nofpclass_nan( +; CHECK-NEXT:[[FABS:%.*]] = call double @llvm.fabs.f64(double [[X:%.*]]) +; CHECK-NEXT:ret double [[FABS]] +; + %lezero = fcmp ole double %x, 0.0 + %negx = fsub double 0.0, %x + %fabs = select i1 %lezero, double %negx, double %x + ret double %fabs +} + +define double @select_fcmp_ole_zero_select_nnan(double %x) { +; CHECK-LABEL: 
@select_fcmp_ole_zero_select_nnan( +; CHECK-NEXT:[[FABS:%.*]] = call nnan double @llvm.fabs.f64(double [[X:%.*]]) +; CHECK-NEXT:ret double [[FABS]] +; + %lezero = fcmp ole double %x, 0.0 + %negx = fsub double 0.0, %x + %fabs = select nnan i1 %lezero, double %negx, double %x + ret double %fabs +} + define double @select_fcmp_nnan_ole_zero(double %x) { ; CHECK-LABEL: @select_fcmp_nnan_ole_zero( ; CHECK-NEXT:[[FABS:%.*]] = call double @llvm.fabs.f64(double [[X:%.*]]) ; CHECK-NEXT:ret double [[FABS]] ; - %lezero = fcmp ole double %x, 0.0 + %lezero = fcmp nnan ole double %x, 0.0 %negx = fsub nnan double 0.0, %x %fabs = select i1 %lezero, double %negx, double %x ret double %fabs @@ -279,7 +314,7 @@ define double @select_nnan_fcmp_nnan_ole_zero(double %x) { ; CHECK-NEXT:[[FABS:%.*]] = call nnan double @llvm.fabs.f64(double [[X:%.*]]) ; CHECK-NEXT:ret double [[FABS]] ; - %lezero = fcmp ole double %x, 0.0 + %lezero = fcmp nnan ole double %x, 0.0 %negx = fsub nnan double 0.0, %x %fabs = select nnan i1 %lezero, do
[llvm-branch-commits] [llvm] release/20.x: [InstCombine] Preserve signbit semantics of NaN with fold to fabs (#136648) (PR #137608)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/137608 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] release/20.x: [sanitizer_common] Remove interceptors for deprecated struct termio (#137403) (PR #137707)
hpax wrote: So I realize I'm coming late to this party, but there are other problems too: the termios ioctls (TCGETS, TCSETS*) are using the wrong structure, because struct termios comes from glibc, not from the kernel. I think that *both* issues can be solved by including the kernel's termios header (presumably <linux/termios.h>) instead of the glibc <termios.h>. Note that the two headers are mutually exclusive, because the kernel header will import the kernel "struct termios". Finally, the sanitizer is completely missing the v2 termios ioctls, which also require the kernel header to be included. Those are: TCGETS2 TCSETS2 TCSETSW2 TCSETSF2 Note that not all Linux platforms have these, because some simply didn't need them (the kernel native struct termios was "already" termios2.) That being said, these ioctls are all new enough to have the size and direction encoded in the ioctl number, so I don't know if that means that you don't need to have special code for them. https://github.com/llvm/llvm-project/pull/137707 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [llvm] Introduce callee_type metadata (PR #87573)
https://github.com/Prabhuk updated https://github.com/llvm/llvm-project/pull/87573 >From a8a5848885e12c771f12cfa33b4dbc6a0272e925 Mon Sep 17 00:00:00 2001 From: Prabhuk Date: Mon, 22 Apr 2024 11:34:04 -0700 Subject: [PATCH 01/13] Update clang/lib/CodeGen/CodeGenModule.cpp Cleaner if checks. Co-authored-by: Matt Arsenault --- clang/lib/CodeGen/CodeGenModule.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index e19bbee996f58..ff1586d2fa8ab 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -2711,7 +2711,7 @@ void CodeGenModule::CreateFunctionTypeMetadataForIcall(const FunctionDecl *FD, void CodeGenModule::CreateFunctionTypeMetadataForIcall(const QualType &QT, llvm::CallBase *CB) { // Only if needed for call graph section and only for indirect calls. - if (!(CodeGenOpts.CallGraphSection && CB && CB->isIndirectCall())) + if (!CodeGenOpts.CallGraphSection || !CB || !CB->isIndirectCall()) return; auto *MD = CreateMetadataIdentifierGeneralized(QT); >From 019b2ca5e1c263183ed114e0b967b4e77b4a17a8 Mon Sep 17 00:00:00 2001 From: Prabhuk Date: Mon, 22 Apr 2024 11:34:31 -0700 Subject: [PATCH 02/13] Update clang/lib/CodeGen/CodeGenModule.cpp Update the comments as suggested. 
Co-authored-by: Matt Arsenault --- clang/lib/CodeGen/CodeGenModule.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index ff1586d2fa8ab..5635a87d2358a 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -2680,9 +2680,9 @@ void CodeGenModule::CreateFunctionTypeMetadataForIcall(const FunctionDecl *FD, bool EmittedMDIdGeneralized = false; if (CodeGenOpts.CallGraphSection && (!F->hasLocalLinkage() || - F->getFunction().hasAddressTaken(nullptr, /* IgnoreCallbackUses */ true, -/* IgnoreAssumeLikeCalls */ true, -/* IgnoreLLVMUsed */ false))) { + F->getFunction().hasAddressTaken(nullptr, /*IgnoreCallbackUses=*/ true, +/*IgnoreAssumeLikeCalls=*/ true, +/*IgnoreLLVMUsed=*/ false))) { F->addTypeMetadata(0, CreateMetadataIdentifierGeneralized(FD->getType())); EmittedMDIdGeneralized = true; } >From 99242900c51778abd4b7e7f4361b09202b7abcda Mon Sep 17 00:00:00 2001 From: Prabhuk Date: Mon, 29 Apr 2024 11:53:40 -0700 Subject: [PATCH 03/13] dyn_cast to isa Created using spr 1.3.6-beta.1 --- clang/lib/CodeGen/CGCall.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 526a63b24ff83..45033ced1d834 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -5713,8 +5713,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, if (callOrInvoke && *callOrInvoke && (*callOrInvoke)->isIndirectCall()) { if (const FunctionDecl *FD = dyn_cast_or_null(TargetDecl)) { // Type id metadata is set only for C/C++ contexts. 
-if (dyn_cast(FD) || dyn_cast(FD) || -dyn_cast(FD)) { +if (isa(FD) || isa(FD) || +isa(FD)) { CGM.CreateFunctionTypeMetadataForIcall(FD->getType(), *callOrInvoke); } } >From 24882b15939b781bcf28d87fdf4f6e8834b6cfde Mon Sep 17 00:00:00 2001 From: prabhukr Date: Tue, 10 Dec 2024 14:54:27 -0800 Subject: [PATCH 04/13] Address review comments. Break llvm and clang patches. Created using spr 1.3.6-beta.1 --- llvm/lib/IR/Verifier.cpp | 7 +++ llvm/test/Verifier/operand-bundles.ll | 4 ++-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 0ad7ba555bfad..b72672e7b8e56 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -3707,10 +3707,9 @@ void Verifier::visitCallBase(CallBase &Call) { if (Intrinsic::ID ID = (Intrinsic::ID)F->getIntrinsicID()) visitIntrinsicCall(ID, Call); - // Verify that a callsite has at most one "deopt", at most one "funclet", at - // most one "gc-transition", at most one "cfguardtarget", at most one "type", - // at most one "preallocated" operand bundle, and at most one "ptrauth" - // operand bundle. + // Verify that a callsite has at most one operand bundle for each of the + // following: "deopt", "funclet", "gc-transition", "cfguardtarget", "type", + // "preallocated", and "ptrauth". bool FoundDeoptBundle = false, FoundFuncletBundle = false, FoundGCTransitionBundle = false, FoundCFGuardTargetBundle = false, FoundPreallocatedBundle = false, FoundGCLiveBundle = false, diff --git a/llvm/test/Verifier/operand-bundles.ll b/llvm/t
[llvm-branch-commits] [llvm] [llvm] Extract and propagate indirect call type id (PR #87575)
https://github.com/Prabhuk updated https://github.com/llvm/llvm-project/pull/87575 >From 1a8d810d352fbe84c0521c7614689b60ade693c8 Mon Sep 17 00:00:00 2001 From: Necip Fazil Yildiran Date: Tue, 19 Nov 2024 15:25:34 -0800 Subject: [PATCH 1/5] Fixed the tests and addressed most of the review comments. Created using spr 1.3.6-beta.1 --- llvm/include/llvm/CodeGen/MachineFunction.h | 15 +++-- .../CodeGen/AArch64/call-site-info-typeid.ll | 28 +++-- .../test/CodeGen/ARM/call-site-info-typeid.ll | 28 +++-- .../CodeGen/MIR/X86/call-site-info-typeid.ll | 58 --- .../CodeGen/MIR/X86/call-site-info-typeid.mir | 13 ++--- .../CodeGen/Mips/call-site-info-typeid.ll | 28 +++-- .../test/CodeGen/X86/call-site-info-typeid.ll | 28 +++-- 7 files changed, 71 insertions(+), 127 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h index bb0b87a3a04a3..44633df38a651 100644 --- a/llvm/include/llvm/CodeGen/MachineFunction.h +++ b/llvm/include/llvm/CodeGen/MachineFunction.h @@ -493,7 +493,7 @@ class LLVM_EXTERNAL_VISIBILITY MachineFunction { /// Callee type id. ConstantInt *TypeId = nullptr; -CallSiteInfo() {} +CallSiteInfo() = default; /// Extracts the numeric type id from the CallBase's type operand bundle, /// and sets TypeId. This is used as type id for the indirect call in the @@ -503,12 +503,11 @@ class LLVM_EXTERNAL_VISIBILITY MachineFunction { if (!CB.isIndirectCall()) return; - auto Opt = CB.getOperandBundle(LLVMContext::OB_type); - if (!Opt.has_value()) { -errs() << "warning: cannot find indirect call type operand bundle for " - "call graph section\n"; + std::optional Opt = + CB.getOperandBundle(LLVMContext::OB_type); + // Return if the operand bundle for call graph section cannot be found. 
+ if (!Opt.has_value()) return; - } // Get generalized type id string auto OB = Opt.value(); @@ -520,9 +519,9 @@ class LLVM_EXTERNAL_VISIBILITY MachineFunction { "invalid type identifier"); // Compute numeric type id from generalized type id string - uint64_t TypeIdVal = llvm::MD5Hash(TypeIdStr->getString()); + uint64_t TypeIdVal = MD5Hash(TypeIdStr->getString()); IntegerType *Int64Ty = Type::getInt64Ty(CB.getContext()); - TypeId = llvm::ConstantInt::get(Int64Ty, TypeIdVal, /*IsSigned=*/false); + TypeId = ConstantInt::get(Int64Ty, TypeIdVal, /*IsSigned=*/false); } }; diff --git a/llvm/test/CodeGen/AArch64/call-site-info-typeid.ll b/llvm/test/CodeGen/AArch64/call-site-info-typeid.ll index f0a6b44755c5c..f3b98c2c7a395 100644 --- a/llvm/test/CodeGen/AArch64/call-site-info-typeid.ll +++ b/llvm/test/CodeGen/AArch64/call-site-info-typeid.ll @@ -1,14 +1,9 @@ -; Tests that call site type ids can be extracted and set from type operand -; bundles. +;; Tests that call site type ids can be extracted and set from type operand +;; bundles. -; Verify the exact typeId value to ensure it is not garbage but the value -; computed as the type id from the type operand bundle. -; RUN: llc --call-graph-section -mtriple aarch64-linux-gnu %s -stop-before=finalize-isel -o - | FileCheck %s - -; ModuleID = 'test.c' -source_filename = "test.c" -target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" -target triple = "aarch64-unknown-linux-gnu" +;; Verify the exact typeId value to ensure it is not garbage but the value +;; computed as the type id from the type operand bundle. 
+; RUN: llc --call-graph-section -mtriple aarch64-linux-gnu < %s -stop-before=finalize-isel -o - | FileCheck %s define dso_local void @foo(i8 signext %a) !type !3 { entry: @@ -19,10 +14,10 @@ entry: define dso_local i32 @main() !type !4 { entry: %retval = alloca i32, align 4 - %fp = alloca void (i8)*, align 8 - store i32 0, i32* %retval, align 4 - store void (i8)* @foo, void (i8)** %fp, align 8 - %0 = load void (i8)*, void (i8)** %fp, align 8 + %fp = alloca ptr, align 8 + store i32 0, ptr %retval, align 4 + store ptr @foo, ptr %fp, align 8 + %0 = load ptr, ptr %fp, align 8 ; CHECK: callSites: ; CHECK-NEXT: - { bb: {{.*}}, offset: {{.*}}, fwdArgRegs: [], typeId: ; CHECK-NEXT: 7854600665770582568 } @@ -30,10 +25,5 @@ entry: ret i32 0 } -!llvm.module.flags = !{!0, !1, !2} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"uwtable", i32 1} -!2 = !{i32 7, !"frame-pointer", i32 2} !3 = !{i64 0, !"_ZTSFvcE.generalized"} !4 = !{i64 0, !"_ZTSFiE.generalized"} diff --git a/llvm/test/CodeGen/ARM/call-site-info-typeid.ll b/llvm/test/CodeGen/ARM/call-site-info-typeid.ll index ec7f8a425051b..9feeef9a564cc 100644 --- a/llvm/test/CodeGen/ARM/call-site-info-typeid.ll +++ b/llvm/test/CodeGen/ARM/call-site-info-typeid.ll @@ -1,14 +1,9 @@ -; Tests that call site type ids can be extracted and set from type operand -; bundles. +;; Tests that ca
[llvm-branch-commits] [llvm] [llvm][AsmPrinter] Emit call graph section (PR #87576)
https://github.com/Prabhuk updated https://github.com/llvm/llvm-project/pull/87576 >From 6b67376bd5e1f21606017c83cc67f2186ba36a33 Mon Sep 17 00:00:00 2001 From: Necip Fazil Yildiran Date: Thu, 13 Mar 2025 01:41:04 + Subject: [PATCH 1/4] Updated the test as reviewers suggested. Created using spr 1.3.6-beta.1 --- llvm/test/CodeGen/X86/call-graph-section.ll | 66 +++ llvm/test/CodeGen/call-graph-section.ll | 73 - 2 files changed, 66 insertions(+), 73 deletions(-) create mode 100644 llvm/test/CodeGen/X86/call-graph-section.ll delete mode 100644 llvm/test/CodeGen/call-graph-section.ll diff --git a/llvm/test/CodeGen/X86/call-graph-section.ll b/llvm/test/CodeGen/X86/call-graph-section.ll new file mode 100644 index 0..a77a2b8051ed3 --- /dev/null +++ b/llvm/test/CodeGen/X86/call-graph-section.ll @@ -0,0 +1,66 @@ +;; Tests that we store the type identifiers in .callgraph section of the binary. + +; RUN: llc --call-graph-section -filetype=obj -o - < %s | \ +; RUN: llvm-readelf -x .callgraph - | FileCheck %s + +; Function Attrs: noinline nounwind optnone uwtable +define dso_local void @foo() #0 !type !4 { +entry: + ret void +} + +; Function Attrs: noinline nounwind optnone uwtable +define dso_local i32 @bar(i8 signext %a) #0 !type !5 { +entry: + %a.addr = alloca i8, align 1 + store i8 %a, ptr %a.addr, align 1 + ret i32 0 +} + +; Function Attrs: noinline nounwind optnone uwtable +define dso_local ptr @baz(ptr %a) #0 !type !6 { +entry: + %a.addr = alloca ptr, align 8 + store ptr %a, ptr %a.addr, align 8 + ret ptr null +} + +; Function Attrs: noinline nounwind optnone uwtable +define dso_local void @main() #0 !type !7 { +entry: + %retval = alloca i32, align 4 + %fp_foo = alloca ptr, align 8 + %a = alloca i8, align 1 + %fp_bar = alloca ptr, align 8 + %fp_baz = alloca ptr, align 8 + store i32 0, ptr %retval, align 4 + store ptr @foo, ptr %fp_foo, align 8 + %0 = load ptr, ptr %fp_foo, align 8 + call void (...) 
%0() [ "callee_type"(metadata !"_ZTSFvE.generalized") ] + store ptr @bar, ptr %fp_bar, align 8 + %1 = load ptr, ptr %fp_bar, align 8 + %2 = load i8, ptr %a, align 1 + %call = call i32 %1(i8 signext %2) [ "callee_type"(metadata !"_ZTSFicE.generalized") ] + store ptr @baz, ptr %fp_baz, align 8 + %3 = load ptr, ptr %fp_baz, align 8 + %call1 = call ptr %3(ptr %a) [ "callee_type"(metadata !"_ZTSFPvS_E.generalized") ] + call void @foo() [ "callee_type"(metadata !"_ZTSFvE.generalized") ] + %4 = load i8, ptr %a, align 1 + %call2 = call i32 @bar(i8 signext %4) [ "callee_type"(metadata !"_ZTSFicE.generalized") ] + %call3 = call ptr @baz(ptr %a) [ "callee_type"(metadata !"_ZTSFPvS_E.generalized") ] + ret void +} + +;; Check that the numeric type id (md5 hash) for the below type ids are emitted +;; to the callgraph section. + +; CHECK: Hex dump of section '.callgraph': + +; CHECK-DAG: 2444f731 f5eecb3e +!4 = !{i64 0, !"_ZTSFvE.generalized"} +; CHECK-DAG: 5486bc59 814b8e30 +!5 = !{i64 0, !"_ZTSFicE.generalized"} +; CHECK-DAG: 7ade6814 f897fd77 +!6 = !{i64 0, !"_ZTSFPvS_E.generalized"} +; CHECK-DAG: caaf769a 600968fa +!7 = !{i64 0, !"_ZTSFiE.generalized"} diff --git a/llvm/test/CodeGen/call-graph-section.ll b/llvm/test/CodeGen/call-graph-section.ll deleted file mode 100644 index bb158d11e82c9..0 --- a/llvm/test/CodeGen/call-graph-section.ll +++ /dev/null @@ -1,73 +0,0 @@ -; Tests that we store the type identifiers in .callgraph section of the binary. 
- -; RUN: llc --call-graph-section -filetype=obj -o - < %s | \ -; RUN: llvm-readelf -x .callgraph - | FileCheck %s - -target triple = "x86_64-unknown-linux-gnu" - -define dso_local void @foo() #0 !type !4 { -entry: - ret void -} - -define dso_local i32 @bar(i8 signext %a) #0 !type !5 { -entry: - %a.addr = alloca i8, align 1 - store i8 %a, i8* %a.addr, align 1 - ret i32 0 -} - -define dso_local i32* @baz(i8* %a) #0 !type !6 { -entry: - %a.addr = alloca i8*, align 8 - store i8* %a, i8** %a.addr, align 8 - ret i32* null -} - -define dso_local i32 @main() #0 !type !7 { -entry: - %retval = alloca i32, align 4 - %fp_foo = alloca void (...)*, align 8 - %a = alloca i8, align 1 - %fp_bar = alloca i32 (i8)*, align 8 - %fp_baz = alloca i32* (i8*)*, align 8 - store i32 0, i32* %retval, align 4 - store void (...)* bitcast (void ()* @foo to void (...)*), void (...)** %fp_foo, align 8 - %0 = load void (...)*, void (...)** %fp_foo, align 8 - call void (...) %0() [ "callee_type"(metadata !"_ZTSFvE.generalized") ] - store i32 (i8)* @bar, i32 (i8)** %fp_bar, align 8 - %1 = load i32 (i8)*, i32 (i8)** %fp_bar, align 8 - %2 = load i8, i8* %a, align 1 - %call = call i32 %1(i8 signext %2) [ "callee_type"(metadata !"_ZTSFicE.generalized") ] - store i32* (i8*)* @baz, i32* (i8*)** %fp_baz, align 8 - %3 = load i32* (i8*)*, i32* (i8*)** %fp_baz, align 8 - %call1 = call i32* %3(i8* %a) [ "callee_type"(metadata !"_ZTSFPvS_E.generalized") ] - call void @foo() [ "callee_type"(meta
[llvm-branch-commits] [llvm] [llvm] Extract and propagate indirect call type id (PR #87575)
https://github.com/Prabhuk updated https://github.com/llvm/llvm-project/pull/87575 >From 1a8d810d352fbe84c0521c7614689b60ade693c8 Mon Sep 17 00:00:00 2001 From: Necip Fazil Yildiran Date: Tue, 19 Nov 2024 15:25:34 -0800 Subject: [PATCH 1/5] Fixed the tests and addressed most of the review comments. Created using spr 1.3.6-beta.1 --- llvm/include/llvm/CodeGen/MachineFunction.h | 15 +++-- .../CodeGen/AArch64/call-site-info-typeid.ll | 28 +++-- .../test/CodeGen/ARM/call-site-info-typeid.ll | 28 +++-- .../CodeGen/MIR/X86/call-site-info-typeid.ll | 58 --- .../CodeGen/MIR/X86/call-site-info-typeid.mir | 13 ++--- .../CodeGen/Mips/call-site-info-typeid.ll | 28 +++-- .../test/CodeGen/X86/call-site-info-typeid.ll | 28 +++-- 7 files changed, 71 insertions(+), 127 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h index bb0b87a3a04a3..44633df38a651 100644 --- a/llvm/include/llvm/CodeGen/MachineFunction.h +++ b/llvm/include/llvm/CodeGen/MachineFunction.h @@ -493,7 +493,7 @@ class LLVM_EXTERNAL_VISIBILITY MachineFunction { /// Callee type id. ConstantInt *TypeId = nullptr; -CallSiteInfo() {} +CallSiteInfo() = default; /// Extracts the numeric type id from the CallBase's type operand bundle, /// and sets TypeId. This is used as type id for the indirect call in the @@ -503,12 +503,11 @@ class LLVM_EXTERNAL_VISIBILITY MachineFunction { if (!CB.isIndirectCall()) return; - auto Opt = CB.getOperandBundle(LLVMContext::OB_type); - if (!Opt.has_value()) { -errs() << "warning: cannot find indirect call type operand bundle for " - "call graph section\n"; + std::optional Opt = + CB.getOperandBundle(LLVMContext::OB_type); + // Return if the operand bundle for call graph section cannot be found. 
+ if (!Opt.has_value()) return; - } // Get generalized type id string auto OB = Opt.value(); @@ -520,9 +519,9 @@ class LLVM_EXTERNAL_VISIBILITY MachineFunction { "invalid type identifier"); // Compute numeric type id from generalized type id string - uint64_t TypeIdVal = llvm::MD5Hash(TypeIdStr->getString()); + uint64_t TypeIdVal = MD5Hash(TypeIdStr->getString()); IntegerType *Int64Ty = Type::getInt64Ty(CB.getContext()); - TypeId = llvm::ConstantInt::get(Int64Ty, TypeIdVal, /*IsSigned=*/false); + TypeId = ConstantInt::get(Int64Ty, TypeIdVal, /*IsSigned=*/false); } }; diff --git a/llvm/test/CodeGen/AArch64/call-site-info-typeid.ll b/llvm/test/CodeGen/AArch64/call-site-info-typeid.ll index f0a6b44755c5c..f3b98c2c7a395 100644 --- a/llvm/test/CodeGen/AArch64/call-site-info-typeid.ll +++ b/llvm/test/CodeGen/AArch64/call-site-info-typeid.ll @@ -1,14 +1,9 @@ -; Tests that call site type ids can be extracted and set from type operand -; bundles. +;; Tests that call site type ids can be extracted and set from type operand +;; bundles. -; Verify the exact typeId value to ensure it is not garbage but the value -; computed as the type id from the type operand bundle. -; RUN: llc --call-graph-section -mtriple aarch64-linux-gnu %s -stop-before=finalize-isel -o - | FileCheck %s - -; ModuleID = 'test.c' -source_filename = "test.c" -target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" -target triple = "aarch64-unknown-linux-gnu" +;; Verify the exact typeId value to ensure it is not garbage but the value +;; computed as the type id from the type operand bundle. 
+; RUN: llc --call-graph-section -mtriple aarch64-linux-gnu < %s -stop-before=finalize-isel -o - | FileCheck %s define dso_local void @foo(i8 signext %a) !type !3 { entry: @@ -19,10 +14,10 @@ entry: define dso_local i32 @main() !type !4 { entry: %retval = alloca i32, align 4 - %fp = alloca void (i8)*, align 8 - store i32 0, i32* %retval, align 4 - store void (i8)* @foo, void (i8)** %fp, align 8 - %0 = load void (i8)*, void (i8)** %fp, align 8 + %fp = alloca ptr, align 8 + store i32 0, ptr %retval, align 4 + store ptr @foo, ptr %fp, align 8 + %0 = load ptr, ptr %fp, align 8 ; CHECK: callSites: ; CHECK-NEXT: - { bb: {{.*}}, offset: {{.*}}, fwdArgRegs: [], typeId: ; CHECK-NEXT: 7854600665770582568 } @@ -30,10 +25,5 @@ entry: ret i32 0 } -!llvm.module.flags = !{!0, !1, !2} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"uwtable", i32 1} -!2 = !{i32 7, !"frame-pointer", i32 2} !3 = !{i64 0, !"_ZTSFvcE.generalized"} !4 = !{i64 0, !"_ZTSFiE.generalized"} diff --git a/llvm/test/CodeGen/ARM/call-site-info-typeid.ll b/llvm/test/CodeGen/ARM/call-site-info-typeid.ll index ec7f8a425051b..9feeef9a564cc 100644 --- a/llvm/test/CodeGen/ARM/call-site-info-typeid.ll +++ b/llvm/test/CodeGen/ARM/call-site-info-typeid.ll @@ -1,14 +1,9 @@ -; Tests that call site type ids can be extracted and set from type operand -; bundles. +;; Tests that ca
[llvm-branch-commits] [llvm] [llvm] Add option to emit `callgraph` section (PR #87574)
https://github.com/Prabhuk updated https://github.com/llvm/llvm-project/pull/87574 >From 1d7ee612e408ee7e64e984eb08e6d7089a435d09 Mon Sep 17 00:00:00 2001 From: Necip Fazil Yildiran Date: Sun, 2 Feb 2025 00:58:49 + Subject: [PATCH 1/6] Simplify MIR test. Created using spr 1.3.6-beta.1 --- .../CodeGen/MIR/X86/call-site-info-typeid.mir | 21 ++- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/llvm/test/CodeGen/MIR/X86/call-site-info-typeid.mir b/llvm/test/CodeGen/MIR/X86/call-site-info-typeid.mir index 5ab797bfcc18f..a99ee50a608fb 100644 --- a/llvm/test/CodeGen/MIR/X86/call-site-info-typeid.mir +++ b/llvm/test/CodeGen/MIR/X86/call-site-info-typeid.mir @@ -8,11 +8,6 @@ # CHECK-NEXT: 123456789 } --- | - ; ModuleID = 'test.ll' - source_filename = "test.ll" - target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" - target triple = "x86_64-unknown-linux-gnu" - define dso_local void @foo(i8 signext %a) { entry: ret void @@ -21,10 +16,10 @@ define dso_local i32 @main() { entry: %retval = alloca i32, align 4 -%fp = alloca void (i8)*, align 8 -store i32 0, i32* %retval, align 4 -store void (i8)* @foo, void (i8)** %fp, align 8 -%0 = load void (i8)*, void (i8)** %fp, align 8 +%fp = alloca ptr, align 8 +store i32 0, ptr %retval, align 4 +store ptr @foo, ptr %fp, align 8 +%0 = load ptr, ptr %fp, align 8 call void %0(i8 signext 97) ret i32 0 } @@ -42,12 +37,8 @@ body: | name:main tracksRegLiveness: true stack: - - { id: 0, name: retval, type: default, offset: 0, size: 4, alignment: 4, - stack-id: default, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 1, name: fp, type: default, offset: 0, size: 8, alignment: 8, - stack-id: default, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 0, name: retval, size: 4, alignment: 4 } + - { id: 1, 
name: fp, size: 8, alignment: 8 } callSites: - { bb: 0, offset: 6, fwdArgRegs: [], typeId: 123456789 } >From 86e2c9dc37170499252ed50c6bbef2931e106fbb Mon Sep 17 00:00:00 2001 From: prabhukr Date: Thu, 13 Mar 2025 01:03:40 + Subject: [PATCH 2/6] Add requested tests part 1. Created using spr 1.3.6-beta.1 --- ...te-info-ambiguous-indirect-call-typeid.mir | 145 ++ .../call-site-info-direct-calls-typeid.mir| 145 ++ 2 files changed, 290 insertions(+) create mode 100644 llvm/test/CodeGen/MIR/X86/call-site-info-ambiguous-indirect-call-typeid.mir create mode 100644 llvm/test/CodeGen/MIR/X86/call-site-info-direct-calls-typeid.mir diff --git a/llvm/test/CodeGen/MIR/X86/call-site-info-ambiguous-indirect-call-typeid.mir b/llvm/test/CodeGen/MIR/X86/call-site-info-ambiguous-indirect-call-typeid.mir new file mode 100644 index 0..9d1b099cc9093 --- /dev/null +++ b/llvm/test/CodeGen/MIR/X86/call-site-info-ambiguous-indirect-call-typeid.mir @@ -0,0 +1,145 @@ +# Test MIR printer and parser for type id field in callSites. It is used +# for propogating call site type identifiers to emit in the call graph section. 
+ +# RUN: llc --call-graph-section %s -run-pass=none -o - | FileCheck %s +# CHECK: name: main +# CHECK: callSites: +# CHECK-NEXT: - { bb: {{.*}}, offset: {{.*}}, fwdArgRegs: [] +# CHECK-NEXT: - { bb: {{.*}}, offset: {{.*}}, fwdArgRegs: [], typeId: +# CHECK-NEXT: 1234567890 } + +--- | + ; Function Attrs: mustprogress noinline nounwind optnone uwtable + define dso_local noundef i32 @_Z3addii(i32 noundef %a, i32 noundef %b) #0 !type !6 !type !6 { + entry: +%a.addr = alloca i32, align 4 +%b.addr = alloca i32, align 4 +store i32 %a, ptr %a.addr, align 4 +store i32 %b, ptr %b.addr, align 4 +%0 = load i32, ptr %a.addr, align 4 +%1 = load i32, ptr %b.addr, align 4 +%add = add nsw i32 %0, %1 +ret i32 %add + } + + ; Function Attrs: mustprogress noinline nounwind optnone uwtable + define dso_local noundef i32 @_Z8multiplyii(i32 noundef %a, i32 noundef %b) #0 !type !6 !type !6 { + entry: +%a.addr = alloca i32, align 4 +%b.addr = alloca i32, align 4 +store i32 %a, ptr %a.addr, align 4 +store i32 %b, ptr %b.addr, align 4 +%0 = load i32, ptr %a.addr, align 4 +%1 = load i32, ptr %b.addr, align 4 +%mul = mul nsw i32 %0, %1 +ret i32 %mul + } + + ; Function Attrs: mustprogress noinline nounwind optnone uwtable + define dso_local noundef ptr @_Z13get_operationb(i1 noundef zeroext %is_addition) #0 !type !7 !type !7 { + entry: +%is_addition.addr = alloca i8, align 1 +%storedv = zext i1 %is_addition to i8 +store i8 %storedv, ptr %is_addition.addr, align 1 +%0 = load i8, ptr %is_addition.addr, align 1 +%loadedv = trunc i8 %0 to i1 +br i1 %loade
[llvm-branch-commits] [llvm] [llvm] Add option to emit `callgraph` section (PR #87574)
https://github.com/Prabhuk updated https://github.com/llvm/llvm-project/pull/87574 >From 1d7ee612e408ee7e64e984eb08e6d7089a435d09 Mon Sep 17 00:00:00 2001 From: Necip Fazil Yildiran Date: Sun, 2 Feb 2025 00:58:49 + Subject: [PATCH 1/6] Simplify MIR test. Created using spr 1.3.6-beta.1 --- .../CodeGen/MIR/X86/call-site-info-typeid.mir | 21 ++- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/llvm/test/CodeGen/MIR/X86/call-site-info-typeid.mir b/llvm/test/CodeGen/MIR/X86/call-site-info-typeid.mir index 5ab797bfcc18f..a99ee50a608fb 100644 --- a/llvm/test/CodeGen/MIR/X86/call-site-info-typeid.mir +++ b/llvm/test/CodeGen/MIR/X86/call-site-info-typeid.mir @@ -8,11 +8,6 @@ # CHECK-NEXT: 123456789 } --- | - ; ModuleID = 'test.ll' - source_filename = "test.ll" - target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" - target triple = "x86_64-unknown-linux-gnu" - define dso_local void @foo(i8 signext %a) { entry: ret void @@ -21,10 +16,10 @@ define dso_local i32 @main() { entry: %retval = alloca i32, align 4 -%fp = alloca void (i8)*, align 8 -store i32 0, i32* %retval, align 4 -store void (i8)* @foo, void (i8)** %fp, align 8 -%0 = load void (i8)*, void (i8)** %fp, align 8 +%fp = alloca ptr, align 8 +store i32 0, ptr %retval, align 4 +store ptr @foo, ptr %fp, align 8 +%0 = load ptr, ptr %fp, align 8 call void %0(i8 signext 97) ret i32 0 } @@ -42,12 +37,8 @@ body: | name:main tracksRegLiveness: true stack: - - { id: 0, name: retval, type: default, offset: 0, size: 4, alignment: 4, - stack-id: default, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 1, name: fp, type: default, offset: 0, size: 8, alignment: 8, - stack-id: default, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 0, name: retval, size: 4, alignment: 4 } + - { id: 1, 
name: fp, size: 8, alignment: 8 } callSites: - { bb: 0, offset: 6, fwdArgRegs: [], typeId: 123456789 } >From 86e2c9dc37170499252ed50c6bbef2931e106fbb Mon Sep 17 00:00:00 2001 From: prabhukr Date: Thu, 13 Mar 2025 01:03:40 + Subject: [PATCH 2/6] Add requested tests part 1. Created using spr 1.3.6-beta.1 --- ...te-info-ambiguous-indirect-call-typeid.mir | 145 ++ .../call-site-info-direct-calls-typeid.mir| 145 ++ 2 files changed, 290 insertions(+) create mode 100644 llvm/test/CodeGen/MIR/X86/call-site-info-ambiguous-indirect-call-typeid.mir create mode 100644 llvm/test/CodeGen/MIR/X86/call-site-info-direct-calls-typeid.mir diff --git a/llvm/test/CodeGen/MIR/X86/call-site-info-ambiguous-indirect-call-typeid.mir b/llvm/test/CodeGen/MIR/X86/call-site-info-ambiguous-indirect-call-typeid.mir new file mode 100644 index 0..9d1b099cc9093 --- /dev/null +++ b/llvm/test/CodeGen/MIR/X86/call-site-info-ambiguous-indirect-call-typeid.mir @@ -0,0 +1,145 @@ +# Test MIR printer and parser for type id field in callSites. It is used +# for propogating call site type identifiers to emit in the call graph section. 
+ +# RUN: llc --call-graph-section %s -run-pass=none -o - | FileCheck %s +# CHECK: name: main +# CHECK: callSites: +# CHECK-NEXT: - { bb: {{.*}}, offset: {{.*}}, fwdArgRegs: [] +# CHECK-NEXT: - { bb: {{.*}}, offset: {{.*}}, fwdArgRegs: [], typeId: +# CHECK-NEXT: 1234567890 } + +--- | + ; Function Attrs: mustprogress noinline nounwind optnone uwtable + define dso_local noundef i32 @_Z3addii(i32 noundef %a, i32 noundef %b) #0 !type !6 !type !6 { + entry: +%a.addr = alloca i32, align 4 +%b.addr = alloca i32, align 4 +store i32 %a, ptr %a.addr, align 4 +store i32 %b, ptr %b.addr, align 4 +%0 = load i32, ptr %a.addr, align 4 +%1 = load i32, ptr %b.addr, align 4 +%add = add nsw i32 %0, %1 +ret i32 %add + } + + ; Function Attrs: mustprogress noinline nounwind optnone uwtable + define dso_local noundef i32 @_Z8multiplyii(i32 noundef %a, i32 noundef %b) #0 !type !6 !type !6 { + entry: +%a.addr = alloca i32, align 4 +%b.addr = alloca i32, align 4 +store i32 %a, ptr %a.addr, align 4 +store i32 %b, ptr %b.addr, align 4 +%0 = load i32, ptr %a.addr, align 4 +%1 = load i32, ptr %b.addr, align 4 +%mul = mul nsw i32 %0, %1 +ret i32 %mul + } + + ; Function Attrs: mustprogress noinline nounwind optnone uwtable + define dso_local noundef ptr @_Z13get_operationb(i1 noundef zeroext %is_addition) #0 !type !7 !type !7 { + entry: +%is_addition.addr = alloca i8, align 1 +%storedv = zext i1 %is_addition to i8 +store i8 %storedv, ptr %is_addition.addr, align 1 +%0 = load i8, ptr %is_addition.addr, align 1 +%loadedv = trunc i8 %0 to i1 +br i1 %loade
[llvm-branch-commits] [llvm] [llvm][AsmPrinter] Emit call graph section (PR #87576)
https://github.com/Prabhuk updated https://github.com/llvm/llvm-project/pull/87576 >From 6b67376bd5e1f21606017c83cc67f2186ba36a33 Mon Sep 17 00:00:00 2001 From: Necip Fazil Yildiran Date: Thu, 13 Mar 2025 01:41:04 + Subject: [PATCH 1/4] Updated the test as reviewers suggested. Created using spr 1.3.6-beta.1 --- llvm/test/CodeGen/X86/call-graph-section.ll | 66 +++ llvm/test/CodeGen/call-graph-section.ll | 73 - 2 files changed, 66 insertions(+), 73 deletions(-) create mode 100644 llvm/test/CodeGen/X86/call-graph-section.ll delete mode 100644 llvm/test/CodeGen/call-graph-section.ll diff --git a/llvm/test/CodeGen/X86/call-graph-section.ll b/llvm/test/CodeGen/X86/call-graph-section.ll new file mode 100644 index 0..a77a2b8051ed3 --- /dev/null +++ b/llvm/test/CodeGen/X86/call-graph-section.ll @@ -0,0 +1,66 @@ +;; Tests that we store the type identifiers in .callgraph section of the binary. + +; RUN: llc --call-graph-section -filetype=obj -o - < %s | \ +; RUN: llvm-readelf -x .callgraph - | FileCheck %s + +; Function Attrs: noinline nounwind optnone uwtable +define dso_local void @foo() #0 !type !4 { +entry: + ret void +} + +; Function Attrs: noinline nounwind optnone uwtable +define dso_local i32 @bar(i8 signext %a) #0 !type !5 { +entry: + %a.addr = alloca i8, align 1 + store i8 %a, ptr %a.addr, align 1 + ret i32 0 +} + +; Function Attrs: noinline nounwind optnone uwtable +define dso_local ptr @baz(ptr %a) #0 !type !6 { +entry: + %a.addr = alloca ptr, align 8 + store ptr %a, ptr %a.addr, align 8 + ret ptr null +} + +; Function Attrs: noinline nounwind optnone uwtable +define dso_local void @main() #0 !type !7 { +entry: + %retval = alloca i32, align 4 + %fp_foo = alloca ptr, align 8 + %a = alloca i8, align 1 + %fp_bar = alloca ptr, align 8 + %fp_baz = alloca ptr, align 8 + store i32 0, ptr %retval, align 4 + store ptr @foo, ptr %fp_foo, align 8 + %0 = load ptr, ptr %fp_foo, align 8 + call void (...) 
%0() [ "callee_type"(metadata !"_ZTSFvE.generalized") ] + store ptr @bar, ptr %fp_bar, align 8 + %1 = load ptr, ptr %fp_bar, align 8 + %2 = load i8, ptr %a, align 1 + %call = call i32 %1(i8 signext %2) [ "callee_type"(metadata !"_ZTSFicE.generalized") ] + store ptr @baz, ptr %fp_baz, align 8 + %3 = load ptr, ptr %fp_baz, align 8 + %call1 = call ptr %3(ptr %a) [ "callee_type"(metadata !"_ZTSFPvS_E.generalized") ] + call void @foo() [ "callee_type"(metadata !"_ZTSFvE.generalized") ] + %4 = load i8, ptr %a, align 1 + %call2 = call i32 @bar(i8 signext %4) [ "callee_type"(metadata !"_ZTSFicE.generalized") ] + %call3 = call ptr @baz(ptr %a) [ "callee_type"(metadata !"_ZTSFPvS_E.generalized") ] + ret void +} + +;; Check that the numeric type id (md5 hash) for the below type ids are emitted +;; to the callgraph section. + +; CHECK: Hex dump of section '.callgraph': + +; CHECK-DAG: 2444f731 f5eecb3e +!4 = !{i64 0, !"_ZTSFvE.generalized"} +; CHECK-DAG: 5486bc59 814b8e30 +!5 = !{i64 0, !"_ZTSFicE.generalized"} +; CHECK-DAG: 7ade6814 f897fd77 +!6 = !{i64 0, !"_ZTSFPvS_E.generalized"} +; CHECK-DAG: caaf769a 600968fa +!7 = !{i64 0, !"_ZTSFiE.generalized"} diff --git a/llvm/test/CodeGen/call-graph-section.ll b/llvm/test/CodeGen/call-graph-section.ll deleted file mode 100644 index bb158d11e82c9..0 --- a/llvm/test/CodeGen/call-graph-section.ll +++ /dev/null @@ -1,73 +0,0 @@ -; Tests that we store the type identifiers in .callgraph section of the binary. 
- -; RUN: llc --call-graph-section -filetype=obj -o - < %s | \ -; RUN: llvm-readelf -x .callgraph - | FileCheck %s - -target triple = "x86_64-unknown-linux-gnu" - -define dso_local void @foo() #0 !type !4 { -entry: - ret void -} - -define dso_local i32 @bar(i8 signext %a) #0 !type !5 { -entry: - %a.addr = alloca i8, align 1 - store i8 %a, i8* %a.addr, align 1 - ret i32 0 -} - -define dso_local i32* @baz(i8* %a) #0 !type !6 { -entry: - %a.addr = alloca i8*, align 8 - store i8* %a, i8** %a.addr, align 8 - ret i32* null -} - -define dso_local i32 @main() #0 !type !7 { -entry: - %retval = alloca i32, align 4 - %fp_foo = alloca void (...)*, align 8 - %a = alloca i8, align 1 - %fp_bar = alloca i32 (i8)*, align 8 - %fp_baz = alloca i32* (i8*)*, align 8 - store i32 0, i32* %retval, align 4 - store void (...)* bitcast (void ()* @foo to void (...)*), void (...)** %fp_foo, align 8 - %0 = load void (...)*, void (...)** %fp_foo, align 8 - call void (...) %0() [ "callee_type"(metadata !"_ZTSFvE.generalized") ] - store i32 (i8)* @bar, i32 (i8)** %fp_bar, align 8 - %1 = load i32 (i8)*, i32 (i8)** %fp_bar, align 8 - %2 = load i8, i8* %a, align 1 - %call = call i32 %1(i8 signext %2) [ "callee_type"(metadata !"_ZTSFicE.generalized") ] - store i32* (i8*)* @baz, i32* (i8*)** %fp_baz, align 8 - %3 = load i32* (i8*)*, i32* (i8*)** %fp_baz, align 8 - %call1 = call i32* %3(i8* %a) [ "callee_type"(metadata !"_ZTSFPvS_E.generalized") ] - call void @foo() [ "callee_type"(meta
[llvm-branch-commits] [clang] [clang] callee_type metadata for indirect calls (PR #117036)
https://github.com/Prabhuk updated https://github.com/llvm/llvm-project/pull/117036 >From b7fbe09b32ff02d4f7c52d82fbf8b5cd28138852 Mon Sep 17 00:00:00 2001 From: prabhukr Date: Wed, 23 Apr 2025 04:05:47 + Subject: [PATCH] Address review comments. Created using spr 1.3.6-beta.1 --- clang/lib/CodeGen/CGCall.cpp| 8 clang/lib/CodeGen/CodeGenModule.cpp | 10 +- clang/lib/CodeGen/CodeGenModule.h | 4 ++-- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 185ee1a970aac..d8ab7140f7943 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -5780,19 +5780,19 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, if (callOrInvoke) { *callOrInvoke = CI; if (CGM.getCodeGenOpts().CallGraphSection) { - assert((TargetDecl && TargetDecl->getFunctionType() || - Callee.getAbstractInfo().getCalleeFunctionProtoType()) && - "cannot find callsite type"); QualType CST; if (TargetDecl && TargetDecl->getFunctionType()) CST = QualType(TargetDecl->getFunctionType(), 0); else if (const auto *FPT = Callee.getAbstractInfo().getCalleeFunctionProtoType()) CST = QualType(FPT, 0); + else +llvm_unreachable( +"Cannot find the callee type to generate callee_type metadata."); // Set type identifier metadata of indirect calls for call graph section. if (!CST.isNull()) -CGM.CreateCalleeTypeMetadataForIcall(CST, *callOrInvoke); +CGM.createCalleeTypeMetadataForIcall(CST, *callOrInvoke); } } diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 43cd2405571cf..2fc99639a75cb 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -2654,7 +2654,7 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, // Skip available_externally functions. They won't be codegen'ed in the // current module anyway. 
if (getContext().GetGVALinkageForFunction(FD) != GVA_AvailableExternally) -CreateFunctionTypeMetadataForIcall(FD, F); +createFunctionTypeMetadataForIcall(FD, F); } } @@ -2868,7 +2868,7 @@ static bool hasExistingGeneralizedTypeMD(llvm::Function *F) { return MD->hasGeneralizedMDString(); } -void CodeGenModule::CreateFunctionTypeMetadataForIcall(const FunctionDecl *FD, +void CodeGenModule::createFunctionTypeMetadataForIcall(const FunctionDecl *FD, llvm::Function *F) { if (CodeGenOpts.CallGraphSection && !hasExistingGeneralizedTypeMD(F) && (!F->hasLocalLinkage() || @@ -2898,7 +2898,7 @@ void CodeGenModule::CreateFunctionTypeMetadataForIcall(const FunctionDecl *FD, F->addTypeMetadata(0, llvm::ConstantAsMetadata::get(CrossDsoTypeId)); } -void CodeGenModule::CreateCalleeTypeMetadataForIcall(const QualType &QT, +void CodeGenModule::createCalleeTypeMetadataForIcall(const QualType &QT, llvm::CallBase *CB) { // Only if needed for call graph section and only for indirect calls. if (!CodeGenOpts.CallGraphSection || !CB->isIndirectCall()) @@ -2909,7 +2909,7 @@ void CodeGenModule::CreateCalleeTypeMetadataForIcall(const QualType &QT, getLLVMContext(), {llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( llvm::Type::getInt64Ty(getLLVMContext()), 0)), TypeIdMD}); - llvm::MDTuple *MDN = llvm::MDNode::get(getLLVMContext(), { TypeTuple }); + llvm::MDTuple *MDN = llvm::MDNode::get(getLLVMContext(), {TypeTuple}); CB->setMetadata(llvm::LLVMContext::MD_callee_type, MDN); } @@ -3041,7 +3041,7 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, // jump table. 
if (!CodeGenOpts.SanitizeCfiCrossDso || !CodeGenOpts.SanitizeCfiCanonicalJumpTables) -CreateFunctionTypeMetadataForIcall(FD, F); +createFunctionTypeMetadataForIcall(FD, F); if (LangOpts.Sanitize.has(SanitizerKind::KCFI)) setKCFIType(FD, F); diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index dfbe4388349dd..4b53f0f241b52 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -1619,11 +1619,11 @@ class CodeGenModule : public CodeGenTypeCache { llvm::Metadata *CreateMetadataIdentifierGeneralized(QualType T); /// Create and attach type metadata to the given function. - void CreateFunctionTypeMetadataForIcall(const FunctionDecl *FD, + void createFunctionTypeMetadataForIcall(const FunctionDecl *FD, llvm::Function *F); /// Create and attach type metadata to the given call. - void CreateCalleeTypeMetadataForIcall(const QualType &QT, llvm::CallBase *CB); + void createCa
[llvm-branch-commits] [clang] [clang] callee_type metadata for indirect calls (PR #117036)
https://github.com/Prabhuk updated https://github.com/llvm/llvm-project/pull/117036 >From b7fbe09b32ff02d4f7c52d82fbf8b5cd28138852 Mon Sep 17 00:00:00 2001 From: prabhukr Date: Wed, 23 Apr 2025 04:05:47 + Subject: [PATCH] Address review comments. Created using spr 1.3.6-beta.1 --- clang/lib/CodeGen/CGCall.cpp| 8 clang/lib/CodeGen/CodeGenModule.cpp | 10 +- clang/lib/CodeGen/CodeGenModule.h | 4 ++-- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 185ee1a970aac..d8ab7140f7943 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -5780,19 +5780,19 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, if (callOrInvoke) { *callOrInvoke = CI; if (CGM.getCodeGenOpts().CallGraphSection) { - assert((TargetDecl && TargetDecl->getFunctionType() || - Callee.getAbstractInfo().getCalleeFunctionProtoType()) && - "cannot find callsite type"); QualType CST; if (TargetDecl && TargetDecl->getFunctionType()) CST = QualType(TargetDecl->getFunctionType(), 0); else if (const auto *FPT = Callee.getAbstractInfo().getCalleeFunctionProtoType()) CST = QualType(FPT, 0); + else +llvm_unreachable( +"Cannot find the callee type to generate callee_type metadata."); // Set type identifier metadata of indirect calls for call graph section. if (!CST.isNull()) -CGM.CreateCalleeTypeMetadataForIcall(CST, *callOrInvoke); +CGM.createCalleeTypeMetadataForIcall(CST, *callOrInvoke); } } diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 43cd2405571cf..2fc99639a75cb 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -2654,7 +2654,7 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, // Skip available_externally functions. They won't be codegen'ed in the // current module anyway. 
if (getContext().GetGVALinkageForFunction(FD) != GVA_AvailableExternally) -CreateFunctionTypeMetadataForIcall(FD, F); +createFunctionTypeMetadataForIcall(FD, F); } } @@ -2868,7 +2868,7 @@ static bool hasExistingGeneralizedTypeMD(llvm::Function *F) { return MD->hasGeneralizedMDString(); } -void CodeGenModule::CreateFunctionTypeMetadataForIcall(const FunctionDecl *FD, +void CodeGenModule::createFunctionTypeMetadataForIcall(const FunctionDecl *FD, llvm::Function *F) { if (CodeGenOpts.CallGraphSection && !hasExistingGeneralizedTypeMD(F) && (!F->hasLocalLinkage() || @@ -2898,7 +2898,7 @@ void CodeGenModule::CreateFunctionTypeMetadataForIcall(const FunctionDecl *FD, F->addTypeMetadata(0, llvm::ConstantAsMetadata::get(CrossDsoTypeId)); } -void CodeGenModule::CreateCalleeTypeMetadataForIcall(const QualType &QT, +void CodeGenModule::createCalleeTypeMetadataForIcall(const QualType &QT, llvm::CallBase *CB) { // Only if needed for call graph section and only for indirect calls. if (!CodeGenOpts.CallGraphSection || !CB->isIndirectCall()) @@ -2909,7 +2909,7 @@ void CodeGenModule::CreateCalleeTypeMetadataForIcall(const QualType &QT, getLLVMContext(), {llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( llvm::Type::getInt64Ty(getLLVMContext()), 0)), TypeIdMD}); - llvm::MDTuple *MDN = llvm::MDNode::get(getLLVMContext(), { TypeTuple }); + llvm::MDTuple *MDN = llvm::MDNode::get(getLLVMContext(), {TypeTuple}); CB->setMetadata(llvm::LLVMContext::MD_callee_type, MDN); } @@ -3041,7 +3041,7 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, // jump table. 
if (!CodeGenOpts.SanitizeCfiCrossDso || !CodeGenOpts.SanitizeCfiCanonicalJumpTables) -CreateFunctionTypeMetadataForIcall(FD, F); +createFunctionTypeMetadataForIcall(FD, F); if (LangOpts.Sanitize.has(SanitizerKind::KCFI)) setKCFIType(FD, F); diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index dfbe4388349dd..4b53f0f241b52 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -1619,11 +1619,11 @@ class CodeGenModule : public CodeGenTypeCache { llvm::Metadata *CreateMetadataIdentifierGeneralized(QualType T); /// Create and attach type metadata to the given function. - void CreateFunctionTypeMetadataForIcall(const FunctionDecl *FD, + void createFunctionTypeMetadataForIcall(const FunctionDecl *FD, llvm::Function *F); /// Create and attach type metadata to the given call. - void CreateCalleeTypeMetadataForIcall(const QualType &QT, llvm::CallBase *CB); + void createCa
[llvm-branch-commits] [clang] [clang] Introduce CallGraphSection option (PR #117037)
https://github.com/Prabhuk updated https://github.com/llvm/llvm-project/pull/117037 >From 6a12be2c5b60a95a06875b0b2c4f14228d1fa882 Mon Sep 17 00:00:00 2001 From: prabhukr Date: Wed, 12 Mar 2025 23:30:01 +0000 Subject: [PATCH] Fix EOF newlines. Created using spr 1.3.6-beta.1 --- clang/test/Driver/call-graph-section.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/Driver/call-graph-section.c b/clang/test/Driver/call-graph-section.c index 108446729d857..5832aa6754137 100644 --- a/clang/test/Driver/call-graph-section.c +++ b/clang/test/Driver/call-graph-section.c @@ -2,4 +2,4 @@ // RUN: %clang -### -S -fcall-graph-section -fno-call-graph-section %s 2>&1 | FileCheck --check-prefix=NO-CALL-GRAPH-SECTION %s // CALL-GRAPH-SECTION: "-fcall-graph-section" -// NO-CALL-GRAPH-SECTION-NOT: "-fcall-graph-section" \ No newline at end of file +// NO-CALL-GRAPH-SECTION-NOT: "-fcall-graph-section" ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang] Introduce CallGraphSection option (PR #117037)
https://github.com/Prabhuk updated https://github.com/llvm/llvm-project/pull/117037 >From 6a12be2c5b60a95a06875b0b2c4f14228d1fa882 Mon Sep 17 00:00:00 2001 From: prabhukr Date: Wed, 12 Mar 2025 23:30:01 +0000 Subject: [PATCH] Fix EOF newlines. Created using spr 1.3.6-beta.1 --- clang/test/Driver/call-graph-section.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/Driver/call-graph-section.c b/clang/test/Driver/call-graph-section.c index 108446729d857..5832aa6754137 100644 --- a/clang/test/Driver/call-graph-section.c +++ b/clang/test/Driver/call-graph-section.c @@ -2,4 +2,4 @@ // RUN: %clang -### -S -fcall-graph-section -fno-call-graph-section %s 2>&1 | FileCheck --check-prefix=NO-CALL-GRAPH-SECTION %s // CALL-GRAPH-SECTION: "-fcall-graph-section" -// NO-CALL-GRAPH-SECTION-NOT: "-fcall-graph-section" \ No newline at end of file +// NO-CALL-GRAPH-SECTION-NOT: "-fcall-graph-section" ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC] Refactoring MCDXBC to support out of order storage of root parameters (PR #137284)
https://github.com/joaosaffran updated https://github.com/llvm/llvm-project/pull/137284 >From 7ac964196fc9195165dc1128d0f889f6ff1a93b4 Mon Sep 17 00:00:00 2001 From: joaosaffran Date: Fri, 25 Apr 2025 22:28:48 + Subject: [PATCH 1/8] addressing comments --- llvm/include/llvm/Object/DXContainer.h| 50 +++ .../include/llvm/ObjectYAML/DXContainerYAML.h | 6 +-- llvm/lib/ObjectYAML/DXContainerYAML.cpp | 17 ++- llvm/unittests/Object/DXContainerTest.cpp | 12 ++--- 4 files changed, 32 insertions(+), 53 deletions(-) diff --git a/llvm/include/llvm/Object/DXContainer.h b/llvm/include/llvm/Object/DXContainer.h index ba261a9e42aea..e359d85f08bec 100644 --- a/llvm/include/llvm/Object/DXContainer.h +++ b/llvm/include/llvm/Object/DXContainer.h @@ -120,18 +120,20 @@ template struct ViewArray { namespace DirectX { struct RootParameterView { const dxbc::RootParameterHeader &Header; - uint32_t Version; StringRef ParamData; RootParameterView(uint32_t V, const dxbc::RootParameterHeader &H, StringRef P) - : Header(H), Version(V), ParamData(P) {} + : Header(H), ParamData(P) {} - template Expected readParameter() { -T Struct; -if (sizeof(T) != ParamData.size()) + template Expected readParameter() { +assert(sizeof(VersionT) <= sizeof(T) && + "Parameter of higher version must inherit all previous version data " + "members"); +if (sizeof(VersionT) != ParamData.size()) return make_error( "Reading structure out of file bounds", object_error::parse_failed); -memcpy(&Struct, ParamData.data(), sizeof(T)); +T Struct; +memcpy(&Struct, ParamData.data(), sizeof(VersionT)); // DXContainer is always little endian if (sys::IsBigEndianHost) Struct.swapBytes(); @@ -150,34 +152,20 @@ struct RootConstantView : RootParameterView { } }; -struct RootDescriptorView_V1_0 : RootParameterView { - static bool classof(const RootParameterView *V) { -return (V->Version == 1 && -(V->Header.ParameterType == - llvm::to_underlying(dxbc::RootParameterType::CBV) || - V->Header.ParameterType == - 
llvm::to_underlying(dxbc::RootParameterType::SRV) || - V->Header.ParameterType == - llvm::to_underlying(dxbc::RootParameterType::UAV))); - } - - llvm::Expected read() { -return readParameter(); - } -}; - -struct RootDescriptorView_V1_1 : RootParameterView { +struct RootDescriptorView : RootParameterView { static bool classof(const RootParameterView *V) { -return (V->Version == 2 && -(V->Header.ParameterType == - llvm::to_underlying(dxbc::RootParameterType::CBV) || - V->Header.ParameterType == - llvm::to_underlying(dxbc::RootParameterType::SRV) || - V->Header.ParameterType == - llvm::to_underlying(dxbc::RootParameterType::UAV))); +return (V->Header.ParameterType == +llvm::to_underlying(dxbc::RootParameterType::CBV) || +V->Header.ParameterType == +llvm::to_underlying(dxbc::RootParameterType::SRV) || +V->Header.ParameterType == +llvm::to_underlying(dxbc::RootParameterType::UAV)); } - llvm::Expected read() { + llvm::Expected read(uint32_t Version) { +if (Version == 1) + return readParameter(); return readParameter(); } }; diff --git a/llvm/include/llvm/ObjectYAML/DXContainerYAML.h b/llvm/include/llvm/ObjectYAML/DXContainerYAML.h index c54c995acd263..8bb9da7884bed 100644 --- a/llvm/include/llvm/ObjectYAML/DXContainerYAML.h +++ b/llvm/include/llvm/ObjectYAML/DXContainerYAML.h @@ -79,7 +79,6 @@ struct RootConstantsYaml { uint32_t Num32BitValues; }; -#define ROOT_DESCRIPTOR_FLAG(Num, Val) bool Val = false; struct RootDescriptorYaml { RootDescriptorYaml() = default; @@ -88,6 +87,7 @@ struct RootDescriptorYaml { uint32_t getEncodedFlags() const; +#define ROOT_DESCRIPTOR_FLAG(Num, Val) bool Val = false; #include "llvm/BinaryFormat/DXContainerConstants.def" }; @@ -95,7 +95,7 @@ struct RootParameterYamlDesc { uint32_t Type; uint32_t Visibility; uint32_t Offset; - RootParameterYamlDesc() {}; + RootParameterYamlDesc(){}; RootParameterYamlDesc(uint32_t T) : Type(T) { switch (T) { @@ -116,7 +116,6 @@ struct RootParameterYamlDesc { }; }; -#define ROOT_ELEMENT_FLAG(Num, Val) bool Val 
= false; struct RootSignatureYamlDesc { RootSignatureYamlDesc() = default; @@ -137,6 +136,7 @@ struct RootSignatureYamlDesc { static llvm::Expected create(const object::DirectX::RootSignature &Data); +#define ROOT_ELEMENT_FLAG(Num, Val) bool Val = false; #include "llvm/BinaryFormat/DXContainerConstants.def" }; diff --git a/llvm/lib/ObjectYAML/DXContainerYAML.cpp b/llvm/lib/ObjectYAML/DXContainerYAML.cpp index e49712852d612..c9d2084226b7a 100644 --- a/llvm/lib/ObjectYAML/DXContainerYAML.cpp +
[llvm-branch-commits] [llvm] [NFC] Refactoring MCDXBC to support out of order storage of root parameters (PR #137284)
https://github.com/joaosaffran updated https://github.com/llvm/llvm-project/pull/137284 >From 7ac964196fc9195165dc1128d0f889f6ff1a93b4 Mon Sep 17 00:00:00 2001 From: joaosaffran Date: Fri, 25 Apr 2025 22:28:48 + Subject: [PATCH 1/9] addressing comments --- llvm/include/llvm/Object/DXContainer.h| 50 +++ .../include/llvm/ObjectYAML/DXContainerYAML.h | 6 +-- llvm/lib/ObjectYAML/DXContainerYAML.cpp | 17 ++- llvm/unittests/Object/DXContainerTest.cpp | 12 ++--- 4 files changed, 32 insertions(+), 53 deletions(-) diff --git a/llvm/include/llvm/Object/DXContainer.h b/llvm/include/llvm/Object/DXContainer.h index ba261a9e42aea..e359d85f08bec 100644 --- a/llvm/include/llvm/Object/DXContainer.h +++ b/llvm/include/llvm/Object/DXContainer.h @@ -120,18 +120,20 @@ template struct ViewArray { namespace DirectX { struct RootParameterView { const dxbc::RootParameterHeader &Header; - uint32_t Version; StringRef ParamData; RootParameterView(uint32_t V, const dxbc::RootParameterHeader &H, StringRef P) - : Header(H), Version(V), ParamData(P) {} + : Header(H), ParamData(P) {} - template Expected readParameter() { -T Struct; -if (sizeof(T) != ParamData.size()) + template Expected readParameter() { +assert(sizeof(VersionT) <= sizeof(T) && + "Parameter of higher version must inherit all previous version data " + "members"); +if (sizeof(VersionT) != ParamData.size()) return make_error( "Reading structure out of file bounds", object_error::parse_failed); -memcpy(&Struct, ParamData.data(), sizeof(T)); +T Struct; +memcpy(&Struct, ParamData.data(), sizeof(VersionT)); // DXContainer is always little endian if (sys::IsBigEndianHost) Struct.swapBytes(); @@ -150,34 +152,20 @@ struct RootConstantView : RootParameterView { } }; -struct RootDescriptorView_V1_0 : RootParameterView { - static bool classof(const RootParameterView *V) { -return (V->Version == 1 && -(V->Header.ParameterType == - llvm::to_underlying(dxbc::RootParameterType::CBV) || - V->Header.ParameterType == - 
llvm::to_underlying(dxbc::RootParameterType::SRV) || - V->Header.ParameterType == - llvm::to_underlying(dxbc::RootParameterType::UAV))); - } - - llvm::Expected read() { -return readParameter(); - } -}; - -struct RootDescriptorView_V1_1 : RootParameterView { +struct RootDescriptorView : RootParameterView { static bool classof(const RootParameterView *V) { -return (V->Version == 2 && -(V->Header.ParameterType == - llvm::to_underlying(dxbc::RootParameterType::CBV) || - V->Header.ParameterType == - llvm::to_underlying(dxbc::RootParameterType::SRV) || - V->Header.ParameterType == - llvm::to_underlying(dxbc::RootParameterType::UAV))); +return (V->Header.ParameterType == +llvm::to_underlying(dxbc::RootParameterType::CBV) || +V->Header.ParameterType == +llvm::to_underlying(dxbc::RootParameterType::SRV) || +V->Header.ParameterType == +llvm::to_underlying(dxbc::RootParameterType::UAV)); } - llvm::Expected read() { + llvm::Expected read(uint32_t Version) { +if (Version == 1) + return readParameter(); return readParameter(); } }; diff --git a/llvm/include/llvm/ObjectYAML/DXContainerYAML.h b/llvm/include/llvm/ObjectYAML/DXContainerYAML.h index c54c995acd263..8bb9da7884bed 100644 --- a/llvm/include/llvm/ObjectYAML/DXContainerYAML.h +++ b/llvm/include/llvm/ObjectYAML/DXContainerYAML.h @@ -79,7 +79,6 @@ struct RootConstantsYaml { uint32_t Num32BitValues; }; -#define ROOT_DESCRIPTOR_FLAG(Num, Val) bool Val = false; struct RootDescriptorYaml { RootDescriptorYaml() = default; @@ -88,6 +87,7 @@ struct RootDescriptorYaml { uint32_t getEncodedFlags() const; +#define ROOT_DESCRIPTOR_FLAG(Num, Val) bool Val = false; #include "llvm/BinaryFormat/DXContainerConstants.def" }; @@ -95,7 +95,7 @@ struct RootParameterYamlDesc { uint32_t Type; uint32_t Visibility; uint32_t Offset; - RootParameterYamlDesc() {}; + RootParameterYamlDesc(){}; RootParameterYamlDesc(uint32_t T) : Type(T) { switch (T) { @@ -116,7 +116,6 @@ struct RootParameterYamlDesc { }; }; -#define ROOT_ELEMENT_FLAG(Num, Val) bool Val 
= false; struct RootSignatureYamlDesc { RootSignatureYamlDesc() = default; @@ -137,6 +136,7 @@ struct RootSignatureYamlDesc { static llvm::Expected create(const object::DirectX::RootSignature &Data); +#define ROOT_ELEMENT_FLAG(Num, Val) bool Val = false; #include "llvm/BinaryFormat/DXContainerConstants.def" }; diff --git a/llvm/lib/ObjectYAML/DXContainerYAML.cpp b/llvm/lib/ObjectYAML/DXContainerYAML.cpp index e49712852d612..c9d2084226b7a 100644 --- a/llvm/lib/ObjectYAML/DXContainerYAML.cpp +
[llvm-branch-commits] [llvm] [NFC] Refactoring MCDXBC to support out of order storage of root parameters (PR #137284)
https://github.com/joaosaffran edited https://github.com/llvm/llvm-project/pull/137284 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [llvm] Introduce callee_type metadata (PR #87573)
@@ -0,0 +1,25 @@ +;; Test if the callee_type metadata is dropped when an indirect function call through a function ptr is promoted +;; to a direct function call during instcombine. + +; RUN: opt < %s -O2 | llvm-dis | FileCheck %s + +define dso_local noundef i32 @_Z13call_indirectPFicEc(ptr noundef %func, i8 noundef signext %x) local_unnamed_addr !type !0 { +entry: + %call = call noundef i32 %func(i8 noundef signext %x), !callee_type !1 + ret i32 %call +} + +define dso_local noundef i32 @_Z3barv() local_unnamed_addr !type !3 { +entry: + ; CHECK: %call.i = tail call noundef i32 @_Z3fooc(i8 noundef signext 97) + ; CHECK-NOT: %call.i = tail call noundef i32 @_Z3fooc(i8 noundef signext 97), !callee_type !1 + %call = call noundef i32 @_Z13call_indirectPFicEc(ptr noundef nonnull @_Z3fooc, i8 noundef signext 97) + ret i32 %call +} + Prabhuk wrote: Can you please explain the case that you are referring to here with a little more detail? https://github.com/llvm/llvm-project/pull/87573 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC] Refactoring MCDXBC to support out of order storage of root parameters (PR #137284)
https://github.com/joaosaffran updated https://github.com/llvm/llvm-project/pull/137284 >From a38f10b51ac930be4bb5a5718d204d9f2d0c0396 Mon Sep 17 00:00:00 2001 From: joaosaffran Date: Fri, 25 Apr 2025 05:09:08 + Subject: [PATCH 1/6] refactoring mcdxbc struct to store root parameters out of order --- .../llvm/MC/DXContainerRootSignature.h| 137 +- llvm/lib/MC/DXContainerRootSignature.cpp | 68 - llvm/lib/ObjectYAML/DXContainerEmitter.cpp| 26 ++-- llvm/lib/Target/DirectX/DXILRootSignature.cpp | 45 +++--- 4 files changed, 201 insertions(+), 75 deletions(-) diff --git a/llvm/include/llvm/MC/DXContainerRootSignature.h b/llvm/include/llvm/MC/DXContainerRootSignature.h index 44e26c81eedc1..e1f4abbcebf8f 100644 --- a/llvm/include/llvm/MC/DXContainerRootSignature.h +++ b/llvm/include/llvm/MC/DXContainerRootSignature.h @@ -6,21 +6,146 @@ // //===--===// +#include "llvm/ADT/STLForwardCompat.h" #include "llvm/BinaryFormat/DXContainer.h" +#include "llvm/Support/ErrorHandling.h" +#include #include -#include +#include namespace llvm { class raw_ostream; namespace mcdxbc { +struct RootParameterHeader : public dxbc::RootParameterHeader { + + size_t Location; + + RootParameterHeader() = default; + + RootParameterHeader(dxbc::RootParameterHeader H, size_t L) + : dxbc::RootParameterHeader(H), Location(L) {} +}; + +using RootDescriptor = std::variant; +using ParametersView = +std::variant; struct RootParameter { - dxbc::RootParameterHeader Header; - union { -dxbc::RootConstants Constants; -dxbc::RST0::v1::RootDescriptor Descriptor; + SmallVector Headers; + + SmallVector Constants; + SmallVector Descriptors; + + void addHeader(dxbc::RootParameterHeader H, size_t L) { +Headers.push_back(RootParameterHeader(H, L)); + } + + void addParameter(dxbc::RootParameterHeader H, dxbc::RootConstants C) { +addHeader(H, Constants.size()); +Constants.push_back(C); + } + + void addParameter(dxbc::RootParameterHeader H, +dxbc::RST0::v0::RootDescriptor D) { +addHeader(H, Descriptors.size()); 
+Descriptors.push_back(D); + } + + void addParameter(dxbc::RootParameterHeader H, +dxbc::RST0::v1::RootDescriptor D) { +addHeader(H, Descriptors.size()); +Descriptors.push_back(D); + } + + ParametersView get(const RootParameterHeader &H) const { +switch (H.ParameterType) { +case llvm::to_underlying(dxbc::RootParameterType::Constants32Bit): + return Constants[H.Location]; +case llvm::to_underlying(dxbc::RootParameterType::CBV): +case llvm::to_underlying(dxbc::RootParameterType::SRV): +case llvm::to_underlying(dxbc::RootParameterType::UAV): + RootDescriptor VersionedParam = Descriptors[H.Location]; + if (std::holds_alternative( + VersionedParam)) +return std::get(VersionedParam); + return std::get(VersionedParam); +} + +llvm_unreachable("Unimplemented parameter type"); + } + + struct iterator { +const RootParameter &Parameters; +SmallVector::const_iterator Current; + +// Changed parameter type to match member variable (removed const) +iterator(const RootParameter &P, + SmallVector::const_iterator C) +: Parameters(P), Current(C) {} +iterator(const iterator &) = default; + +ParametersView operator*() { + ParametersView Val; + switch (Current->ParameterType) { + case llvm::to_underlying(dxbc::RootParameterType::Constants32Bit): +Val = Parameters.Constants[Current->Location]; +break; + + case llvm::to_underlying(dxbc::RootParameterType::CBV): + case llvm::to_underlying(dxbc::RootParameterType::SRV): + case llvm::to_underlying(dxbc::RootParameterType::UAV): +RootDescriptor VersionedParam = +Parameters.Descriptors[Current->Location]; +if (std::holds_alternative( +VersionedParam)) + Val = std::get(VersionedParam); +else + Val = std::get(VersionedParam); +break; + } + return Val; +} + +iterator operator++() { + Current++; + return *this; +} + +iterator operator++(int) { + iterator Tmp = *this; + ++*this; + return Tmp; +} + +iterator operator--() { + Current--; + return *this; +} + +iterator operator--(int) { + iterator Tmp = *this; + --*this; + return Tmp; +} + +bool 
operator==(const iterator I) { return I.Current == Current; } +bool operator!=(const iterator I) { return !(*this == I); } }; + + iterator begin() const { return iterator(*this, Headers.begin()); } + + iterator end() const { return iterator(*this, Headers.end()); } + + size_t size() const { return Headers.size(); } + + bool isEmpty() const { return Headers.empty(); } + + llvm::iterator_range getAll() const { +ret
[llvm-branch-commits] [llvm] [SSAUpdaterBulk] Add PHI simplification pass. (PR #135180)
https://github.com/vpykhtin updated https://github.com/llvm/llvm-project/pull/135180 >From b05781e3c0d7c7b06bf1cc6035b1e447d64a1bf1 Mon Sep 17 00:00:00 2001 From: Valery Pykhtin Date: Thu, 10 Apr 2025 11:56:57 + Subject: [PATCH] ssaupdaterbulk_add_phi_optimization --- .../llvm/Transforms/Utils/SSAUpdaterBulk.h| 5 +- llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp | 38 ++- .../Transforms/Utils/SSAUpdaterBulkTest.cpp | 67 +++ 3 files changed, 108 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/SSAUpdaterBulk.h b/llvm/include/llvm/Transforms/Utils/SSAUpdaterBulk.h index b2cf29608f58b..d3dabaccc641f 100644 --- a/llvm/include/llvm/Transforms/Utils/SSAUpdaterBulk.h +++ b/llvm/include/llvm/Transforms/Utils/SSAUpdaterBulk.h @@ -13,7 +13,6 @@ #ifndef LLVM_TRANSFORMS_UTILS_SSAUPDATERBULK_H #define LLVM_TRANSFORMS_UTILS_SSAUPDATERBULK_H -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/PredIteratorCache.h" @@ -77,6 +76,10 @@ class SSAUpdaterBulk { /// vector. void RewriteAllUses(DominatorTree *DT, SmallVectorImpl *InsertedPHIs = nullptr); + + /// Rewrite all uses and simplify the inserted PHI nodes. + /// Use this method to preserve behavior when replacing SSAUpdater. 
+ void RewriteAndOptimizeAllUses(DominatorTree &DT); }; } // end namespace llvm diff --git a/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp b/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp index d7bf791a23edf..01704f04d45ee 100644 --- a/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp +++ b/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp @@ -11,13 +11,14 @@ //===--===// #include "llvm/Transforms/Utils/SSAUpdaterBulk.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/IteratedDominanceFrontier.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/Use.h" #include "llvm/IR/Value.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -222,3 +223,38 @@ void SSAUpdaterBulk::RewriteAllUses(DominatorTree *DT, } } } + +// Perform a single pass of simplification over the worklist of PHIs. +static void simplifyPass(MutableArrayRef Worklist, + const DataLayout &DL) { + for (PHINode *&PHI : Worklist) { +if (Value *Simplified = simplifyInstruction(PHI, DL)) { + PHI->replaceAllUsesWith(Simplified); + PHI->eraseFromParent(); + PHI = nullptr; // Mark as removed. 
+} + } +} + +static void deduplicatePass(ArrayRef Worklist) { + SmallDenseMap BBs; + for (PHINode *PHI : Worklist) { +if (PHI) + ++BBs[PHI->getParent()]; + } + + for (auto [BB, NumNewPHIs] : BBs) { +auto FirstExistingPN = std::next(BB->phis().begin(), NumNewPHIs); +EliminateNewDuplicatePHINodes(BB, FirstExistingPN); + } +} + +void SSAUpdaterBulk::RewriteAndOptimizeAllUses(DominatorTree &DT) { + SmallVector PHIs; + RewriteAllUses(&DT, &PHIs); + if (PHIs.empty()) +return; + + simplifyPass(PHIs, PHIs.front()->getParent()->getDataLayout()); + deduplicatePass(PHIs); +} diff --git a/llvm/unittests/Transforms/Utils/SSAUpdaterBulkTest.cpp b/llvm/unittests/Transforms/Utils/SSAUpdaterBulkTest.cpp index 841f44cf6bfed..6f58fe6d3a44a 100644 --- a/llvm/unittests/Transforms/Utils/SSAUpdaterBulkTest.cpp +++ b/llvm/unittests/Transforms/Utils/SSAUpdaterBulkTest.cpp @@ -308,3 +308,70 @@ TEST(SSAUpdaterBulk, TwoBBLoop) { EXPECT_EQ(Phi->getIncomingValueForBlock(Entry), ConstantInt::get(I32Ty, 0)); EXPECT_EQ(Phi->getIncomingValueForBlock(Loop), I); } + +TEST(SSAUpdaterBulk, SimplifyPHIs) { + const char *IR = R"( + define void @main(i32 %val, i1 %cond) { + entry: + br i1 %cond, label %left, label %right + left: + %add = add i32 %val, 1 + br label %exit + right: + %sub = sub i32 %val, 1 + br label %exit + exit: + %phi = phi i32 [ %sub, %right ], [ %add, %left ] + %cmp = icmp slt i32 0, 42 + ret void + } + )"; + + llvm::LLVMContext Context; + llvm::SMDiagnostic Err; + std::unique_ptr M = llvm::parseAssemblyString(IR, Err, Context); + ASSERT_NE(M, nullptr) << "Failed to parse IR: " << Err.getMessage(); + + Function *F = M->getFunction("main"); + auto *Entry = &F->getEntryBlock(); + auto *Left = Entry->getTerminator()->getSuccessor(0); + auto *Right = Entry->getTerminator()->getSuccessor(1); + auto *Exit = Left->getSingleSuccessor(); + auto *Val = &*F->arg_begin(); + auto *Phi = &Exit->front(); + auto *Cmp = &*std::next(Exit->begin()); + auto *Add = &Left->front(); + auto *Sub = 
&Right->front(); + + SSAUpdaterBulk Updater; + Type *I32Ty = Type::getInt32Ty(Context); + + // Use %val directly instead of creating a phi. + unsigned ValVar = Updater.AddVariable("Val", I32Ty);
[llvm-branch-commits] [llvm] [SSAUpdaterBulk] Add PHI simplification pass. (PR #135180)
https://github.com/vpykhtin updated https://github.com/llvm/llvm-project/pull/135180 >From b05781e3c0d7c7b06bf1cc6035b1e447d64a1bf1 Mon Sep 17 00:00:00 2001 From: Valery Pykhtin Date: Thu, 10 Apr 2025 11:56:57 + Subject: [PATCH] ssaupdaterbulk_add_phi_optimization --- .../llvm/Transforms/Utils/SSAUpdaterBulk.h| 5 +- llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp | 38 ++- .../Transforms/Utils/SSAUpdaterBulkTest.cpp | 67 +++ 3 files changed, 108 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/SSAUpdaterBulk.h b/llvm/include/llvm/Transforms/Utils/SSAUpdaterBulk.h index b2cf29608f58b..d3dabaccc641f 100644 --- a/llvm/include/llvm/Transforms/Utils/SSAUpdaterBulk.h +++ b/llvm/include/llvm/Transforms/Utils/SSAUpdaterBulk.h @@ -13,7 +13,6 @@ #ifndef LLVM_TRANSFORMS_UTILS_SSAUPDATERBULK_H #define LLVM_TRANSFORMS_UTILS_SSAUPDATERBULK_H -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/PredIteratorCache.h" @@ -77,6 +76,10 @@ class SSAUpdaterBulk { /// vector. void RewriteAllUses(DominatorTree *DT, SmallVectorImpl *InsertedPHIs = nullptr); + + /// Rewrite all uses and simplify the inserted PHI nodes. + /// Use this method to preserve behavior when replacing SSAUpdater. 
+ void RewriteAndOptimizeAllUses(DominatorTree &DT); }; } // end namespace llvm diff --git a/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp b/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp index d7bf791a23edf..01704f04d45ee 100644 --- a/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp +++ b/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp @@ -11,13 +11,14 @@ //===--===// #include "llvm/Transforms/Utils/SSAUpdaterBulk.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/IteratedDominanceFrontier.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/Use.h" #include "llvm/IR/Value.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -222,3 +223,38 @@ void SSAUpdaterBulk::RewriteAllUses(DominatorTree *DT, } } } + +// Perform a single pass of simplification over the worklist of PHIs. +static void simplifyPass(MutableArrayRef Worklist, + const DataLayout &DL) { + for (PHINode *&PHI : Worklist) { +if (Value *Simplified = simplifyInstruction(PHI, DL)) { + PHI->replaceAllUsesWith(Simplified); + PHI->eraseFromParent(); + PHI = nullptr; // Mark as removed. 
+} + } +} + +static void deduplicatePass(ArrayRef Worklist) { + SmallDenseMap BBs; + for (PHINode *PHI : Worklist) { +if (PHI) + ++BBs[PHI->getParent()]; + } + + for (auto [BB, NumNewPHIs] : BBs) { +auto FirstExistingPN = std::next(BB->phis().begin(), NumNewPHIs); +EliminateNewDuplicatePHINodes(BB, FirstExistingPN); + } +} + +void SSAUpdaterBulk::RewriteAndOptimizeAllUses(DominatorTree &DT) { + SmallVector PHIs; + RewriteAllUses(&DT, &PHIs); + if (PHIs.empty()) +return; + + simplifyPass(PHIs, PHIs.front()->getParent()->getDataLayout()); + deduplicatePass(PHIs); +} diff --git a/llvm/unittests/Transforms/Utils/SSAUpdaterBulkTest.cpp b/llvm/unittests/Transforms/Utils/SSAUpdaterBulkTest.cpp index 841f44cf6bfed..6f58fe6d3a44a 100644 --- a/llvm/unittests/Transforms/Utils/SSAUpdaterBulkTest.cpp +++ b/llvm/unittests/Transforms/Utils/SSAUpdaterBulkTest.cpp @@ -308,3 +308,70 @@ TEST(SSAUpdaterBulk, TwoBBLoop) { EXPECT_EQ(Phi->getIncomingValueForBlock(Entry), ConstantInt::get(I32Ty, 0)); EXPECT_EQ(Phi->getIncomingValueForBlock(Loop), I); } + +TEST(SSAUpdaterBulk, SimplifyPHIs) { + const char *IR = R"( + define void @main(i32 %val, i1 %cond) { + entry: + br i1 %cond, label %left, label %right + left: + %add = add i32 %val, 1 + br label %exit + right: + %sub = sub i32 %val, 1 + br label %exit + exit: + %phi = phi i32 [ %sub, %right ], [ %add, %left ] + %cmp = icmp slt i32 0, 42 + ret void + } + )"; + + llvm::LLVMContext Context; + llvm::SMDiagnostic Err; + std::unique_ptr M = llvm::parseAssemblyString(IR, Err, Context); + ASSERT_NE(M, nullptr) << "Failed to parse IR: " << Err.getMessage(); + + Function *F = M->getFunction("main"); + auto *Entry = &F->getEntryBlock(); + auto *Left = Entry->getTerminator()->getSuccessor(0); + auto *Right = Entry->getTerminator()->getSuccessor(1); + auto *Exit = Left->getSingleSuccessor(); + auto *Val = &*F->arg_begin(); + auto *Phi = &Exit->front(); + auto *Cmp = &*std::next(Exit->begin()); + auto *Add = &Left->front(); + auto *Sub = 
&Right->front(); + + SSAUpdaterBulk Updater; + Type *I32Ty = Type::getInt32Ty(Context); + + // Use %val directly instead of creating a phi. + unsigned ValVar = Updater.AddVariable("Val", I32Ty);
[llvm-branch-commits] [llvm] [AMDGPU] Improve StructurizeCFG pass performance by using SSAUpdaterBulk. (PR #135181)
https://github.com/vpykhtin updated https://github.com/llvm/llvm-project/pull/135181 >From 636ad19bbf446fd13e7a55dc68d77242f3e45bb1 Mon Sep 17 00:00:00 2001 From: Valery Pykhtin Date: Thu, 10 Apr 2025 11:58:13 + Subject: [PATCH] amdgpu_use_ssaupdaterbulk_in_structurizecfg --- llvm/lib/Transforms/Scalar/StructurizeCFG.cpp | 25 +++ 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp index eb22b50532695..8c2639aa0e138 100644 --- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -47,6 +47,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" +#include "llvm/Transforms/Utils/SSAUpdaterBulk.h" #include #include @@ -317,7 +318,7 @@ class StructurizeCFG { void collectInfos(); - void insertConditions(bool Loops); + void insertConditions(bool Loops, SSAUpdaterBulk &PhiInserter); void simplifyConditions(); @@ -600,10 +601,9 @@ void StructurizeCFG::collectInfos() { } /// Insert the missing branch conditions -void StructurizeCFG::insertConditions(bool Loops) { +void StructurizeCFG::insertConditions(bool Loops, SSAUpdaterBulk &PhiInserter) { BranchVector &Conds = Loops ? LoopConds : Conditions; Value *Default = Loops ? BoolTrue : BoolFalse; - SSAUpdater PhiInserter; for (BranchInst *Term : Conds) { assert(Term->isConditional()); @@ -612,8 +612,9 @@ void StructurizeCFG::insertConditions(bool Loops) { BasicBlock *SuccTrue = Term->getSuccessor(0); BasicBlock *SuccFalse = Term->getSuccessor(1); -PhiInserter.Initialize(Boolean, ""); -PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default); +unsigned Variable = PhiInserter.AddVariable("", Boolean); +PhiInserter.AddAvailableValue(Variable, Loops ? SuccFalse : Parent, + Default); BBPredicates &Preds = Loops ? 
LoopPreds[SuccFalse] : Predicates[SuccTrue]; @@ -626,7 +627,7 @@ void StructurizeCFG::insertConditions(bool Loops) { ParentInfo = PI; break; } - PhiInserter.AddAvailableValue(BB, PI.Pred); + PhiInserter.AddAvailableValue(Variable, BB, PI.Pred); Dominator.addAndRememberBlock(BB); } @@ -635,9 +636,9 @@ void StructurizeCFG::insertConditions(bool Loops) { CondBranchWeights::setMetadata(*Term, ParentInfo.Weights); } else { if (!Dominator.resultIsRememberedBlock()) -PhiInserter.AddAvailableValue(Dominator.result(), Default); +PhiInserter.AddAvailableValue(Variable, Dominator.result(), Default); - Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent)); + PhiInserter.AddUse(Variable, &Term->getOperandUse(0)); } } } @@ -1321,8 +1322,12 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT) { orderNodes(); collectInfos(); createFlow(); - insertConditions(false); - insertConditions(true); + + SSAUpdaterBulk PhiInserter; + insertConditions(false, PhiInserter); + insertConditions(true, PhiInserter); + PhiInserter.RewriteAndOptimizeAllUses(*DT); + setPhiValues(); simplifyConditions(); simplifyAffectedPhis(); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Improve StructurizeCFG pass performance by using SSAUpdaterBulk. (PR #135181)
https://github.com/vpykhtin updated https://github.com/llvm/llvm-project/pull/135181 >From 636ad19bbf446fd13e7a55dc68d77242f3e45bb1 Mon Sep 17 00:00:00 2001 From: Valery Pykhtin Date: Thu, 10 Apr 2025 11:58:13 + Subject: [PATCH] amdgpu_use_ssaupdaterbulk_in_structurizecfg --- llvm/lib/Transforms/Scalar/StructurizeCFG.cpp | 25 +++ 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp index eb22b50532695..8c2639aa0e138 100644 --- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -47,6 +47,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" +#include "llvm/Transforms/Utils/SSAUpdaterBulk.h" #include #include @@ -317,7 +318,7 @@ class StructurizeCFG { void collectInfos(); - void insertConditions(bool Loops); + void insertConditions(bool Loops, SSAUpdaterBulk &PhiInserter); void simplifyConditions(); @@ -600,10 +601,9 @@ void StructurizeCFG::collectInfos() { } /// Insert the missing branch conditions -void StructurizeCFG::insertConditions(bool Loops) { +void StructurizeCFG::insertConditions(bool Loops, SSAUpdaterBulk &PhiInserter) { BranchVector &Conds = Loops ? LoopConds : Conditions; Value *Default = Loops ? BoolTrue : BoolFalse; - SSAUpdater PhiInserter; for (BranchInst *Term : Conds) { assert(Term->isConditional()); @@ -612,8 +612,9 @@ void StructurizeCFG::insertConditions(bool Loops) { BasicBlock *SuccTrue = Term->getSuccessor(0); BasicBlock *SuccFalse = Term->getSuccessor(1); -PhiInserter.Initialize(Boolean, ""); -PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default); +unsigned Variable = PhiInserter.AddVariable("", Boolean); +PhiInserter.AddAvailableValue(Variable, Loops ? SuccFalse : Parent, + Default); BBPredicates &Preds = Loops ? 
LoopPreds[SuccFalse] : Predicates[SuccTrue]; @@ -626,7 +627,7 @@ void StructurizeCFG::insertConditions(bool Loops) { ParentInfo = PI; break; } - PhiInserter.AddAvailableValue(BB, PI.Pred); + PhiInserter.AddAvailableValue(Variable, BB, PI.Pred); Dominator.addAndRememberBlock(BB); } @@ -635,9 +636,9 @@ void StructurizeCFG::insertConditions(bool Loops) { CondBranchWeights::setMetadata(*Term, ParentInfo.Weights); } else { if (!Dominator.resultIsRememberedBlock()) -PhiInserter.AddAvailableValue(Dominator.result(), Default); +PhiInserter.AddAvailableValue(Variable, Dominator.result(), Default); - Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent)); + PhiInserter.AddUse(Variable, &Term->getOperandUse(0)); } } } @@ -1321,8 +1322,12 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT) { orderNodes(); collectInfos(); createFlow(); - insertConditions(false); - insertConditions(true); + + SSAUpdaterBulk PhiInserter; + insertConditions(false, PhiInserter); + insertConditions(true, PhiInserter); + PhiInserter.RewriteAndOptimizeAllUses(*DT); + setPhiValues(); simplifyConditions(); simplifyAffectedPhis(); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LoopVectorizer] Bundle partial reductions with different extensions (PR #136997)
@@ -2493,11 +2493,13 @@ class VPExtendedReductionRecipe : public VPReductionRecipe { /// recipe is abstract and needs to be lowered to concrete recipes before /// codegen. The Operands are {ChainOp, VecOp1, VecOp2, [Condition]}. class VPMulAccumulateReductionRecipe : public VPReductionRecipe { - /// Opcode of the extend recipe. - Instruction::CastOps ExtOp; + /// Opcodes of the extend recipes. SamTebbs33 wrote: I like that, thanks. Added. https://github.com/llvm/llvm-project/pull/136997 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LoopVectorizer] Bundle partial reductions with different extensions (PR #136997)
@@ -2438,14 +2438,14 @@ VPMulAccumulateReductionRecipe::computeCost(ElementCount VF, return Ctx.TTI.getPartialReductionCost( Instruction::Add, Ctx.Types.inferScalarType(getVecOp0()), Ctx.Types.inferScalarType(getVecOp1()), getResultType(), VF, -TTI::getPartialReductionExtendKind(getExtOpcode()), -TTI::getPartialReductionExtendKind(getExtOpcode()), Instruction::Mul); +TTI::getPartialReductionExtendKind(getExt0Opcode()), +TTI::getPartialReductionExtendKind(getExt1Opcode()), Instruction::Mul); } Type *RedTy = Ctx.Types.inferScalarType(this); auto *SrcVecTy = cast(toVectorTy(Ctx.Types.inferScalarType(getVecOp0()), VF)); - return Ctx.TTI.getMulAccReductionCost(isZExt(), RedTy, SrcVecTy, + return Ctx.TTI.getMulAccReductionCost(isZExt0(), RedTy, SrcVecTy, SamTebbs33 wrote: I started off by modifying the TTI hook but found that it wasn't actually necessary since only partial reductions make use of the differing signedness and they don't use this hook. If someone is interested in getting mul-acc-reduce generated with different extensions then they can do the investigation needed for costing but I think it's outside the scope of this work. https://github.com/llvm/llvm-project/pull/136997 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] release/20.x: [wasm-ld] Refactor WasmSym from static globals to per-link context (#134970) (PR #137620)
https://github.com/vgvassilev approved this pull request. Lgtm! https://github.com/llvm/llvm-project/pull/137620 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LoopVectorizer] Bundle partial reductions with different extensions (PR #136997)
https://github.com/SamTebbs33 updated https://github.com/llvm/llvm-project/pull/136997 >From 10c4727074a7f5b4502ad08dc655be8fa5ffa3d2 Mon Sep 17 00:00:00 2001 From: Samuel Tebbs Date: Wed, 23 Apr 2025 13:16:38 +0100 Subject: [PATCH 1/2] [LoopVectorizer] Bundle partial reductions with different extensions This PR adds support for extensions of different signedness to VPMulAccumulateReductionRecipe and allows such partial reductions to be bundled into that class. --- llvm/lib/Transforms/Vectorize/VPlan.h | 42 +- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 27 ++--- .../Transforms/Vectorize/VPlanTransforms.cpp | 25 - .../partial-reduce-dot-product-mixed.ll | 56 +-- .../LoopVectorize/AArch64/vplan-printing.ll | 29 +- 5 files changed, 99 insertions(+), 80 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 20d272e69e6e7..e11f608d068da 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2493,11 +2493,13 @@ class VPExtendedReductionRecipe : public VPReductionRecipe { /// recipe is abstract and needs to be lowered to concrete recipes before /// codegen. The Operands are {ChainOp, VecOp1, VecOp2, [Condition]}. class VPMulAccumulateReductionRecipe : public VPReductionRecipe { - /// Opcode of the extend recipe. - Instruction::CastOps ExtOp; + /// Opcodes of the extend recipes. + Instruction::CastOps ExtOp0; + Instruction::CastOps ExtOp1; - /// Non-neg flag of the extend recipe. - bool IsNonNeg = false; + /// Non-neg flags of the extend recipe. 
+ bool IsNonNeg0 = false; + bool IsNonNeg1 = false; Type *ResultTy; @@ -2512,7 +2514,8 @@ class VPMulAccumulateReductionRecipe : public VPReductionRecipe { MulAcc->getCondOp(), MulAcc->isOrdered(), WrapFlagsTy(MulAcc->hasNoUnsignedWrap(), MulAcc->hasNoSignedWrap()), MulAcc->getDebugLoc()), -ExtOp(MulAcc->getExtOpcode()), IsNonNeg(MulAcc->isNonNeg()), +ExtOp0(MulAcc->getExt0Opcode()), ExtOp1(MulAcc->getExt1Opcode()), +IsNonNeg0(MulAcc->isNonNeg0()), IsNonNeg1(MulAcc->isNonNeg1()), ResultTy(MulAcc->getResultType()), IsPartialReduction(MulAcc->isPartialReduction()) {} @@ -2526,7 +2529,8 @@ class VPMulAccumulateReductionRecipe : public VPReductionRecipe { R->getCondOp(), R->isOrdered(), WrapFlagsTy(Mul->hasNoUnsignedWrap(), Mul->hasNoSignedWrap()), R->getDebugLoc()), -ExtOp(Ext0->getOpcode()), IsNonNeg(Ext0->isNonNeg()), +ExtOp0(Ext0->getOpcode()), ExtOp1(Ext1->getOpcode()), +IsNonNeg0(Ext0->isNonNeg()), IsNonNeg1(Ext1->isNonNeg()), ResultTy(ResultTy), IsPartialReduction(isa(R)) { assert(RecurrenceDescriptor::getOpcode(getRecurrenceKind()) == @@ -2542,7 +2546,8 @@ class VPMulAccumulateReductionRecipe : public VPReductionRecipe { R->getCondOp(), R->isOrdered(), WrapFlagsTy(Mul->hasNoUnsignedWrap(), Mul->hasNoSignedWrap()), R->getDebugLoc()), -ExtOp(Instruction::CastOps::CastOpsEnd) { +ExtOp0(Instruction::CastOps::CastOpsEnd), +ExtOp1(Instruction::CastOps::CastOpsEnd) { assert(RecurrenceDescriptor::getOpcode(getRecurrenceKind()) == Instruction::Add && "The reduction instruction in MulAccumulateReductionRecipe must be " @@ -2586,19 +2591,26 @@ class VPMulAccumulateReductionRecipe : public VPReductionRecipe { VPValue *getVecOp1() const { return getOperand(2); } /// Return if this MulAcc recipe contains extend instructions. - bool isExtended() const { return ExtOp != Instruction::CastOps::CastOpsEnd; } + bool isExtended() const { return ExtOp0 != Instruction::CastOps::CastOpsEnd; } /// Return if the operands of mul instruction come from same extend. 
- bool isSameExtend() const { return getVecOp0() == getVecOp1(); } + bool isSameExtendVal() const { return getVecOp0() == getVecOp1(); } - /// Return the opcode of the underlying extend. - Instruction::CastOps getExtOpcode() const { return ExtOp; } + /// Return the opcode of the underlying extends. + Instruction::CastOps getExt0Opcode() const { return ExtOp0; } + Instruction::CastOps getExt1Opcode() const { return ExtOp1; } + + /// Return if the first extend's opcode is ZExt. + bool isZExt0() const { return ExtOp0 == Instruction::CastOps::ZExt; } + + /// Return if the second extend's opcode is ZExt. + bool isZExt1() const { return ExtOp1 == Instruction::CastOps::ZExt; } - /// Return if the extend opcode is ZExt. - bool isZExt() const { return ExtOp == Instruction::CastOps::ZExt; } + /// Return the non negative flag of the first ext recipe. + bool isNonNeg0() const { return IsNonNeg0; } - /// Return the non negative flag of the ext recipe. - bool isNonNeg() const { return IsNonNeg; } + /// Return the non negative flag of the second
[llvm-branch-commits] [lld] release/20.x: [wasm-ld] Refactor WasmSym from static globals to per-link context (#134970) (PR #137620)
https://github.com/anutosh491 updated https://github.com/llvm/llvm-project/pull/137620 >From 8efbc116707fd482ddb2d3d890bbd93c0b01427b Mon Sep 17 00:00:00 2001 From: Anutosh Bhat Date: Fri, 25 Apr 2025 20:05:00 +0530 Subject: [PATCH 1/2] [wasm-ld] Refactor WasmSym from static globals to per-link context (#134970) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Towards This change moves WasmSym from a static global struct to an instance owned by Ctx, allowing it to be reset cleanly between linker runs. This enables safe support for multiple invocations of wasm-ld within the same process Changes done - Converted WasmSym from a static struct to a regular struct with instance members. - Added a std::unique_ptr wasmSym field inside Ctx. - Reset wasmSym in Ctx::reset() to clear state between links. - Replaced all WasmSym:: references with ctx.wasmSym->. - Removed global symbol definitions from Symbols.cpp that are no longer needed. Clearing wasmSym in ctx.reset() ensures a clean slate for each link invocation, preventing symbol leakage across runs—critical when using wasm-ld/lld as a reentrant library where global state can cause subtle, hard-to-debug errors. 
- Co-authored-by: Vassil Vassilev (cherry picked from commit 9cbbb74d370c09e13b8412f21dccb7d2c4afc6a4) --- lld/wasm/Config.h | 110 +++ lld/wasm/Driver.cpp| 64 +-- lld/wasm/InputChunks.cpp | 10 +- lld/wasm/MarkLive.cpp | 6 +- lld/wasm/OutputSections.cpp| 4 +- lld/wasm/Symbols.cpp | 25 - lld/wasm/Symbols.h | 99 - lld/wasm/SyntheticSections.cpp | 32 +++--- lld/wasm/Writer.cpp| 187 + 9 files changed, 262 insertions(+), 275 deletions(-) diff --git a/lld/wasm/Config.h b/lld/wasm/Config.h index 1fa6c42d9cd86..71dabaedb8a8c 100644 --- a/lld/wasm/Config.h +++ b/lld/wasm/Config.h @@ -32,6 +32,11 @@ class InputTable; class InputGlobal; class InputFunction; class Symbol; +class DefinedData; +class GlobalSymbol; +class DefinedFunction; +class UndefinedGlobal; +class TableSymbol; // For --unresolved-symbols. enum class UnresolvedPolicy { ReportError, Warn, Ignore, ImportDynamic }; @@ -139,6 +144,111 @@ struct Ctx { llvm::SmallVector syntheticGlobals; llvm::SmallVector syntheticTables; + // linker-generated symbols + struct WasmSym { +// __global_base +// Symbol marking the start of the global section. +DefinedData *globalBase; + +// __stack_pointer/__stack_low/__stack_high +// Global that holds current value of stack pointer and data symbols marking +// the start and end of the stack region. stackPointer is initialized to +// stackHigh and grows downwards towards stackLow +GlobalSymbol *stackPointer; +DefinedData *stackLow; +DefinedData *stackHigh; + +// __tls_base +// Global that holds the address of the base of the current thread's +// TLS block. +GlobalSymbol *tlsBase; + +// __tls_size +// Symbol whose value is the size of the TLS block. +GlobalSymbol *tlsSize; + +// __tls_size +// Symbol whose value is the alignment of the TLS block. +GlobalSymbol *tlsAlign; + +// __data_end +// Symbol marking the end of the data and bss. +DefinedData *dataEnd; + +// __heap_base/__heap_end +// Symbols marking the beginning and end of the "heap". 
It starts at the end +// of the data, bss and explicit stack, and extends to the end of the linear +// memory allocated by wasm-ld. This region of memory is not used by the +// linked code, so it may be used as a backing store for `sbrk` or `malloc` +// implementations. +DefinedData *heapBase; +DefinedData *heapEnd; + +// __wasm_first_page_end +// A symbol whose address is the end of the first page in memory (if any). +DefinedData *firstPageEnd; + +// __wasm_init_memory_flag +// Symbol whose contents are nonzero iff memory has already been +// initialized. +DefinedData *initMemoryFlag; + +// __wasm_init_memory +// Function that initializes passive data segments during instantiation. +DefinedFunction *initMemory; + +// __wasm_call_ctors +// Function that directly calls all ctors in priority order. +DefinedFunction *callCtors; + +// __wasm_call_dtors +// Function that calls the libc/etc. cleanup function. +DefinedFunction *callDtors; + +// __wasm_apply_global_relocs +// Function that applies relocations to wasm globals post-instantiation. +// Unlike __wasm_apply_data_relocs this needs to run on every thread. +DefinedFunction *applyGlobalRelocs; + +// __wasm_apply_tls_relocs +// Like __wasm_apply_data_relocs but for TLS section. These must be +// delayed until __wasm_init_tls. +DefinedFunction *applyTLSRelocs; + +// __wasm_apply_global_tls_relocs +// Like applyGlob
[llvm-branch-commits] [lld] release/20.x: [wasm-ld] Refactor WasmSym from static globals to per-link context (#134970) (PR #137620)
anutosh491 wrote: cc @tstellar I couldn't get the bot to cherry-pick this due to a simple conflict that I had to resolve manually (see https://github.com/llvm/llvm-project/pull/134970#issuecomment-2834854620). I hope this is all that's needed from my side! https://github.com/llvm/llvm-project/pull/137620 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] release/20.x: [wasm-ld] Refactor WasmSym from static globals to per-link context (#134970) (PR #137620)
https://github.com/anutosh491 updated https://github.com/llvm/llvm-project/pull/137620 >From 8efbc116707fd482ddb2d3d890bbd93c0b01427b Mon Sep 17 00:00:00 2001 From: Anutosh Bhat Date: Fri, 25 Apr 2025 20:05:00 +0530 Subject: [PATCH] [wasm-ld] Refactor WasmSym from static globals to per-link context (#134970) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Towards This change moves WasmSym from a static global struct to an instance owned by Ctx, allowing it to be reset cleanly between linker runs. This enables safe support for multiple invocations of wasm-ld within the same process Changes done - Converted WasmSym from a static struct to a regular struct with instance members. - Added a std::unique_ptr wasmSym field inside Ctx. - Reset wasmSym in Ctx::reset() to clear state between links. - Replaced all WasmSym:: references with ctx.wasmSym->. - Removed global symbol definitions from Symbols.cpp that are no longer needed. Clearing wasmSym in ctx.reset() ensures a clean slate for each link invocation, preventing symbol leakage across runs—critical when using wasm-ld/lld as a reentrant library where global state can cause subtle, hard-to-debug errors. 
- Co-authored-by: Vassil Vassilev (cherry picked from commit 9cbbb74d370c09e13b8412f21dccb7d2c4afc6a4) --- lld/wasm/Config.h | 110 +++ lld/wasm/Driver.cpp| 64 +-- lld/wasm/InputChunks.cpp | 10 +- lld/wasm/MarkLive.cpp | 6 +- lld/wasm/OutputSections.cpp| 4 +- lld/wasm/Symbols.cpp | 25 - lld/wasm/Symbols.h | 99 - lld/wasm/SyntheticSections.cpp | 32 +++--- lld/wasm/Writer.cpp| 187 + 9 files changed, 262 insertions(+), 275 deletions(-) diff --git a/lld/wasm/Config.h b/lld/wasm/Config.h index 1fa6c42d9cd86..71dabaedb8a8c 100644 --- a/lld/wasm/Config.h +++ b/lld/wasm/Config.h @@ -32,6 +32,11 @@ class InputTable; class InputGlobal; class InputFunction; class Symbol; +class DefinedData; +class GlobalSymbol; +class DefinedFunction; +class UndefinedGlobal; +class TableSymbol; // For --unresolved-symbols. enum class UnresolvedPolicy { ReportError, Warn, Ignore, ImportDynamic }; @@ -139,6 +144,111 @@ struct Ctx { llvm::SmallVector syntheticGlobals; llvm::SmallVector syntheticTables; + // linker-generated symbols + struct WasmSym { +// __global_base +// Symbol marking the start of the global section. +DefinedData *globalBase; + +// __stack_pointer/__stack_low/__stack_high +// Global that holds current value of stack pointer and data symbols marking +// the start and end of the stack region. stackPointer is initialized to +// stackHigh and grows downwards towards stackLow +GlobalSymbol *stackPointer; +DefinedData *stackLow; +DefinedData *stackHigh; + +// __tls_base +// Global that holds the address of the base of the current thread's +// TLS block. +GlobalSymbol *tlsBase; + +// __tls_size +// Symbol whose value is the size of the TLS block. +GlobalSymbol *tlsSize; + +// __tls_size +// Symbol whose value is the alignment of the TLS block. +GlobalSymbol *tlsAlign; + +// __data_end +// Symbol marking the end of the data and bss. +DefinedData *dataEnd; + +// __heap_base/__heap_end +// Symbols marking the beginning and end of the "heap". 
It starts at the end +// of the data, bss and explicit stack, and extends to the end of the linear +// memory allocated by wasm-ld. This region of memory is not used by the +// linked code, so it may be used as a backing store for `sbrk` or `malloc` +// implementations. +DefinedData *heapBase; +DefinedData *heapEnd; + +// __wasm_first_page_end +// A symbol whose address is the end of the first page in memory (if any). +DefinedData *firstPageEnd; + +// __wasm_init_memory_flag +// Symbol whose contents are nonzero iff memory has already been +// initialized. +DefinedData *initMemoryFlag; + +// __wasm_init_memory +// Function that initializes passive data segments during instantiation. +DefinedFunction *initMemory; + +// __wasm_call_ctors +// Function that directly calls all ctors in priority order. +DefinedFunction *callCtors; + +// __wasm_call_dtors +// Function that calls the libc/etc. cleanup function. +DefinedFunction *callDtors; + +// __wasm_apply_global_relocs +// Function that applies relocations to wasm globals post-instantiation. +// Unlike __wasm_apply_data_relocs this needs to run on every thread. +DefinedFunction *applyGlobalRelocs; + +// __wasm_apply_tls_relocs +// Like __wasm_apply_data_relocs but for TLS section. These must be +// delayed until __wasm_init_tls. +DefinedFunction *applyTLSRelocs; + +// __wasm_apply_global_tls_relocs +// Like applyGlobalRe
[llvm-branch-commits] [llvm] release/20.x: [SystemZ] Fix compile time regression in adjustInliningThreshold(). (#137527) (PR #137628)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/137628 Backport 98b895d Requested by: @JonPsson1 >From c854e4ec6031904cd3030dbd6d06e4d86ec7484f Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Mon, 28 Apr 2025 07:04:07 -0600 Subject: [PATCH] [SystemZ] Fix compile time regression in adjustInliningThreshold(). (#137527) Instead of always iterating over all GlobalVariable:s in the Module to find the case where both Caller and Callee are using the same GV heavily, first scan Callee (only if less than 200 instructions) for all GVs used more than 10 times, and then do the counting for the Caller for just those relevant GVs. The limit of 200 instructions makes sense as this aims to inline a relatively small function using a GV more than 10 times. This resolves the compile time problem with zig: on main the heuristic causes a 380% increase (compared to removing the heuristic), but with this change the increase is <0.5% (total user compile time with opt). Fixes #134714. (cherry picked from commit 98b895da30c03b7061b8740d91c0e7998e69d091) --- .../SystemZ/SystemZTargetTransformInfo.cpp| 48 --- 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index 2b94832939419..7eec38b79cb82 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/BasicTTIImpl.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/Support/Debug.h" @@ -80,7 +81,6 @@ unsigned SystemZTTIImpl::adjustInliningThreshold(const CallBase *CB) const { const Function *Callee = CB->getCalledFunction(); if (!Callee) return 0; - const Module *M = Caller->getParent(); // Increase the threshold if an incoming argument is used only as a memcpy // 
source. @@ -92,25 +92,37 @@ unsigned SystemZTTIImpl::adjustInliningThreshold(const CallBase *CB) const { } } - // Give bonus for globals used much in both caller and callee. - std::set CalleeGlobals; - std::set CallerGlobals; - for (const GlobalVariable &Global : M->globals()) -for (const User *U : Global.users()) - if (const Instruction *User = dyn_cast(U)) { -if (User->getParent()->getParent() == Callee) - CalleeGlobals.insert(&Global); -if (User->getParent()->getParent() == Caller) - CallerGlobals.insert(&Global); + // Give bonus for globals used much in both caller and a relatively small + // callee. + unsigned InstrCount = 0; + SmallDenseMap Ptr2NumUses; + for (auto &I : instructions(Callee)) { +if (++InstrCount == 200) { + Ptr2NumUses.clear(); + break; +} +if (const auto *SI = dyn_cast(&I)) { + if (!SI->isVolatile()) +if (auto *GV = dyn_cast(SI->getPointerOperand())) + Ptr2NumUses[GV]++; +} else if (const auto *LI = dyn_cast(&I)) { + if (!LI->isVolatile()) +if (auto *GV = dyn_cast(LI->getPointerOperand())) + Ptr2NumUses[GV]++; +} else if (const auto *GEP = dyn_cast(&I)) { + if (auto *GV = dyn_cast(GEP->getPointerOperand())) { +unsigned NumStores = 0, NumLoads = 0; +countNumMemAccesses(GEP, NumStores, NumLoads, Callee); +Ptr2NumUses[GV] += NumLoads + NumStores; } - for (auto *GV : CalleeGlobals) -if (CallerGlobals.count(GV)) { - unsigned CalleeStores = 0, CalleeLoads = 0; +} + } + + for (auto [Ptr, NumCalleeUses] : Ptr2NumUses) +if (NumCalleeUses > 10) { unsigned CallerStores = 0, CallerLoads = 0; - countNumMemAccesses(GV, CalleeStores, CalleeLoads, Callee); - countNumMemAccesses(GV, CallerStores, CallerLoads, Caller); - if ((CalleeStores + CalleeLoads) > 10 && - (CallerStores + CallerLoads) > 10) { + countNumMemAccesses(Ptr, CallerStores, CallerLoads, Caller); + if (CallerStores + CallerLoads > 10) { Bonus = 1000; break; } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [SystemZ] Fix compile time regression in adjustInliningThreshold(). (#137527) (PR #137628)
llvmbot wrote: @llvm/pr-subscribers-backend-systemz Author: None (llvmbot) Changes Backport 98b895d Requested by: @JonPsson1 --- Full diff: https://github.com/llvm/llvm-project/pull/137628.diff 1 Files Affected: - (modified) llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp (+30-18) ``diff diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index 2b94832939419..7eec38b79cb82 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/BasicTTIImpl.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/Support/Debug.h" @@ -80,7 +81,6 @@ unsigned SystemZTTIImpl::adjustInliningThreshold(const CallBase *CB) const { const Function *Callee = CB->getCalledFunction(); if (!Callee) return 0; - const Module *M = Caller->getParent(); // Increase the threshold if an incoming argument is used only as a memcpy // source. @@ -92,25 +92,37 @@ unsigned SystemZTTIImpl::adjustInliningThreshold(const CallBase *CB) const { } } - // Give bonus for globals used much in both caller and callee. - std::set CalleeGlobals; - std::set CallerGlobals; - for (const GlobalVariable &Global : M->globals()) -for (const User *U : Global.users()) - if (const Instruction *User = dyn_cast(U)) { -if (User->getParent()->getParent() == Callee) - CalleeGlobals.insert(&Global); -if (User->getParent()->getParent() == Caller) - CallerGlobals.insert(&Global); + // Give bonus for globals used much in both caller and a relatively small + // callee. 
+ unsigned InstrCount = 0; + SmallDenseMap Ptr2NumUses; + for (auto &I : instructions(Callee)) { +if (++InstrCount == 200) { + Ptr2NumUses.clear(); + break; +} +if (const auto *SI = dyn_cast(&I)) { + if (!SI->isVolatile()) +if (auto *GV = dyn_cast(SI->getPointerOperand())) + Ptr2NumUses[GV]++; +} else if (const auto *LI = dyn_cast(&I)) { + if (!LI->isVolatile()) +if (auto *GV = dyn_cast(LI->getPointerOperand())) + Ptr2NumUses[GV]++; +} else if (const auto *GEP = dyn_cast(&I)) { + if (auto *GV = dyn_cast(GEP->getPointerOperand())) { +unsigned NumStores = 0, NumLoads = 0; +countNumMemAccesses(GEP, NumStores, NumLoads, Callee); +Ptr2NumUses[GV] += NumLoads + NumStores; } - for (auto *GV : CalleeGlobals) -if (CallerGlobals.count(GV)) { - unsigned CalleeStores = 0, CalleeLoads = 0; +} + } + + for (auto [Ptr, NumCalleeUses] : Ptr2NumUses) +if (NumCalleeUses > 10) { unsigned CallerStores = 0, CallerLoads = 0; - countNumMemAccesses(GV, CalleeStores, CalleeLoads, Callee); - countNumMemAccesses(GV, CallerStores, CallerLoads, Caller); - if ((CalleeStores + CalleeLoads) > 10 && - (CallerStores + CallerLoads) > 10) { + countNumMemAccesses(Ptr, CallerStores, CallerLoads, Caller); + if (CallerStores + CallerLoads > 10) { Bonus = 1000; break; } `` https://github.com/llvm/llvm-project/pull/137628 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [SystemZ] Fix compile time regression in adjustInliningThreshold(). (#137527) (PR #137628)
llvmbot wrote: @uweigand What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/137628 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][OpenACC][OpenMP] Separate implementations of ATOMIC constructs (PR #137517)
kparzysz wrote: Yes, it started as a copy, then I removed templates, and replaced type parameters with concrete types, and removed the "OmpAcc" from function names. https://github.com/llvm/llvm-project/pull/137517 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [SystemZ] Fix compile time regression in adjustInliningThreshold(). (#137527) (PR #137628)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/137628 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [InstCombine] Fix ninf propagation for fcmp+sel -> minmax (#136433) (PR #137605)
https://github.com/nikic commented: I don't think there is a need to backport FMF propagation fixes. https://github.com/llvm/llvm-project/pull/137605 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [InstCombine] Do not fold logical is_finite test (#136851) (PR #137606)
https://github.com/nikic approved this pull request. https://github.com/llvm/llvm-project/pull/137606 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [InstCombine] Fix ninf propagation for fcmp+sel -> minmax (#136433) (PR #137605)
dtcxzyw wrote: > I don't think there is a need to backport FMF propagation fixes. Is there a policy to judge whether or not to backport a miscompilation bug fix? Actually, this bug is unlikely to be triggered in real-world projects. But this fix is simple and safe to backport. I am fine with not backporting this if the reason is "it depends on https://github.com/llvm/llvm-project/pull/137131". https://github.com/llvm/llvm-project/pull/137605 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][OpenACC][OpenMP] Separate implementations of ATOMIC constructs (PR #137517)
https://github.com/tblah approved this pull request. LGTM. If I didn't miss anything, this patch copies the old header code verbatim into both implementation files? https://github.com/llvm/llvm-project/pull/137517 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [clang-repl] : Fix clang-repl crash with --cuda flag (#136404) (PR #137615)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/137615 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [clang-repl] : Fix clang-repl crash with --cuda flag (#136404) (PR #137615)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/137615 Backport 21fb19f3b5d572f608e959af895d781b9b24fbbd Requested by: @anutosh491 >From 39a2186e512e49cdd7512e79eefa36a112e06f39 Mon Sep 17 00:00:00 2001 From: Anutosh Bhat Date: Sat, 26 Apr 2025 12:16:26 +0530 Subject: [PATCH] [clang-repl] : Fix clang-repl crash with --cuda flag (#136404) `clang-repl --cuda` was previously crashing with a segmentation fault, instead of reporting a clean error ``` (base) anutosh491@Anutoshs-MacBook-Air bin % ./clang-repl --cuda #0 0x000111da4fbc llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/opt/local/libexec/llvm-20/lib/libLLVM.dylib+0x150fbc) #1 0x000111da31dc llvm::sys::RunSignalHandlers() (/opt/local/libexec/llvm-20/lib/libLLVM.dylib+0x14f1dc) #2 0x000111da5628 SignalHandler(int) (/opt/local/libexec/llvm-20/lib/libLLVM.dylib+0x151628) #3 0x00019b242de4 (/usr/lib/system/libsystem_platform.dylib+0x180482de4) #4 0x000107f638d0 clang::IncrementalCUDADeviceParser::IncrementalCUDADeviceParser(std::__1::unique_ptr>, clang::CompilerInstance&, llvm::IntrusiveRefCntPtr, llvm::Error&, std::__1::list> const&) (/opt/local/libexec/llvm-20/lib/libclang-cpp.dylib+0x216b8d0) #5 0x000107f638d0 clang::IncrementalCUDADeviceParser::IncrementalCUDADeviceParser(std::__1::unique_ptr>, clang::CompilerInstance&, llvm::IntrusiveRefCntPtr, llvm::Error&, std::__1::list> const&) (/opt/local/libexec/llvm-20/lib/libclang-cpp.dylib+0x216b8d0) #6 0x000107f6bac8 clang::Interpreter::createWithCUDA(std::__1::unique_ptr>, std::__1::unique_ptr>) (/opt/local/libexec/llvm-20/lib/libclang-cpp.dylib+0x2173ac8) #7 0x00010206f8a8 main (/opt/local/libexec/llvm-20/bin/clang-repl+0x138a8) #8 0x00019ae8c274 Segmentation fault: 11 ``` The underlying issue was that the `DeviceCompilerInstance` (used for device-side CUDA compilation) was never initialized with a `Sema`, which is required before constructing the `IncrementalCUDADeviceParser`. 
https://github.com/llvm/llvm-project/blob/89687e6f383b742a3c6542dc673a84d9f82d02de/clang/lib/Interpreter/DeviceOffload.cpp#L32 https://github.com/llvm/llvm-project/blob/89687e6f383b742a3c6542dc673a84d9f82d02de/clang/lib/Interpreter/IncrementalParser.cpp#L31 Unlike the host-side `CompilerInstance` which runs `ExecuteAction` inside the Interpreter constructor (thereby setting up Sema), the device-side CI was passed into the parser uninitialized, leading to an assertion or crash when accessing its internals. To fix this, I refactored the `Interpreter::create` method to include an optional `DeviceCI` parameter. If provided, we know we need to take care of this instance too. Only then do we construct the `IncrementalCUDADeviceParser`. (cherry picked from commit 21fb19f3b5d572f608e959af895d781b9b24fbbd) --- clang/include/clang/Interpreter/Interpreter.h | 13 ++-- clang/lib/Interpreter/DeviceOffload.cpp | 45 +- clang/lib/Interpreter/DeviceOffload.h | 2 - clang/lib/Interpreter/Interpreter.cpp | 59 ++- 4 files changed, 68 insertions(+), 51 deletions(-) diff --git a/clang/include/clang/Interpreter/Interpreter.h b/clang/include/clang/Interpreter/Interpreter.h index b1b63aedf86ab..56213f88b9e30 100644 --- a/clang/include/clang/Interpreter/Interpreter.h +++ b/clang/include/clang/Interpreter/Interpreter.h @@ -41,6 +41,7 @@ class CXXRecordDecl; class Decl; class IncrementalExecutor; class IncrementalParser; +class IncrementalCUDADeviceParser; /// Create a pre-configured \c CompilerInstance for incremental processing. class IncrementalCompilerBuilder { @@ -93,7 +94,10 @@ class Interpreter { std::unique_ptr IncrExecutor; // An optional parser for CUDA offloading - std::unique_ptr DeviceParser; + std::unique_ptr DeviceParser; + + // An optional action for CUDA offloading + std::unique_ptr DeviceAct; /// List containing information about each incrementally parsed piece of code. 
std::list PTUs; @@ -175,10 +179,11 @@ class Interpreter { llvm::Expected ExtractValueFromExpr(Expr *E); llvm::Expected CompileDtorCall(CXXRecordDecl *CXXRD); - CodeGenerator *getCodeGen() const; - std::unique_ptr GenModule(); + CodeGenerator *getCodeGen(IncrementalAction *Action = nullptr) const; + std::unique_ptr GenModule(IncrementalAction *Action = nullptr); PartialTranslationUnit &RegisterPTU(TranslationUnitDecl *TU, - std::unique_ptr M = {}); + std::unique_ptr M = {}, + IncrementalAction *Action = nullptr); // A cache for the compiled destructors used to for de-allocation of managed // clang::Values. diff --git a/clang/lib/Interpreter/DeviceOffload.cpp b/clang/lib/Interpreter/DeviceOffload.cpp index 1999d63d1aa04..7d0125403ea52 100644 --- a/clang/lib/Interpreter/DeviceOffload.cpp +++ b/clang/lib/Interpreter/DeviceOffload.cpp @@ -31,10 +31,9 @@ Incremental
[llvm-branch-commits] [clang] release/20.x: [clang-repl] : Fix clang-repl crash with --cuda flag (#136404) (PR #137615)
llvmbot wrote: @llvm/pr-subscribers-clang Author: None (llvmbot) Changes Backport 21fb19f3b5d572f608e959af895d781b9b24fbbd Requested by: @anutosh491 --- Full diff: https://github.com/llvm/llvm-project/pull/137615.diff 4 Files Affected: - (modified) clang/include/clang/Interpreter/Interpreter.h (+9-4) - (modified) clang/lib/Interpreter/DeviceOffload.cpp (+15-30) - (modified) clang/lib/Interpreter/DeviceOffload.h (-2) - (modified) clang/lib/Interpreter/Interpreter.cpp (+44-15) ``diff diff --git a/clang/include/clang/Interpreter/Interpreter.h b/clang/include/clang/Interpreter/Interpreter.h index b1b63aedf86ab..56213f88b9e30 100644 --- a/clang/include/clang/Interpreter/Interpreter.h +++ b/clang/include/clang/Interpreter/Interpreter.h @@ -41,6 +41,7 @@ class CXXRecordDecl; class Decl; class IncrementalExecutor; class IncrementalParser; +class IncrementalCUDADeviceParser; /// Create a pre-configured \c CompilerInstance for incremental processing. class IncrementalCompilerBuilder { @@ -93,7 +94,10 @@ class Interpreter { std::unique_ptr IncrExecutor; // An optional parser for CUDA offloading - std::unique_ptr DeviceParser; + std::unique_ptr DeviceParser; + + // An optional action for CUDA offloading + std::unique_ptr DeviceAct; /// List containing information about each incrementally parsed piece of code. std::list PTUs; @@ -175,10 +179,11 @@ class Interpreter { llvm::Expected ExtractValueFromExpr(Expr *E); llvm::Expected CompileDtorCall(CXXRecordDecl *CXXRD); - CodeGenerator *getCodeGen() const; - std::unique_ptr GenModule(); + CodeGenerator *getCodeGen(IncrementalAction *Action = nullptr) const; + std::unique_ptr GenModule(IncrementalAction *Action = nullptr); PartialTranslationUnit &RegisterPTU(TranslationUnitDecl *TU, - std::unique_ptr M = {}); + std::unique_ptr M = {}, + IncrementalAction *Action = nullptr); // A cache for the compiled destructors used to for de-allocation of managed // clang::Values. 
diff --git a/clang/lib/Interpreter/DeviceOffload.cpp b/clang/lib/Interpreter/DeviceOffload.cpp index 1999d63d1aa04..7d0125403ea52 100644 --- a/clang/lib/Interpreter/DeviceOffload.cpp +++ b/clang/lib/Interpreter/DeviceOffload.cpp @@ -31,10 +31,9 @@ IncrementalCUDADeviceParser::IncrementalCUDADeviceParser( llvm::Error &Err, const std::list &PTUs) : IncrementalParser(*DeviceInstance, Err), PTUs(PTUs), VFS(FS), CodeGenOpts(HostInstance.getCodeGenOpts()), - TargetOpts(HostInstance.getTargetOpts()) { + TargetOpts(DeviceInstance->getTargetOpts()) { if (Err) return; - DeviceCI = std::move(DeviceInstance); StringRef Arch = TargetOpts.CPU; if (!Arch.starts_with("sm_") || Arch.substr(3).getAsInteger(10, SMVersion)) { Err = llvm::joinErrors(std::move(Err), llvm::make_error( @@ -42,34 +41,7 @@ IncrementalCUDADeviceParser::IncrementalCUDADeviceParser( llvm::inconvertibleErrorCode())); return; } -} - -llvm::Expected -IncrementalCUDADeviceParser::Parse(llvm::StringRef Input) { - auto PTU = IncrementalParser::Parse(Input); - if (!PTU) -return PTU.takeError(); - - auto PTX = GeneratePTX(); - if (!PTX) -return PTX.takeError(); - - auto Err = GenerateFatbinary(); - if (Err) -return std::move(Err); - - std::string FatbinFileName = - "/incr_module_" + std::to_string(PTUs.size()) + ".fatbin"; - VFS->addFile(FatbinFileName, 0, - llvm::MemoryBuffer::getMemBuffer( - llvm::StringRef(FatbinContent.data(), FatbinContent.size()), - "", false)); - - CodeGenOpts.CudaGpuBinaryFileName = FatbinFileName; - - FatbinContent.clear(); - - return PTU; + DeviceCI = std::move(DeviceInstance); } llvm::Expected IncrementalCUDADeviceParser::GeneratePTX() { @@ -172,6 +144,19 @@ llvm::Error IncrementalCUDADeviceParser::GenerateFatbinary() { FatbinContent.append(PTXCode.begin(), PTXCode.end()); + const PartialTranslationUnit &PTU = PTUs.back(); + + std::string FatbinFileName = "/" + PTU.TheModule->getName().str() + ".fatbin"; + + VFS->addFile(FatbinFileName, 0, + llvm::MemoryBuffer::getMemBuffer( + 
llvm::StringRef(FatbinContent.data(), FatbinContent.size()), + "", false)); + + CodeGenOpts.CudaGpuBinaryFileName = FatbinFileName; + + FatbinContent.clear(); + return llvm::Error::success(); } diff --git a/clang/lib/Interpreter/DeviceOffload.h b/clang/lib/Interpreter/DeviceOffload.h index b9a1acab004c3..43645033c4840 100644 --- a/clang/lib/Interpreter/DeviceOffload.h +++ b/clang/lib/Interpreter/DeviceOffload.h @@ -33,8 +33,6 @@ class IncrementalCUDADeviceParser : public IncrementalParser { llvm::IntrusiveRefCntPtr VFS, llvm::Error &Err, const std::list &PTUs); - llvm::Expected Parse(llvm::StringRef Input)
[llvm-branch-commits] [clang] release/20.x: [clang-repl] : Fix clang-repl crash with --cuda flag (#136404) (PR #137615)
llvmbot wrote: @vgvassilev What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/137615 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [clang-repl] Implement LoadDynamicLibrary for clang-repl wasm use cases (#133037) (PR #137616)
llvmbot wrote: @vgvassilev What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/137616 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [clang-repl] Implement LoadDynamicLibrary for clang-repl wasm use cases (#133037) (PR #137616)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/137616 Backport 8f56394487a4d454be0637667267ad37bd636d0f Requested by: @anutosh491 >From b5cb38c1e2fe5ec86a377a7a8e4b31f98051b8a1 Mon Sep 17 00:00:00 2001 From: Anutosh Bhat Date: Tue, 1 Apr 2025 18:03:45 +0530 Subject: [PATCH] [clang-repl] Implement LoadDynamicLibrary for clang-repl wasm use cases (#133037) **Currently we don't make use of the JIT for the wasm use cases so the approach using the execution engine won't work in these cases.** Rather if we use dlopen. We should be able to do the following (demonstrating through a toy project) 1) Make use of LoadDynamicLibrary through the given implementation ``` extern "C" EMSCRIPTEN_KEEPALIVE int load_library(const char *name) { auto Err = Interp->LoadDynamicLibrary(name); if (Err) { llvm::logAllUnhandledErrors(std::move(Err), llvm::errs(), "load_library error: "); return -1; } return 0; } ``` 2) Add a button to call load_library once the library has been added in our MEMFS (currently we have symengine built as a SIDE MODULE and we are loading it) (cherry picked from commit 8f56394487a4d454be0637667267ad37bd636d0f) --- clang/lib/Interpreter/IncrementalExecutor.h | 2 +- clang/lib/Interpreter/Interpreter.cpp | 10 ++ clang/lib/Interpreter/Wasm.cpp | 13 + clang/lib/Interpreter/Wasm.h| 3 +++ 4 files changed, 27 insertions(+), 1 deletion(-) diff --git a/clang/lib/Interpreter/IncrementalExecutor.h b/clang/lib/Interpreter/IncrementalExecutor.h index dbd61f0b8b1eb..71d71bc3883e2 100644 --- a/clang/lib/Interpreter/IncrementalExecutor.h +++ b/clang/lib/Interpreter/IncrementalExecutor.h @@ -57,7 +57,7 @@ class IncrementalExecutor { virtual llvm::Error removeModule(PartialTranslationUnit &PTU); virtual llvm::Error runCtors() const; virtual llvm::Error cleanUp(); - llvm::Expected + virtual llvm::Expected getSymbolAddress(llvm::StringRef Name, SymbolNameKind NameKind) const; llvm::orc::LLJIT &GetExecutionEngine() { return *Jit; } diff --git 
a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp index fa4c1439c9261..f8c8d0a425659 100644 --- a/clang/lib/Interpreter/Interpreter.cpp +++ b/clang/lib/Interpreter/Interpreter.cpp @@ -18,6 +18,7 @@ #include "llvm/Support/VirtualFileSystem.h" #ifdef __EMSCRIPTEN__ #include "Wasm.h" +#include #endif // __EMSCRIPTEN__ #include "clang/AST/ASTConsumer.h" @@ -711,6 +712,14 @@ llvm::Error Interpreter::Undo(unsigned N) { } llvm::Error Interpreter::LoadDynamicLibrary(const char *name) { +#ifdef __EMSCRIPTEN__ + void *handle = dlopen(name, RTLD_NOW | RTLD_GLOBAL); + if (!handle) { +llvm::errs() << dlerror() << '\n'; +return llvm::make_error("Failed to load dynamic library", + llvm::inconvertibleErrorCode()); + } +#else auto EE = getExecutionEngine(); if (!EE) return EE.takeError(); @@ -722,6 +731,7 @@ llvm::Error Interpreter::LoadDynamicLibrary(const char *name) { EE->getMainJITDylib().addGenerator(std::move(*DLSG)); else return DLSG.takeError(); +#endif return llvm::Error::success(); } diff --git a/clang/lib/Interpreter/Wasm.cpp b/clang/lib/Interpreter/Wasm.cpp index aa10b160ccf84..74c83169ced6c 100644 --- a/clang/lib/Interpreter/Wasm.cpp +++ b/clang/lib/Interpreter/Wasm.cpp @@ -144,6 +144,19 @@ llvm::Error WasmIncrementalExecutor::cleanUp() { return llvm::Error::success(); } +llvm::Expected +WasmIncrementalExecutor::getSymbolAddress(llvm::StringRef Name, + SymbolNameKind NameKind) const { + void *Sym = dlsym(RTLD_DEFAULT, Name.str().c_str()); + if (!Sym) { +return llvm::make_error("dlsym failed for symbol: " + + Name.str(), + llvm::inconvertibleErrorCode()); + } + + return llvm::orc::ExecutorAddr::fromPtr(Sym); +} + WasmIncrementalExecutor::~WasmIncrementalExecutor() = default; } // namespace clang \ No newline at end of file diff --git a/clang/lib/Interpreter/Wasm.h b/clang/lib/Interpreter/Wasm.h index 4632613326d39..9a752934e3185 100644 --- a/clang/lib/Interpreter/Wasm.h +++ b/clang/lib/Interpreter/Wasm.h @@ -29,6 +29,9 @@ class 
WasmIncrementalExecutor : public IncrementalExecutor { llvm::Error removeModule(PartialTranslationUnit &PTU) override; llvm::Error runCtors() const override; llvm::Error cleanUp() override; + llvm::Expected + getSymbolAddress(llvm::StringRef Name, + SymbolNameKind NameKind) const override; ~WasmIncrementalExecutor() override; }; ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [clang-repl] Implement LoadDynamicLibrary for clang-repl wasm use cases (#133037) (PR #137616)
llvmbot wrote: @llvm/pr-subscribers-clang Author: None (llvmbot) Changes Backport 8f56394487a4d454be0637667267ad37bd636d0f Requested by: @anutosh491 --- Full diff: https://github.com/llvm/llvm-project/pull/137616.diff 4 Files Affected: - (modified) clang/lib/Interpreter/IncrementalExecutor.h (+1-1) - (modified) clang/lib/Interpreter/Interpreter.cpp (+10) - (modified) clang/lib/Interpreter/Wasm.cpp (+13) - (modified) clang/lib/Interpreter/Wasm.h (+3) ``diff diff --git a/clang/lib/Interpreter/IncrementalExecutor.h b/clang/lib/Interpreter/IncrementalExecutor.h index dbd61f0b8b1eb..71d71bc3883e2 100644 --- a/clang/lib/Interpreter/IncrementalExecutor.h +++ b/clang/lib/Interpreter/IncrementalExecutor.h @@ -57,7 +57,7 @@ class IncrementalExecutor { virtual llvm::Error removeModule(PartialTranslationUnit &PTU); virtual llvm::Error runCtors() const; virtual llvm::Error cleanUp(); - llvm::Expected + virtual llvm::Expected getSymbolAddress(llvm::StringRef Name, SymbolNameKind NameKind) const; llvm::orc::LLJIT &GetExecutionEngine() { return *Jit; } diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp index fa4c1439c9261..f8c8d0a425659 100644 --- a/clang/lib/Interpreter/Interpreter.cpp +++ b/clang/lib/Interpreter/Interpreter.cpp @@ -18,6 +18,7 @@ #include "llvm/Support/VirtualFileSystem.h" #ifdef __EMSCRIPTEN__ #include "Wasm.h" +#include #endif // __EMSCRIPTEN__ #include "clang/AST/ASTConsumer.h" @@ -711,6 +712,14 @@ llvm::Error Interpreter::Undo(unsigned N) { } llvm::Error Interpreter::LoadDynamicLibrary(const char *name) { +#ifdef __EMSCRIPTEN__ + void *handle = dlopen(name, RTLD_NOW | RTLD_GLOBAL); + if (!handle) { +llvm::errs() << dlerror() << '\n'; +return llvm::make_error("Failed to load dynamic library", + llvm::inconvertibleErrorCode()); + } +#else auto EE = getExecutionEngine(); if (!EE) return EE.takeError(); @@ -722,6 +731,7 @@ llvm::Error Interpreter::LoadDynamicLibrary(const char *name) { 
EE->getMainJITDylib().addGenerator(std::move(*DLSG)); else return DLSG.takeError(); +#endif return llvm::Error::success(); } diff --git a/clang/lib/Interpreter/Wasm.cpp b/clang/lib/Interpreter/Wasm.cpp index aa10b160ccf84..74c83169ced6c 100644 --- a/clang/lib/Interpreter/Wasm.cpp +++ b/clang/lib/Interpreter/Wasm.cpp @@ -144,6 +144,19 @@ llvm::Error WasmIncrementalExecutor::cleanUp() { return llvm::Error::success(); } +llvm::Expected +WasmIncrementalExecutor::getSymbolAddress(llvm::StringRef Name, + SymbolNameKind NameKind) const { + void *Sym = dlsym(RTLD_DEFAULT, Name.str().c_str()); + if (!Sym) { +return llvm::make_error("dlsym failed for symbol: " + + Name.str(), + llvm::inconvertibleErrorCode()); + } + + return llvm::orc::ExecutorAddr::fromPtr(Sym); +} + WasmIncrementalExecutor::~WasmIncrementalExecutor() = default; } // namespace clang \ No newline at end of file diff --git a/clang/lib/Interpreter/Wasm.h b/clang/lib/Interpreter/Wasm.h index 4632613326d39..9a752934e3185 100644 --- a/clang/lib/Interpreter/Wasm.h +++ b/clang/lib/Interpreter/Wasm.h @@ -29,6 +29,9 @@ class WasmIncrementalExecutor : public IncrementalExecutor { llvm::Error removeModule(PartialTranslationUnit &PTU) override; llvm::Error runCtors() const override; llvm::Error cleanUp() override; + llvm::Expected + getSymbolAddress(llvm::StringRef Name, + SymbolNameKind NameKind) const override; ~WasmIncrementalExecutor() override; }; `` https://github.com/llvm/llvm-project/pull/137616 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [clang-repl] Implement LoadDynamicLibrary for clang-repl wasm use cases (#133037) (PR #137616)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/137616 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [clang-repl] : Fix clang-repl crash with --cuda flag (#136404) (PR #137615)
https://github.com/vgvassilev approved this pull request. Lgtm! https://github.com/llvm/llvm-project/pull/137615 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [clang-repl] Implement LoadDynamicLibrary for clang-repl wasm use cases (#133037) (PR #137616)
https://github.com/vgvassilev approved this pull request. Lgtm! https://github.com/llvm/llvm-project/pull/137616 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [SystemZ] Fix compile time regression in adjustInliningThreshold(). (#137527) (PR #137628)
https://github.com/uweigand approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/137628 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [LV] Fix crash when building partial reductions using types that aren't known scale factors (#136680) (PR #136863)
NickGuy-Arm wrote: I can verify that updating the test files doesn't impact the test itself. Looks to be some instruction reordering but no change to the functionality being tested, and this test passes on main without any further changes. How do we go about updating the test on this branch? I assume we don't have commit access to llvmbot's fork. https://github.com/llvm/llvm-project/pull/136863 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [mlir] [mlir][OpenMP] Convert omp.cancellation_point to LLVMIR (PR #137205)
https://github.com/tblah updated https://github.com/llvm/llvm-project/pull/137205 >From 1c6d8d0fc688dcd630b7077d2e7ae190a6c62361 Mon Sep 17 00:00:00 2001 From: Tom Eccles Date: Tue, 15 Apr 2025 15:40:39 + Subject: [PATCH] [mlir][OpenMP] Convert omp.cancellation_point to LLVMIR This is basically identical to cancel except without the if clause. taskgroup will be implemented in a followup PR. --- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 10 + llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 51 + .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 37 +++- .../LLVMIR/openmp-cancellation-point.mlir | 188 ++ mlir/test/Target/LLVMIR/openmp-todo.mlir | 16 +- 5 files changed, 293 insertions(+), 9 deletions(-) create mode 100644 mlir/test/Target/LLVMIR/openmp-cancellation-point.mlir diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 10d69e561a987..14ad8629537f7 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -686,6 +686,16 @@ class OpenMPIRBuilder { Value *IfCondition, omp::Directive CanceledDirective); + /// Generator for '#omp cancellation point' + /// + /// \param Loc The location where the directive was encountered. + /// \param CanceledDirective The kind of directive that is cancled. + /// + /// \returns The insertion point after the barrier. + InsertPointOrErrorTy + createCancellationPoint(const LocationDescription &Loc, + omp::Directive CanceledDirective); + /// Generator for '#omp parallel' /// /// \param Loc The insert and source location description. 
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 48dd5171e898b..602a1d6fbf24b 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -1118,6 +1118,57 @@ OpenMPIRBuilder::createCancel(const LocationDescription &Loc, return Builder.saveIP(); } +OpenMPIRBuilder::InsertPointOrErrorTy +OpenMPIRBuilder::createCancellationPoint(const LocationDescription &Loc, + omp::Directive CanceledDirective) { + if (!updateToLocation(Loc)) +return Loc.IP; + + // LLVM utilities like blocks with terminators. + auto *UI = Builder.CreateUnreachable(); + Builder.SetInsertPoint(UI); + + Value *CancelKind = nullptr; + switch (CanceledDirective) { +#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \ + case DirectiveEnum: \ +CancelKind = Builder.getInt32(Value); \ +break; +#include "llvm/Frontend/OpenMP/OMPKinds.def" + default: +llvm_unreachable("Unknown cancel kind!"); + } + + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); + Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind}; + Value *Result = Builder.CreateCall( + getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancellationpoint), Args); + auto ExitCB = [this, CanceledDirective, Loc](InsertPointTy IP) -> Error { +if (CanceledDirective == OMPD_parallel) { + IRBuilder<>::InsertPointGuard IPG(Builder); + Builder.restoreIP(IP); + return createBarrier(LocationDescription(Builder.saveIP(), Loc.DL), + omp::Directive::OMPD_unknown, + /* ForceSimpleCall */ false, + /* CheckCancelFlag */ false) + .takeError(); +} +return Error::success(); + }; + + // The actual cancel logic is shared with others, e.g., cancel_barriers. + if (Error Err = emitCancelationCheckImpl(Result, CanceledDirective, ExitCB)) +return Err; + + // Update the insertion point and remove the terminator we introduced. 
+ Builder.SetInsertPoint(UI->getParent()); + UI->eraseFromParent(); + + return Builder.saveIP(); +} + OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetKernel( const LocationDescription &Loc, InsertPointTy AllocaIP, Value *&Return, Value *Ident, Value *DeviceID, Value *NumTeams, Value *NumThreads, diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 9d181f12bc773..228c767699d72 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -255,6 +255,9 @@ static LogicalResult checkImplementationStatus(Operation &op) { LogicalResult result = success(); llvm::TypeSwitch(op) .Case([&](omp::CancelOp op) { checkCancelDirective(op, result); }) + .Case([&](omp::CancellationPointOp op) { +
[llvm-branch-commits] [mlir] [mlir][OpenMP] convert wsloop cancellation to LLVMIR (PR #137194)
https://github.com/tblah updated https://github.com/llvm/llvm-project/pull/137194 >From 9a8ed32eaea1a5b6a55e32b5b97f890a1fb8ecf9 Mon Sep 17 00:00:00 2001 From: Tom Eccles Date: Tue, 15 Apr 2025 15:05:50 + Subject: [PATCH] [mlir][OpenMP] convert wsloop cancellation to LLVMIR Taskloop support will follow in a later patch. --- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 40 - mlir/test/Target/LLVMIR/openmp-cancel.mlir| 87 +++ mlir/test/Target/LLVMIR/openmp-todo.mlir | 16 3 files changed, 125 insertions(+), 18 deletions(-) diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index e172b003176a5..9d181f12bc773 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -161,8 +161,7 @@ static LogicalResult checkImplementationStatus(Operation &op) { auto checkCancelDirective = [&todo](auto op, LogicalResult &result) { omp::ClauseCancellationConstructType cancelledDirective = op.getCancelDirective(); -if (cancelledDirective != omp::ClauseCancellationConstructType::Parallel && -cancelledDirective != omp::ClauseCancellationConstructType::Sections) +if (cancelledDirective == omp::ClauseCancellationConstructType::Taskgroup) result = todo("cancel directive construct type not yet supported"); }; auto checkDepend = [&todo](auto op, LogicalResult &result) { @@ -2358,6 +2357,30 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, ? llvm::omp::WorksharingLoopType::DistributeForStaticLoop : llvm::omp::WorksharingLoopType::ForStaticLoop; + SmallVector cancelTerminators; + // This callback is invoked only if there is cancellation inside of the wsloop + // body. 
+ auto finiCB = [&](llvm::OpenMPIRBuilder::InsertPointTy ip) -> llvm::Error { +llvm::IRBuilderBase &llvmBuilder = ompBuilder->Builder; +llvm::IRBuilderBase::InsertPointGuard guard(llvmBuilder); + +// ip is currently in the block branched to if cancellation occured. +// We need to create a branch to terminate that block. +llvmBuilder.restoreIP(ip); + +// We must still clean up the wsloop after cancelling it, so we need to +// branch to the block that finalizes the wsloop. +// That block has not been created yet so use this block as a dummy for now +// and fix this after creating the wsloop. +cancelTerminators.push_back(llvmBuilder.CreateBr(ip.getBlock())); +return llvm::Error::success(); + }; + // We have to add the cleanup to the OpenMPIRBuilder before the body gets + // created in case the body contains omp.cancel (which will then expect to be + // able to find this cleanup callback). + ompBuilder->pushFinalizationCB({finiCB, llvm::omp::Directive::OMPD_for, + constructIsCancellable(wsloopOp)}); + llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); llvm::Expected regionBlock = convertOmpOpRegions( wsloopOp.getRegion(), "omp.wsloop.region", builder, moduleTranslation); @@ -2379,6 +2402,19 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, if (failed(handleError(wsloopIP, opInst))) return failure(); + ompBuilder->popFinalizationCB(); + if (!cancelTerminators.empty()) { +// If we cancelled the loop, we should branch to the finalization block of +// the wsloop (which is always immediately before the loop continuation +// block). Now the finalization has been created, we can fix the branch. +llvm::BasicBlock *wsloopFini = wsloopIP->getBlock()->getSinglePredecessor(); +for (llvm::BranchInst *cancelBranch : cancelTerminators) { + assert(cancelBranch->getNumSuccessors() == 1 && + "cancel branch should have one target"); + cancelBranch->setSuccessor(0, wsloopFini); +} + } + // Process the reductions if required. 
if (failed(createReductionsAndCleanup( wsloopOp, builder, moduleTranslation, allocaIP, reductionDecls, diff --git a/mlir/test/Target/LLVMIR/openmp-cancel.mlir b/mlir/test/Target/LLVMIR/openmp-cancel.mlir index fca16b936fc85..3c195a98d1000 100644 --- a/mlir/test/Target/LLVMIR/openmp-cancel.mlir +++ b/mlir/test/Target/LLVMIR/openmp-cancel.mlir @@ -156,3 +156,90 @@ llvm.func @cancel_sections_if(%cond : i1) { // CHECK: ret void // CHECK: .cncl:; preds = %[[VAL_27]] // CHECK: br label %[[VAL_19]] + +llvm.func @cancel_wsloop_if(%lb : i32, %ub : i32, %step : i32, %cond : i1) { + omp.wsloop { +omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) { + omp.cancel cancellation_construct_type(loop) if(%cond) + omp.yield +} + } + llvm.return +} +// CHECK-LABEL: define void @cancel_wsloop_if +// CHECK: %[[VAL_0:.*]] = alloca i32, align 4 +// CHECK:
[llvm-branch-commits] [llvm] [mlir] [mlir][OpenMP] Convert omp.cancellation_point to LLVMIR (PR #137205)
tblah wrote: Thanks for taking a look, Michael. I think the issue was actually in an earlier patch in my series, but the UB was only triggering intermittently. The fix for Windows builds is https://github.com/llvm/llvm-project/pull/137193/commits/6c678b739d4fab204862ee057e00c3b0cc4c1946 https://github.com/llvm/llvm-project/pull/137205 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [mlir] [mlir][OpenMP] Convert omp.cancellation_point to LLVMIR (PR #137205)
https://github.com/tblah updated https://github.com/llvm/llvm-project/pull/137205 >From 1c6d8d0fc688dcd630b7077d2e7ae190a6c62361 Mon Sep 17 00:00:00 2001 From: Tom Eccles Date: Tue, 15 Apr 2025 15:40:39 + Subject: [PATCH 1/2] [mlir][OpenMP] Convert omp.cancellation_point to LLVMIR This is basically identical to cancel except without the if clause. taskgroup will be implemented in a followup PR. --- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 10 + llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 51 + .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 37 +++- .../LLVMIR/openmp-cancellation-point.mlir | 188 ++ mlir/test/Target/LLVMIR/openmp-todo.mlir | 16 +- 5 files changed, 293 insertions(+), 9 deletions(-) create mode 100644 mlir/test/Target/LLVMIR/openmp-cancellation-point.mlir diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 10d69e561a987..14ad8629537f7 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -686,6 +686,16 @@ class OpenMPIRBuilder { Value *IfCondition, omp::Directive CanceledDirective); + /// Generator for '#omp cancellation point' + /// + /// \param Loc The location where the directive was encountered. + /// \param CanceledDirective The kind of directive that is cancled. + /// + /// \returns The insertion point after the barrier. + InsertPointOrErrorTy + createCancellationPoint(const LocationDescription &Loc, + omp::Directive CanceledDirective); + /// Generator for '#omp parallel' /// /// \param Loc The insert and source location description. 
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 48dd5171e898b..602a1d6fbf24b 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -1118,6 +1118,57 @@ OpenMPIRBuilder::createCancel(const LocationDescription &Loc, return Builder.saveIP(); } +OpenMPIRBuilder::InsertPointOrErrorTy +OpenMPIRBuilder::createCancellationPoint(const LocationDescription &Loc, + omp::Directive CanceledDirective) { + if (!updateToLocation(Loc)) +return Loc.IP; + + // LLVM utilities like blocks with terminators. + auto *UI = Builder.CreateUnreachable(); + Builder.SetInsertPoint(UI); + + Value *CancelKind = nullptr; + switch (CanceledDirective) { +#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \ + case DirectiveEnum: \ +CancelKind = Builder.getInt32(Value); \ +break; +#include "llvm/Frontend/OpenMP/OMPKinds.def" + default: +llvm_unreachable("Unknown cancel kind!"); + } + + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); + Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind}; + Value *Result = Builder.CreateCall( + getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancellationpoint), Args); + auto ExitCB = [this, CanceledDirective, Loc](InsertPointTy IP) -> Error { +if (CanceledDirective == OMPD_parallel) { + IRBuilder<>::InsertPointGuard IPG(Builder); + Builder.restoreIP(IP); + return createBarrier(LocationDescription(Builder.saveIP(), Loc.DL), + omp::Directive::OMPD_unknown, + /* ForceSimpleCall */ false, + /* CheckCancelFlag */ false) + .takeError(); +} +return Error::success(); + }; + + // The actual cancel logic is shared with others, e.g., cancel_barriers. + if (Error Err = emitCancelationCheckImpl(Result, CanceledDirective, ExitCB)) +return Err; + + // Update the insertion point and remove the terminator we introduced. 
+ Builder.SetInsertPoint(UI->getParent()); + UI->eraseFromParent(); + + return Builder.saveIP(); +} + OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetKernel( const LocationDescription &Loc, InsertPointTy AllocaIP, Value *&Return, Value *Ident, Value *DeviceID, Value *NumTeams, Value *NumThreads, diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 9d181f12bc773..228c767699d72 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -255,6 +255,9 @@ static LogicalResult checkImplementationStatus(Operation &op) { LogicalResult result = success(); llvm::TypeSwitch(op) .Case([&](omp::CancelOp op) { checkCancelDirective(op, result); }) + .Case([&](omp::CancellationPointOp op) {
[llvm-branch-commits] [llvm] [AMDGPU] Remove the pass `AMDGPUPromoteKernelArguments` (PR #137655)
@@ -11,11 +10,9 @@ define amdgpu_kernel void @ptr_nest_3(ptr addrspace(1) nocapture readonly %Arg) ; CHECK-NEXT: entry: ; CHECK-NEXT:[[I:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() ; CHECK-NEXT:[[P1:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) [[ARG:%.*]], i32 [[I]] -; CHECK-NEXT:[[P2:%.*]] = load ptr, ptr addrspace(1) [[P1]], align 8, !amdgpu.noclobber [[META0:![0-9]+]] -; CHECK-NEXT:[[P2_GLOBAL:%.*]] = addrspacecast ptr [[P2]] to ptr addrspace(1) -; CHECK-NEXT:[[P3:%.*]] = load ptr, ptr addrspace(1) [[P2_GLOBAL]], align 8, !amdgpu.noclobber [[META0]] -; CHECK-NEXT:[[P3_GLOBAL:%.*]] = addrspacecast ptr [[P3]] to ptr addrspace(1) -; CHECK-NEXT:store float 0.00e+00, ptr addrspace(1) [[P3_GLOBAL]], align 4 +; CHECK-NEXT:[[P2:%.*]] = load ptr, ptr addrspace(1) [[P1]], align 8 +; CHECK-NEXT:[[P3:%.*]] = load ptr, ptr [[P2]], align 8 arsenm wrote: > That said, this case can't really be handled by infer-address-space or > anything else at the moment. Wouldn't this be covered by checking if this was an invariant load of a pointer from a kernel argument? https://github.com/llvm/llvm-project/pull/137655 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lldb] f7159a0 - Revert "[lldb][Format] Make function name frame-format variables work without…"
Author: Michael Buch Date: 2025-04-29T07:03:26+01:00 New Revision: f7159a00fdd1950dbac5c24f532a13af76af44a5 URL: https://github.com/llvm/llvm-project/commit/f7159a00fdd1950dbac5c24f532a13af76af44a5 DIFF: https://github.com/llvm/llvm-project/commit/f7159a00fdd1950dbac5c24f532a13af76af44a5.diff LOG: Revert "[lldb][Format] Make function name frame-format variables work without…" This reverts commit cebf86eb1de163faaf5f9781f6bbded70dc1f9f0. Added: Modified: lldb/source/Core/FormatEntity.cpp lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp lldb/test/Shell/Settings/TestFrameFormatFunctionBasename.test lldb/test/Shell/Settings/TestFrameFormatFunctionFormattedArguments.test lldb/test/Shell/Settings/TestFrameFormatFunctionQualifiers.test lldb/test/Shell/Settings/TestFrameFormatFunctionReturn.test lldb/test/Shell/Settings/TestFrameFormatFunctionScope.test lldb/test/Shell/Settings/TestFrameFormatFunctionTemplateArguments.test Removed: diff --git a/lldb/source/Core/FormatEntity.cpp b/lldb/source/Core/FormatEntity.cpp index a2410048e5a89..6cdfcfedf8be5 100644 --- a/lldb/source/Core/FormatEntity.cpp +++ b/lldb/source/Core/FormatEntity.cpp @@ -1809,12 +1809,11 @@ bool FormatEntity::Format(const Entry &entry, Stream &s, case Entry::Type::FunctionReturnRight: case Entry::Type::FunctionReturnLeft: case Entry::Type::FunctionQualifiers: { -Language *language_plugin = nullptr; -if (sc->function) - language_plugin = Language::FindPlugin(sc->function->GetLanguage()); -else if (sc->symbol) - language_plugin = Language::FindPlugin(sc->symbol->GetLanguage()); +if (!sc->function) + return false; +Language *language_plugin = +Language::FindPlugin(sc->function->GetLanguage()); if (!language_plugin) return false; diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp index ab8e9883868ce..283e867d53bb7 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp +++ 
b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp @@ -381,34 +381,6 @@ GetDemangledScope(const SymbolContext &sc) { return demangled_name.slice(info->ScopeRange.first, info->ScopeRange.second); } -static bool PrintDemangledArgumentList(Stream &s, const SymbolContext &sc) { - assert(sc.symbol); - - Mangled mangled = sc.GetPossiblyInlinedFunctionName(); - if (!mangled) -return false; - - auto demangled_name = mangled.GetDemangledName().GetStringRef(); - if (demangled_name.empty()) -return false; - - const std::optional &info = mangled.GetDemangledInfo(); - if (!info) -return false; - - // Function without a basename is nonsense. - if (!info->hasBasename()) -return false; - - if (info->ArgumentsRange.second < info->ArgumentsRange.first) -return false; - - s << demangled_name.slice(info->ArgumentsRange.first, -info->ArgumentsRange.second); - - return true; -} - bool CPlusPlusLanguage::CxxMethodName::TrySimplifiedParse() { // This method tries to parse simple method definitions which are presumably // most comman in user programs. Definitions that can be parsed by this @@ -1918,6 +1890,8 @@ bool CPlusPlusLanguage::GetFunctionDisplayName( bool CPlusPlusLanguage::HandleFrameFormatVariable( const SymbolContext &sc, const ExecutionContext *exe_ctx, FormatEntity::Entry::Type type, Stream &s) { + assert(sc.function); + switch (type) { case FormatEntity::Entry::Type::FunctionScope: { std::optional scope = GetDemangledScope(sc); @@ -1951,14 +1925,6 @@ bool CPlusPlusLanguage::HandleFrameFormatVariable( } case FormatEntity::Entry::Type::FunctionFormattedArguments: { -// This ensures we print the arguments even when no debug-info is available. -// -// FIXME: we should have a Entry::Type::FunctionArguments and -// use it in the plugin.cplusplus.display.function-name-format -// once we have a "fallback operator" in the frame-format language. 
-if (!sc.function && sc.symbol) - return PrintDemangledArgumentList(s, sc); - VariableList args; if (auto variable_list_sp = GetFunctionVariableList(sc)) variable_list_sp->AppendVariablesWithScope(eValueTypeVariableArgument, diff --git a/lldb/test/Shell/Settings/TestFrameFormatFunctionBasename.test b/lldb/test/Shell/Settings/TestFrameFormatFunctionBasename.test index c0008e50927b1..61af2b49886ec 100644 --- a/lldb/test/Shell/Settings/TestFrameFormatFunctionBasename.test +++ b/lldb/test/Shell/Settings/TestFrameFormatFunctionBasename.test @@ -7,10 +7,6 @@ # RUN: %lldb -x -b -s %t/commands.input %t.out -o exit 2>&1 \ # RUN: | FileCheck %s # -# RUN: %clang_host -O0 %t/main.cpp -o %t-nodebug.out -# RUN: %lldb -x -b -s %t/commands.input %t-nodebug.out -o exit 2>&1 \ -# RUN:
[llvm-branch-commits] [llvm] [AArch64][llvm] Pre-commit tests for #xxxxx (NFC) (PR #137702)
llvmbot wrote: @llvm/pr-subscribers-backend-aarch64 Author: Jonathan Thackray (jthackray) Changes Add pre-commit tests for lowering atomicrmw `fminimum`/`fmaximum` to AArch64 assembler, in a subsequent change. --- Patch is 153.26 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/137702.diff 5 Files Affected: - (modified) llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lsfe.ll (+1204) - (modified) llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a_fp.ll (+960) - (modified) llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lsfe.ll (+1246) - (modified) llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8a_fp.ll (+990) - (modified) llvm/test/CodeGen/AArch64/Atomics/generate-tests.py (+2) ``diff diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lsfe.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lsfe.ll index ef1f11e71f433..7ee2a0bb19c0e 100644 --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lsfe.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lsfe.ll @@ -1598,3 +1598,1207 @@ define dso_local double @atomicrmw_fmin_double_unaligned_seq_cst(ptr %ptr, doubl %r = atomicrmw fmin ptr %ptr, double %value seq_cst, align 1 ret double %r } + +define dso_local half @atomicrmw_fmaximum_half_aligned_monotonic(ptr %ptr, half %value) { +; -O0-LABEL: atomicrmw_fmaximum_half_aligned_monotonic: +; -O0:ldaxrh w0, [x9] +; -O0:cmp w0, w10, uxth +; -O0:stlxrh w8, w11, [x9] +; -O0:subs w8, w8, w0, uxth +; +; -O1-LABEL: atomicrmw_fmaximum_half_aligned_monotonic: +; -O1:ldxrh w8, [x0] +; -O1:stxrh w9, w8, [x0] +%r = atomicrmw fmaximum ptr %ptr, half %value monotonic, align 2 +ret half %r +} + +define dso_local void @atomicrmw_fmaximum_half_aligned_monotonic_unused(ptr %ptr, half %value) { +; -O0-LABEL: atomicrmw_fmaximum_half_aligned_monotonic_unused: +; -O0:ldaxrh w0, [x9] +; -O0:cmp w0, w10, uxth +; -O0:stlxrh w8, w11, [x9] +; -O0:subs w8, w8, w0, uxth +; +; -O1-LABEL: 
atomicrmw_fmaximum_half_aligned_monotonic_unused: +; -O1:ldxrh w8, [x0] +; -O1:stxrh w9, w8, [x0] +%r = atomicrmw fmaximum ptr %ptr, half %value monotonic, align 2 +ret void +} + +define dso_local half @atomicrmw_fmaximum_half_aligned_acquire(ptr %ptr, half %value) { +; -O0-LABEL: atomicrmw_fmaximum_half_aligned_acquire: +; -O0:ldaxrh w0, [x9] +; -O0:cmp w0, w10, uxth +; -O0:stlxrh w8, w11, [x9] +; -O0:subs w8, w8, w0, uxth +; +; -O1-LABEL: atomicrmw_fmaximum_half_aligned_acquire: +; -O1:ldaxrh w8, [x0] +; -O1:stxrh w9, w8, [x0] +%r = atomicrmw fmaximum ptr %ptr, half %value acquire, align 2 +ret half %r +} + +define dso_local half @atomicrmw_fmaximum_half_aligned_release(ptr %ptr, half %value) { +; -O0-LABEL: atomicrmw_fmaximum_half_aligned_release: +; -O0:ldaxrh w0, [x9] +; -O0:cmp w0, w10, uxth +; -O0:stlxrh w8, w11, [x9] +; -O0:subs w8, w8, w0, uxth +; +; -O1-LABEL: atomicrmw_fmaximum_half_aligned_release: +; -O1:ldxrh w8, [x0] +; -O1:stlxrh w9, w8, [x0] +%r = atomicrmw fmaximum ptr %ptr, half %value release, align 2 +ret half %r +} + +define dso_local void @atomicrmw_fmaximum_half_aligned_release_unused(ptr %ptr, half %value) { +; -O0-LABEL: atomicrmw_fmaximum_half_aligned_release_unused: +; -O0:ldaxrh w0, [x9] +; -O0:cmp w0, w10, uxth +; -O0:stlxrh w8, w11, [x9] +; -O0:subs w8, w8, w0, uxth +; +; -O1-LABEL: atomicrmw_fmaximum_half_aligned_release_unused: +; -O1:ldxrh w8, [x0] +; -O1:stlxrh w9, w8, [x0] +%r = atomicrmw fmaximum ptr %ptr, half %value release, align 2 +ret void +} + +define dso_local half @atomicrmw_fmaximum_half_aligned_acq_rel(ptr %ptr, half %value) { +; -O0-LABEL: atomicrmw_fmaximum_half_aligned_acq_rel: +; -O0:ldaxrh w0, [x9] +; -O0:cmp w0, w10, uxth +; -O0:stlxrh w8, w11, [x9] +; -O0:subs w8, w8, w0, uxth +; +; -O1-LABEL: atomicrmw_fmaximum_half_aligned_acq_rel: +; -O1:ldaxrh w8, [x0] +; -O1:stlxrh w9, w8, [x0] +%r = atomicrmw fmaximum ptr %ptr, half %value acq_rel, align 2 +ret half %r +} + +define dso_local half 
@atomicrmw_fmaximum_half_aligned_seq_cst(ptr %ptr, half %value) { +; -O0-LABEL: atomicrmw_fmaximum_half_aligned_seq_cst: +; -O0:ldaxrh w0, [x9] +; -O0:cmp w0, w10, uxth +; -O0:stlxrh w8, w11, [x9] +; -O0:subs w8, w8, w0, uxth +; +; -O1-LABEL: atomicrmw_fmaximum_half_aligned_seq_cst: +; -O1:ldaxrh w8, [x0] +; -O1:stlxrh w9, w8, [x0] +%r = atomicrmw fmaximum ptr %ptr, half %value seq_cst, align 2 +ret half %r +} + +define dso_local bfloat @atomicrmw_fmaximum_bfloat_aligned_monotonic(ptr %ptr, bfloat %value) { +; -O0-LABEL: atomicrmw_fmaximum_bfloat_aligned_monotonic: +; -O0:add w8, w8, w9 +; -O0:add w8, w8, w9 +; -O0:ldaxrh w9, [x11] +; -O0:cmp w9, w8, uxth +; -O0:stlxrh w10, w12, [x11] +; -O0:subs w8, w9, w8, uxth +; -O0:subs