[llvm-branch-commits] [llvm] release/20.x: [Hexagon] Explicitly truncate constant in UAddSubO (#127360) (PR #127527)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/127527 Backport 788cb725d8b92a82e41e64540dccca97c9086a58 Requested by: @nikic >From 858d79c579b55d3785136c09a9d7082538204451 Mon Sep 17 00:00:00 2001 From: Brian Cain Date: Mon, 17 Feb 2025 09:30:48 -0600 Subject: [PATCH] [Hexagon] Explicitly truncate constant in UAddSubO (#127360) After #117558 landed, this code would assert "Value is not an N-bit unsigned value" in getConstant(), from a test case in zig. Co-authored-by: Craig Topper Fixes #127296 (cherry picked from commit 788cb725d8b92a82e41e64540dccca97c9086a58) --- .../lib/Target/Hexagon/HexagonISelLowering.cpp | 2 +- llvm/test/CodeGen/Hexagon/iss127296.ll | 18 ++ 2 files changed, 19 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/Hexagon/iss127296.ll diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp index 12ca0c505bd06..5ce5cae2ff906 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -3273,7 +3273,7 @@ HexagonTargetLowering::LowerUAddSubO(SDValue Op, SelectionDAG &DAG) const { if (Opc == ISD::USUBO) { SDValue Op = DAG.getNode(ISD::SUB, dl, VTs.VTs[0], {X, Y}); SDValue Ov = DAG.getSetCC(dl, MVT::i1, Op, -DAG.getConstant(-1, dl, ty(Op)), ISD::SETEQ); +DAG.getAllOnesConstant(dl, ty(Op)), ISD::SETEQ); return DAG.getMergeValues({Op, Ov}, dl); } } diff --git a/llvm/test/CodeGen/Hexagon/iss127296.ll b/llvm/test/CodeGen/Hexagon/iss127296.ll new file mode 100644 index 0..bf0e7a9881014 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/iss127296.ll @@ -0,0 +1,18 @@ +; RUN: llc -mtriple=hexagon -O0 < %s | FileCheck %s + +; CHECK: r0 = add(r0,#-1) + +define fastcc void @os.linux.tls.initStatic(i32 %x) { + %1 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %x, i32 1) + br label %2 + + 2:; preds = %0 + %3 = extractvalue { i32, i1 } %1, 0 + ret void +} + +; Function Attrs: nocallback nofree nosync nounwind 
speculatable willreturn memory(none) +declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) #0 + +attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } + ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [Hexagon] Explicitly truncate constant in UAddSubO (#127360) (PR #127527)
llvmbot wrote: @topperc What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/127527 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [RegAlloc][NewPM] Plug Greedy RA in codegen pipeline (PR #120557)
https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/120557 >From 64fc22034753b75dceb4d1f51ab2de88649d291a Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Tue, 11 Feb 2025 12:36:40 + Subject: [PATCH 1/4] [CodeGen][NewPM] Plug greedy RA in codegen pipeline --- llvm/include/llvm/Passes/CodeGenPassBuilder.h | 51 ++- .../llvm/Passes/MachinePassRegistry.def | 4 +- .../include/llvm/Target/CGPassBuilderOption.h | 4 +- llvm/lib/Passes/PassBuilder.cpp | 14 + ...plicit-def-remat-requires-impdef-check.mir | 1 + ...implicit-def-with-impdef-greedy-assert.mir | 1 + llvm/test/CodeGen/AArch64/pr51516.mir | 1 + llvm/test/CodeGen/AArch64/spill-fold.mir | 2 + llvm/test/CodeGen/MIR/Generic/runPass.mir | 1 + .../SystemZ/clear-liverange-spillreg.mir | 1 + llvm/test/CodeGen/Thumb/high-reg-clobber.mir | 1 + llvm/test/CodeGen/X86/limit-split-cost.mir| 1 + llvm/tools/llc/NewPMDriver.cpp| 15 -- 13 files changed, 75 insertions(+), 22 deletions(-) diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index ca065d67eacef..d895eee9bf4da 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -1062,7 +1062,9 @@ void CodeGenPassBuilder::addMachineSSAOptimization( /// /// A target that uses the standard regalloc pass order for fast or optimized /// allocation may still override this for per-target regalloc -/// selection. But -regalloc=... always takes precedence. +/// selection. But -regalloc-npm=... always takes precedence. +/// If a target does not want to allow users to set -regalloc-npm=... at all, +/// check if Opt.RegAlloc == RegAllocType::Unset. template void CodeGenPassBuilder::addTargetRegisterAllocator( AddMachinePass &addPass, bool Optimized) const { @@ -1075,10 +1077,29 @@ void CodeGenPassBuilder::addTargetRegisterAllocator( /// Find and instantiate the register allocation pass requested by this target /// at the current optimization level. 
Different register allocators are /// defined as separate passes because they may require different analysis. +/// +/// This helper ensures that the -regalloc-npm= option is always available, +/// even for targets that override the default allocator. template void CodeGenPassBuilder::addRegAllocPass( AddMachinePass &addPass, bool Optimized) const { - // TODO: Parse Opt.RegAlloc to add register allocator. + // Use the specified -regalloc-npm={basic|greedy|fast|pbqp} + if (Opt.RegAlloc > RegAllocType::Default) { +switch (Opt.RegAlloc) { +case RegAllocType::Fast: + addPass(RegAllocFastPass()); + break; +case RegAllocType::Greedy: + addPass(RAGreedyPass()); + break; +default: + report_fatal_error("register allocator not supported yet.", false); +} +return; + } + // -regalloc=default or unspecified, so pick based on the optimization level + // or ask the target for the regalloc pass. + derived().addTargetRegisterAllocator(addPass, Optimized); } template @@ -1149,20 +1170,22 @@ void CodeGenPassBuilder::addOptimizedRegAlloc( // PreRA instruction scheduling. addPass(MachineSchedulerPass(&TM)); - if (derived().addRegAssignmentOptimized(addPass)) { -// Allow targets to expand pseudo instructions depending on the choice of -// registers before MachineCopyPropagation. -derived().addPostRewrite(addPass); + if (auto E = derived().addRegAssignmentOptimized(addPass)) { +// addRegAssignmentOptimized did not add a reg alloc pass, so do nothing. +return; + } + // Allow targets to expand pseudo instructions depending on the choice of + // registers before MachineCopyPropagation. + derived().addPostRewrite(addPass); -// Copy propagate to forward register uses and try to eliminate COPYs that -// were not coalesced. -addPass(MachineCopyPropagationPass()); + // Copy propagate to forward register uses and try to eliminate COPYs that + // were not coalesced. + addPass(MachineCopyPropagationPass()); -// Run post-ra machine LICM to hoist reloads / remats. 
-// -// FIXME: can this move into MachineLateOptimization? -addPass(MachineLICMPass()); - } + // Run post-ra machine LICM to hoist reloads / remats. + // + // FIXME: can this move into MachineLateOptimization? + addPass(MachineLICMPass()); } //===-===// diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index 45abbc3b02e75..3199337f065fb 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -195,12 +195,12 @@ MACHINE_FUNCTION_PASS_WITH_PARAMS( }, "filter=reg-filter;no-clear-vregs") +// 'all' is the default filter MACHINE_FUNCTION_PASS_WITH_PARAMS(
[llvm-branch-commits] [libcxx] release/20.x: [libc++][TZDB] Fixes mapping of nonexisting time. (#127330) (PR #127531)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/127531 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] release/20.x: [libc++][TZDB] Fixes mapping of nonexisting time. (#127330) (PR #127531)
llvmbot wrote: @llvm/pr-subscribers-libcxx Author: None (llvmbot) Changes Backport 941f7cbf5a3e7aa9f36b002dc22cfdb4ff50fea8 Requested by: @mordante --- Full diff: https://github.com/llvm/llvm-project/pull/127531.diff 2 Files Affected: - (modified) libcxx/include/__chrono/time_zone.h (+6-2) - (modified) libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/to_sys_choose.pass.cpp (+15-2) ``diff diff --git a/libcxx/include/__chrono/time_zone.h b/libcxx/include/__chrono/time_zone.h index ab5c22eceaaf1..d18d59d2736bf 100644 --- a/libcxx/include/__chrono/time_zone.h +++ b/libcxx/include/__chrono/time_zone.h @@ -103,10 +103,14 @@ class _LIBCPP_AVAILABILITY_TZDB time_zone { to_sys(const local_time<_Duration>& __time, choose __z) const { local_info __info = get_info(__time); switch (__info.result) { -case local_info::unique: -case local_info::nonexistent: // first and second are the same +case local_info::unique: // first and second are the same return sys_time>{__time.time_since_epoch() - __info.first.offset}; +case local_info::nonexistent: + // first and second are the same + // All non-existing values are converted to the same time. + return sys_time>{__info.first.end}; + case local_info::ambiguous: switch (__z) { case choose::earliest: diff --git a/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/to_sys_choose.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/to_sys_choose.pass.cpp index bad4ef352e9b9..1147c9fadf9ae 100644 --- a/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/to_sys_choose.pass.cpp +++ b/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/to_sys_choose.pass.cpp @@ -88,7 +88,7 @@ static void test_nonexistent() { // Pick an historic date where it's well known what the time zone rules were. // This makes it unlikely updates to the database change these rules. 
std::chrono::local_time time{ - (std::chrono::sys_days{std::chrono::March / 30 / 1986} + 2h + 30min).time_since_epoch()}; + (std::chrono::sys_days{std::chrono::March / 30 / 1986} + 2h).time_since_epoch()}; std::chrono::sys_seconds expected{time.time_since_epoch() - 1h}; @@ -100,6 +100,13 @@ assert(tz->to_sys(time + 0us, std::chrono::choose::latest) == expected); assert(tz->to_sys(time + 0ms, std::chrono::choose::earliest) == expected); assert(tz->to_sys(time + 0s, std::chrono::choose::latest) == expected); + + // The entire nonexistent hour should map to the same time. + // For nonexistent times, the value of std::chrono::choose has no effect. + assert(tz->to_sys(time + 1s, std::chrono::choose::earliest) == expected); + assert(tz->to_sys(time + 1min, std::chrono::choose::latest) == expected); + assert(tz->to_sys(time + 30min, std::chrono::choose::earliest) == expected); + assert(tz->to_sys(time + 59min + 59s, std::chrono::choose::latest) == expected); } // Tests ambiguous conversions. @@ -120,7 +127,7 @@ // Pick an historic date where it's well known what the time zone rules were. // This makes it unlikely updates to the database change these rules.
std::chrono::local_time time{ - (std::chrono::sys_days{std::chrono::September / 28 / 1986} + 2h + 30min).time_since_epoch()}; + (std::chrono::sys_days{std::chrono::September / 28 / 1986} + 2h).time_since_epoch()}; std::chrono::sys_seconds earlier{time.time_since_epoch() - 2h}; std::chrono::sys_seconds later{time.time_since_epoch() - 1h}; @@ -133,6 +140,12 @@ assert(tz->to_sys(time + 0us, std::chrono::choose::latest) == later); assert(tz->to_sys(time + 0ms, std::chrono::choose::earliest) == earlier); assert(tz->to_sys(time + 0s, std::chrono::choose::latest) == later); + + // Test times in the ambiguous hour + assert(tz->to_sys(time + 1s, std::chrono::choose::earliest) == earlier + 1s); + assert(tz->to_sys(time + 1min, std::chrono::choose::latest) == later + 1min); + assert(tz->to_sys(time + 30min, std::chrono::choose::earliest) == earlier + 30min); + assert(tz->to_sys(time + 59min + 59s, std::chrono::choose::latest) == later + 59min + 59s); } // This test does the basic validations of this function. The library function `` https://github.com/llvm/llvm-project/pull/127531 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] release/20.x: [ELF] Refine isExported/isPreemptible condition (PR #126848)
https://github.com/MaskRay approved this pull request. https://github.com/llvm/llvm-project/pull/126848 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Push amdgpu-preload-kern-arg-prolog after livedebugvalues (PR #126148)
https://github.com/slinder1 updated https://github.com/llvm/llvm-project/pull/126148 >From 075a702713d78b66b495a0903d069af0cd1bf97e Mon Sep 17 00:00:00 2001 From: Scott Linder Date: Thu, 6 Feb 2025 00:01:07 + Subject: [PATCH] [AMDGPU] Push amdgpu-preload-kern-arg-prolog after livedebugvalues This is effectively a workaround for a bug in livedebugvalues, but seems to potentially be a general improvement, as BB sections seems like it could ruin the special 256-byte prelude scheme that amdgpu-preload-kern-arg-prolog requires anyway. Moving it even later doesn't seem to have any material impact, and just adds livedebugvalues to the list of things which no longer have to deal with pseudo multiple-entry functions. AMDGPU debug-info isn't supported upstream yet, so the bug being avoided isn't testable here. I am posting the patch upstream to avoid an unnecessary diff with AMD's fork. --- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 6 ++ llvm/test/CodeGen/AMDGPU/llc-pipeline.ll | 10 +- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index eb488843b53e0..92ab106dd4a98 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -1151,6 +1151,7 @@ class GCNPassConfig final : public AMDGPUPassConfig { void addPostRegAlloc() override; void addPreSched2() override; void addPreEmitPass() override; + void addPostBBSections() override; }; } // end anonymous namespace @@ -1690,6 +1691,11 @@ void GCNPassConfig::addPreEmitPass() { addPass(&AMDGPUInsertDelayAluID); addPass(&BranchRelaxationPassID); +} + +void GCNPassConfig::addPostBBSections() { + // We run this later to avoid passes like livedebugvalues and BBSections + // having to deal with the apparent multi-entry functions we may generate. 
addPass(createAMDGPUPreloadKernArgPrologLegacyPass()); } diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll index 893b9fa6fb40d..d7f54f3b8e9e2 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -145,11 +145,11 @@ ; GCN-O0-NEXT:Post RA hazard recognizer ; GCN-O0-NEXT:AMDGPU Insert waits for SGPR read hazards ; GCN-O0-NEXT:Branch relaxation pass -; GCN-O0-NEXT:AMDGPU Preload Kernel Arguments Prolog ; GCN-O0-NEXT:Register Usage Information Collector Pass ; GCN-O0-NEXT:Remove Loads Into Fake Uses ; GCN-O0-NEXT:Live DEBUG_VALUE analysis ; GCN-O0-NEXT:Machine Sanitizer Binary Metadata +; GCN-O0-NEXT:AMDGPU Preload Kernel Arguments Prolog ; GCN-O0-NEXT:Lazy Machine Block Frequency Analysis ; GCN-O0-NEXT:Machine Optimization Remark Emitter ; GCN-O0-NEXT:Stack Frame Layout Analysis @@ -430,11 +430,11 @@ ; GCN-O1-NEXT:AMDGPU Insert waits for SGPR read hazards ; GCN-O1-NEXT:AMDGPU Insert Delay ALU ; GCN-O1-NEXT:Branch relaxation pass -; GCN-O1-NEXT:AMDGPU Preload Kernel Arguments Prolog ; GCN-O1-NEXT:Register Usage Information Collector Pass ; GCN-O1-NEXT:Remove Loads Into Fake Uses ; GCN-O1-NEXT:Live DEBUG_VALUE analysis ; GCN-O1-NEXT:Machine Sanitizer Binary Metadata +; GCN-O1-NEXT:AMDGPU Preload Kernel Arguments Prolog ; GCN-O1-NEXT:Lazy Machine Block Frequency Analysis ; GCN-O1-NEXT:Machine Optimization Remark Emitter ; GCN-O1-NEXT:Stack Frame Layout Analysis @@ -743,11 +743,11 @@ ; GCN-O1-OPTS-NEXT:AMDGPU Insert waits for SGPR read hazards ; GCN-O1-OPTS-NEXT:AMDGPU Insert Delay ALU ; GCN-O1-OPTS-NEXT:Branch relaxation pass -; GCN-O1-OPTS-NEXT:AMDGPU Preload Kernel Arguments Prolog ; GCN-O1-OPTS-NEXT:Register Usage Information Collector Pass ; GCN-O1-OPTS-NEXT:Remove Loads Into Fake Uses ; GCN-O1-OPTS-NEXT:Live DEBUG_VALUE analysis ; GCN-O1-OPTS-NEXT:Machine Sanitizer Binary Metadata +; GCN-O1-OPTS-NEXT:AMDGPU Preload Kernel Arguments Prolog ; GCN-O1-OPTS-NEXT:Lazy Machine 
Block Frequency Analysis ; GCN-O1-OPTS-NEXT:Machine Optimization Remark Emitter ; GCN-O1-OPTS-NEXT:Stack Frame Layout Analysis @@ -1062,11 +1062,11 @@ ; GCN-O2-NEXT:AMDGPU Insert waits for SGPR read hazards ; GCN-O2-NEXT:AMDGPU Insert Delay ALU ; GCN-O2-NEXT:Branch relaxation pass -; GCN-O2-NEXT:AMDGPU Preload Kernel Arguments Prolog ; GCN-O2-NEXT:Register Usage Information Collector Pass ; GCN-O2-NEXT:Remove Loads Into Fake Uses ; GCN-O2-NEXT:Live DEBUG_VALUE analysis ; GCN-O2-NEXT:Machine Sanitizer Binary Metadata +; GCN-O2-NEXT:AMDGPU Preload Kernel Arguments Prolog ; GCN-O2-NEXT:Lazy Machine Block Frequency Analysis ; GCN-O2-NEXT:Ma
[llvm-branch-commits] [llvm] [AMDGPU] Push amdgpu-preload-kern-arg-prolog after livedebugvalues (PR #126148)
https://github.com/slinder1 updated https://github.com/llvm/llvm-project/pull/126148 >From 075a702713d78b66b495a0903d069af0cd1bf97e Mon Sep 17 00:00:00 2001 From: Scott Linder Date: Thu, 6 Feb 2025 00:01:07 + Subject: [PATCH] [AMDGPU] Push amdgpu-preload-kern-arg-prolog after livedebugvalues This is effectively a workaround for a bug in livedebugvalues, but seems to potentially be a general improvement, as BB sections seems like it could ruin the special 256-byte prelude scheme that amdgpu-preload-kern-arg-prolog requires anyway. Moving it even later doesn't seem to have any material impact, and just adds livedebugvalues to the list of things which no longer have to deal with pseudo multiple-entry functions. AMDGPU debug-info isn't supported upstream yet, so the bug being avoided isn't testable here. I am posting the patch upstream to avoid an unnecessary diff with AMD's fork. --- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 6 ++ llvm/test/CodeGen/AMDGPU/llc-pipeline.ll | 10 +- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index eb488843b53e0..92ab106dd4a98 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -1151,6 +1151,7 @@ class GCNPassConfig final : public AMDGPUPassConfig { void addPostRegAlloc() override; void addPreSched2() override; void addPreEmitPass() override; + void addPostBBSections() override; }; } // end anonymous namespace @@ -1690,6 +1691,11 @@ void GCNPassConfig::addPreEmitPass() { addPass(&AMDGPUInsertDelayAluID); addPass(&BranchRelaxationPassID); +} + +void GCNPassConfig::addPostBBSections() { + // We run this later to avoid passes like livedebugvalues and BBSections + // having to deal with the apparent multi-entry functions we may generate. 
addPass(createAMDGPUPreloadKernArgPrologLegacyPass()); } diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll index 893b9fa6fb40d..d7f54f3b8e9e2 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -145,11 +145,11 @@ ; GCN-O0-NEXT:Post RA hazard recognizer ; GCN-O0-NEXT:AMDGPU Insert waits for SGPR read hazards ; GCN-O0-NEXT:Branch relaxation pass -; GCN-O0-NEXT:AMDGPU Preload Kernel Arguments Prolog ; GCN-O0-NEXT:Register Usage Information Collector Pass ; GCN-O0-NEXT:Remove Loads Into Fake Uses ; GCN-O0-NEXT:Live DEBUG_VALUE analysis ; GCN-O0-NEXT:Machine Sanitizer Binary Metadata +; GCN-O0-NEXT:AMDGPU Preload Kernel Arguments Prolog ; GCN-O0-NEXT:Lazy Machine Block Frequency Analysis ; GCN-O0-NEXT:Machine Optimization Remark Emitter ; GCN-O0-NEXT:Stack Frame Layout Analysis @@ -430,11 +430,11 @@ ; GCN-O1-NEXT:AMDGPU Insert waits for SGPR read hazards ; GCN-O1-NEXT:AMDGPU Insert Delay ALU ; GCN-O1-NEXT:Branch relaxation pass -; GCN-O1-NEXT:AMDGPU Preload Kernel Arguments Prolog ; GCN-O1-NEXT:Register Usage Information Collector Pass ; GCN-O1-NEXT:Remove Loads Into Fake Uses ; GCN-O1-NEXT:Live DEBUG_VALUE analysis ; GCN-O1-NEXT:Machine Sanitizer Binary Metadata +; GCN-O1-NEXT:AMDGPU Preload Kernel Arguments Prolog ; GCN-O1-NEXT:Lazy Machine Block Frequency Analysis ; GCN-O1-NEXT:Machine Optimization Remark Emitter ; GCN-O1-NEXT:Stack Frame Layout Analysis @@ -743,11 +743,11 @@ ; GCN-O1-OPTS-NEXT:AMDGPU Insert waits for SGPR read hazards ; GCN-O1-OPTS-NEXT:AMDGPU Insert Delay ALU ; GCN-O1-OPTS-NEXT:Branch relaxation pass -; GCN-O1-OPTS-NEXT:AMDGPU Preload Kernel Arguments Prolog ; GCN-O1-OPTS-NEXT:Register Usage Information Collector Pass ; GCN-O1-OPTS-NEXT:Remove Loads Into Fake Uses ; GCN-O1-OPTS-NEXT:Live DEBUG_VALUE analysis ; GCN-O1-OPTS-NEXT:Machine Sanitizer Binary Metadata +; GCN-O1-OPTS-NEXT:AMDGPU Preload Kernel Arguments Prolog ; GCN-O1-OPTS-NEXT:Lazy Machine 
Block Frequency Analysis ; GCN-O1-OPTS-NEXT:Machine Optimization Remark Emitter ; GCN-O1-OPTS-NEXT:Stack Frame Layout Analysis @@ -1062,11 +1062,11 @@ ; GCN-O2-NEXT:AMDGPU Insert waits for SGPR read hazards ; GCN-O2-NEXT:AMDGPU Insert Delay ALU ; GCN-O2-NEXT:Branch relaxation pass -; GCN-O2-NEXT:AMDGPU Preload Kernel Arguments Prolog ; GCN-O2-NEXT:Register Usage Information Collector Pass ; GCN-O2-NEXT:Remove Loads Into Fake Uses ; GCN-O2-NEXT:Live DEBUG_VALUE analysis ; GCN-O2-NEXT:Machine Sanitizer Binary Metadata +; GCN-O2-NEXT:AMDGPU Preload Kernel Arguments Prolog ; GCN-O2-NEXT:Lazy Machine Block Frequency Analysis ; GCN-O2-NEXT:Ma
[llvm-branch-commits] [libcxx] release/20.x: [libc++][TZDB] Fixes mapping of nonexisting time. (#127330) (PR #127531)
llvmbot wrote: @ldionne What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/127531 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [Hexagon] Explicitly truncate constant in UAddSubO (#127360) (PR #127527)
llvmbot wrote: @llvm/pr-subscribers-backend-hexagon Author: None (llvmbot) Changes Backport 788cb725d8b92a82e41e64540dccca97c9086a58 Requested by: @nikic --- Full diff: https://github.com/llvm/llvm-project/pull/127527.diff 2 Files Affected: - (modified) llvm/lib/Target/Hexagon/HexagonISelLowering.cpp (+1-1) - (added) llvm/test/CodeGen/Hexagon/iss127296.ll (+18) ``diff diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp index 12ca0c505bd06..5ce5cae2ff906 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -3273,7 +3273,7 @@ HexagonTargetLowering::LowerUAddSubO(SDValue Op, SelectionDAG &DAG) const { if (Opc == ISD::USUBO) { SDValue Op = DAG.getNode(ISD::SUB, dl, VTs.VTs[0], {X, Y}); SDValue Ov = DAG.getSetCC(dl, MVT::i1, Op, -DAG.getConstant(-1, dl, ty(Op)), ISD::SETEQ); +DAG.getAllOnesConstant(dl, ty(Op)), ISD::SETEQ); return DAG.getMergeValues({Op, Ov}, dl); } } diff --git a/llvm/test/CodeGen/Hexagon/iss127296.ll b/llvm/test/CodeGen/Hexagon/iss127296.ll new file mode 100644 index 0..bf0e7a9881014 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/iss127296.ll @@ -0,0 +1,18 @@ +; RUN: llc -mtriple=hexagon -O0 < %s | FileCheck %s + +; CHECK: r0 = add(r0,#-1) + +define fastcc void @os.linux.tls.initStatic(i32 %x) { + %1 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %x, i32 1) + br label %2 + + 2:; preds = %0 + %3 = extractvalue { i32, i1 } %1, 0 + ret void +} + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) #0 + +attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } + `` https://github.com/llvm/llvm-project/pull/127527 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [Hexagon] Explicitly truncate constant in UAddSubO (#127360) (PR #127527)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/127527 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] release/20.x: [libc++] Fixes (|multi)_set spaceship operator. (#127326) (PR #127342)
https://github.com/ldionne approved this pull request. https://github.com/llvm/llvm-project/pull/127342 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: AMDGPU: Stop emitting an error on illegal addrspacecasts (#127487) (PR #127496)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/127496 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: AMDGPU: Stop emitting an error on illegal addrspacecasts (#127487) (PR #127496)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/127496 Backport 18ea6c9 Requested by: @arsenm >From 5afb6db6e5b8e379ec75072b696df463c50ff064 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 17 Feb 2025 21:03:50 +0700 Subject: [PATCH] AMDGPU: Stop emitting an error on illegal addrspacecasts (#127487) These cannot be static compile errors, and should be treated as poison. Invalid casts may be introduced which are dynamically dead. For example: ``` void foo(volatile generic int* x) { __builtin_assume(is_shared(x)); *x = 4; } void bar() { private int y; foo(&y); // violation, wrong address space } ``` This could produce a compile time backend error or not depending on the optimization level. Similarly, the new test demonstrates a failure on a lowered atomicrmw which required inserting runtime address space checks. The invalid cases are dynamically dead, we should not error, and the AtomicExpand pass shouldn't have to consider the details of the incoming pointer to produce valid IR. This should go to the release branch. This fixes broken -O0 compiles with 64-bit atomics which would have started failing in 1d0370872f28ec9965448f33db1b105addaf64ae. 
(cherry picked from commit 18ea6c928088cf9ad2a990bfcca546c608825a7f) --- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 7 +- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 7 +- llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll | 646 ++ .../CodeGen/AMDGPU/invalid-addrspacecast.ll | 44 +- 4 files changed, 687 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index e9e47eaadd557..e84f0f5fa615a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -2426,11 +2426,8 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast( return true; } - DiagnosticInfoUnsupported InvalidAddrSpaceCast( - MF.getFunction(), "invalid addrspacecast", B.getDebugLoc()); - - LLVMContext &Ctx = MF.getFunction().getContext(); - Ctx.diagnose(InvalidAddrSpaceCast); + // Invalid casts are poison. + // TODO: Should return poison B.buildUndef(Dst); MI.eraseFromParent(); return true; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index b632c50dae0e3..e09df53995d61 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -7340,11 +7340,8 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op, // global <-> flat are no-ops and never emitted. - const MachineFunction &MF = DAG.getMachineFunction(); - DiagnosticInfoUnsupported InvalidAddrSpaceCast( - MF.getFunction(), "invalid addrspacecast", SL.getDebugLoc()); - DAG.getContext()->diagnose(InvalidAddrSpaceCast); - + // Invalid casts are poison. 
+ // TODO: Should return poison return DAG.getUNDEF(Op->getValueType(0)); } diff --git a/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll b/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll index f5c9b1a79b476..5c62730fdfe8e 100644 --- a/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll +++ b/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll @@ -444,6 +444,652 @@ define float @no_unsafe(ptr %addr, float %val) { ret float %res } +@global = hidden addrspace(1) global i64 0, align 8 + +; Make sure there is no error on an invalid addrspacecast without optimizations +define i64 @optnone_atomicrmw_add_i64_expand(i64 %val) #1 { +; GFX908-LABEL: optnone_atomicrmw_add_i64_expand: +; GFX908: ; %bb.0: +; GFX908-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX908-NEXT:s_mov_b64 s[4:5], src_private_base +; GFX908-NEXT:s_mov_b32 s6, 32 +; GFX908-NEXT:s_lshr_b64 s[4:5], s[4:5], s6 +; GFX908-NEXT:s_getpc_b64 s[6:7] +; GFX908-NEXT:s_add_u32 s6, s6, global@rel32@lo+4 +; GFX908-NEXT:s_addc_u32 s7, s7, global@rel32@hi+12 +; GFX908-NEXT:s_cmp_eq_u32 s7, s4 +; GFX908-NEXT:s_cselect_b64 s[4:5], -1, 0 +; GFX908-NEXT:v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX908-NEXT:s_mov_b64 s[4:5], -1 +; GFX908-NEXT:s_mov_b32 s6, 1 +; GFX908-NEXT:v_cmp_ne_u32_e64 s[6:7], v2, s6 +; GFX908-NEXT:s_and_b64 vcc, exec, s[6:7] +; GFX908-NEXT:; implicit-def: $vgpr3_vgpr4 +; GFX908-NEXT:s_cbranch_vccnz .LBB4_3 +; GFX908-NEXT: .LBB4_1: ; %Flow +; GFX908-NEXT:v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX908-NEXT:s_mov_b32 s4, 1 +; GFX908-NEXT:v_cmp_ne_u32_e64 s[4:5], v2, s4 +; GFX908-NEXT:s_and_b64 vcc, exec, s[4:5] +; GFX908-NEXT:s_cbranch_vccnz .LBB4_4 +; GFX908-NEXT: ; %bb.2: ; %atomicrmw.private +; GFX908-NEXT:s_waitcnt lgkmcnt(0) +; GFX908-NEXT:buffer_load_dword v3, v0, s[0:3], 0 offen +; GFX908-NEXT:s_waitcnt vmcnt(0) +; GFX908-NEXT:v_mov_b32_e32 v4, v3 +; GFX908-NEXT:v_add_co_u32_e64 v0, s[4:5], v3, v0 +; GFX908-NEXT:v_addc_co_u32_e64 v1, s[4:5], v4, v1, s[4:5] +
[llvm-branch-commits] [llvm] AMDGPU: Handle subregister uses in SIFoldOperands constant folding (PR #127485)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/127485.diff 2 Files Affected: - (modified) llvm/lib/Target/AMDGPU/SIFoldOperands.cpp (+33-24) - (modified) llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir (+34) ``diff diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index d8f3f9c54abc1..30242c461768c 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -123,7 +123,7 @@ class SIFoldOperandsImpl { SmallVectorImpl &FoldList, SmallVectorImpl &CopiesToReplace) const; - MachineOperand *getImmOrMaterializedImm(MachineOperand &Op) const; + std::optional getImmOrMaterializedImm(MachineOperand &Op) const; bool tryConstantFoldOp(MachineInstr *MI) const; bool tryFoldCndMask(MachineInstr &MI) const; bool tryFoldZeroHighBits(MachineInstr &MI) const; @@ -1293,21 +1293,22 @@ static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc) { MI.removeOperand(I); } -MachineOperand * +std::optional SIFoldOperandsImpl::getImmOrMaterializedImm(MachineOperand &Op) const { - // If this has a subregister, it obviously is a register source. 
- if (!Op.isReg() || Op.getSubReg() != AMDGPU::NoSubRegister || - !Op.getReg().isVirtual()) -return &Op; + if (Op.isImm()) +return Op.getImm(); - MachineInstr *Def = MRI->getVRegDef(Op.getReg()); + if (!Op.isReg() || !Op.getReg().isVirtual()) +return std::nullopt; + + const MachineInstr *Def = MRI->getVRegDef(Op.getReg()); if (Def && Def->isMoveImmediate()) { -MachineOperand &ImmSrc = Def->getOperand(1); +const MachineOperand &ImmSrc = Def->getOperand(1); if (ImmSrc.isImm()) - return &ImmSrc; + return TII->extractSubregFromImm(ImmSrc.getImm(), Op.getSubReg()); } - return &Op; + return std::nullopt; } // Try to simplify operations with a constant that may appear after instruction @@ -1322,12 +1323,14 @@ bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const { int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); if (Src0Idx == -1) return false; - MachineOperand *Src0 = getImmOrMaterializedImm(MI->getOperand(Src0Idx)); + + MachineOperand *Src0 = &MI->getOperand(Src0Idx); + std::optional Src0Imm = getImmOrMaterializedImm(*Src0); if ((Opc == AMDGPU::V_NOT_B32_e64 || Opc == AMDGPU::V_NOT_B32_e32 || Opc == AMDGPU::S_NOT_B32) && - Src0->isImm()) { -MI->getOperand(1).ChangeToImmediate(~Src0->getImm()); + Src0Imm) { +MI->getOperand(1).ChangeToImmediate(~*Src0Imm); mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_NOT_B32))); return true; } @@ -1335,17 +1338,19 @@ bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const { int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); if (Src1Idx == -1) return false; - MachineOperand *Src1 = getImmOrMaterializedImm(MI->getOperand(Src1Idx)); - if (!Src0->isImm() && !Src1->isImm()) + MachineOperand *Src1 = &MI->getOperand(Src1Idx); + std::optional Src1Imm = getImmOrMaterializedImm(*Src1); + + if (!Src0Imm && !Src1Imm) return false; // and k0, k1 -> v_mov_b32 (k0 & k1) // or k0, k1 -> v_mov_b32 (k0 | k1) // xor k0, k1 -> v_mov_b32 (k0 ^ k1) - if (Src0->isImm() && Src1->isImm()) { + 
if (Src0Imm && Src1Imm) { int32_t NewImm; -if (!evalBinaryInstruction(Opc, NewImm, Src0->getImm(), Src1->getImm())) +if (!evalBinaryInstruction(Opc, NewImm, *Src0Imm, *Src1Imm)) return false; bool IsSGPR = TRI->isSGPRReg(*MRI, MI->getOperand(0).getReg()); @@ -1361,12 +1366,13 @@ bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const { if (!MI->isCommutable()) return false; - if (Src0->isImm() && !Src1->isImm()) { + if (Src0Imm && !Src1Imm) { std::swap(Src0, Src1); std::swap(Src0Idx, Src1Idx); +std::swap(Src0Imm, Src1Imm); } - int32_t Src1Val = static_cast(Src1->getImm()); + int32_t Src1Val = static_cast(*Src1Imm); if (Opc == AMDGPU::V_OR_B32_e64 || Opc == AMDGPU::V_OR_B32_e32 || Opc == AMDGPU::S_OR_B32) { @@ -1423,9 +1429,12 @@ bool SIFoldOperandsImpl::tryFoldCndMask(MachineInstr &MI) const { MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); if (!Src1->isIdenticalTo(*Src0)) { -auto *Src0Imm = getImmOrMaterializedImm(*Src0); -auto *Src1Imm = getImmOrMaterializedImm(*Src1); -if (!Src1Imm->isIdenticalTo(*Src0Imm)) +std::optional Src1Imm = getImmOrMaterializedImm(*Src1); +if (!Src1Imm) + return false; + +std::optional Src0Imm = getImmOrMaterializedImm(*Src0); +if (!Src0Imm || *Src0Imm != *Src1Imm) return false; } @@ -1458,8 +1467,8 @@ bool SIFoldOperands
[llvm-branch-commits] [llvm] release/20.x: AMDGPU: Stop emitting an error on illegal addrspacecasts (#127487) (PR #127496)
llvmbot wrote: @jhuber6 What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/127496 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: AMDGPU: Stop emitting an error on illegal addrspacecasts (#127487) (PR #127496)
https://github.com/jhuber6 approved this pull request. https://github.com/llvm/llvm-project/pull/127496 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Fix foldImmediate breaking register class constraints (PR #127481)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite (https://app.graphite.dev/github/pr/llvm/llvm-project/127481). Learn more: https://graphite.dev/docs/merge-pull-requests * **#127485** * **#127484** * **#127483** * **#127482** * **#127481** 👈 (this PR — view in Graphite: https://app.graphite.dev/github/pr/llvm/llvm-project/127481) * **#127480** * `main` This stack of pull requests is managed by Graphite (https://graphite.dev).
Learn more about stacking: https://stacking.dev https://github.com/llvm/llvm-project/pull/127481 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Implement getConstValDefinedInReg and use in foldImmediate (NFC) (PR #127482)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/127482 This is NFC because it currently only matters for cases that are not isMoveImmediate, and we do not yet implement any of those. This just moves the implementation of foldImmediate to use the common interface, similar to how x86 does it. >From 91b6a4a4939bc5f9518ac4c66794af5fc2ba2193 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 17 Feb 2025 13:22:06 +0700 Subject: [PATCH] AMDGPU: Implement getConstValDefinedInReg and use in foldImmediate (NFC) This is NFC because it currently only matters for cases that are not isMoveImmediate, and we do not yet implement any of those. This just moves the implementation of foldImmediate to use the common interface, similar to how x86 does it. --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 67 ++ llvm/lib/Target/AMDGPU/SIInstrInfo.h | 22 +++-- 2 files changed, 54 insertions(+), 35 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 0dafa527f722a..07f1dc299c0e5 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -1327,6 +1327,33 @@ Register SIInstrInfo::insertNE(MachineBasicBlock *MBB, return Reg; } +bool SIInstrInfo::getConstValDefinedInReg(const MachineInstr &MI, + const Register Reg, + int64_t &ImmVal) const { + // TODO: Handle all the special cases handled in SIShrinkInstructions + // (e.g. 
s_brev_b32 imm -> reverse(imm)) + switch (MI.getOpcode()) { + case AMDGPU::V_MOV_B32_e32: + case AMDGPU::S_MOV_B32: + case AMDGPU::S_MOVK_I32: + case AMDGPU::S_MOV_B64: + case AMDGPU::V_MOV_B64_e32: + case AMDGPU::V_ACCVGPR_WRITE_B32_e64: + case AMDGPU::S_MOV_B64_IMM_PSEUDO: + case AMDGPU::V_MOV_B64_PSEUDO: { +const MachineOperand &Src0 = MI.getOperand(1); +if (Src0.isImm()) { + ImmVal = Src0.getImm(); + return MI.getOperand(0).getReg() == Reg; +} + +return false; + } + default: +return false; + } +} + unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const { if (RI.isAGPRClass(DstRC)) @@ -3395,27 +3422,11 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, if (!MRI->hasOneNonDBGUse(Reg)) return false; - switch (DefMI.getOpcode()) { - default: -return false; - case AMDGPU::V_MOV_B64_e32: - case AMDGPU::S_MOV_B64: - case AMDGPU::V_MOV_B64_PSEUDO: - case AMDGPU::S_MOV_B64_IMM_PSEUDO: - case AMDGPU::V_MOV_B32_e32: - case AMDGPU::S_MOV_B32: - case AMDGPU::V_ACCVGPR_WRITE_B32_e64: -break; - } - - const MachineOperand *ImmOp = getNamedOperand(DefMI, AMDGPU::OpName::src0); - assert(ImmOp); - // FIXME: We could handle FrameIndex values here. - if (!ImmOp->isImm()) + int64_t Imm; + if (!getConstValDefinedInReg(DefMI, Reg, Imm)) return false; - auto getImmFor = [ImmOp](const MachineOperand &UseOp) -> int64_t { -int64_t Imm = ImmOp->getImm(); + auto getImmFor = [=](const MachineOperand &UseOp) -> int64_t { switch (UseOp.getSubReg()) { default: return Imm; @@ -3502,12 +3513,14 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, // If this is a free constant, there's no reason to do this. // TODO: We could fold this here instead of letting SIFoldOperands do it // later. -MachineOperand *Src0 = getNamedOperand(UseMI, AMDGPU::OpName::src0); +int Src0Idx = getNamedOperandIdx(UseMI.getOpcode(), AMDGPU::OpName::src0); // Any src operand can be used for the legality check. 
-if (isInlineConstant(UseMI, *Src0, *ImmOp)) +if (isInlineConstant(UseMI, Src0Idx, Imm)) return false; +MachineOperand *Src0 = &UseMI.getOperand(Src0Idx); + bool IsF32 = Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64; bool IsFMA = @@ -4267,18 +4280,11 @@ bool SIInstrInfo::isInlineConstant(const APFloat &Imm) const { } } -bool SIInstrInfo::isInlineConstant(const MachineOperand &MO, - uint8_t OperandType) const { - assert(!MO.isReg() && "isInlineConstant called on register operand!"); - if (!MO.isImm()) -return false; - +bool SIInstrInfo::isInlineConstant(int64_t Imm, uint8_t OperandType) const { // MachineOperand provides no way to tell the true operand size, since it only // records a 64-bit value. We need to know the size to determine if a 32-bit // floating point immediate bit pattern is legal for an integer immediate. It // would be for any 32-bit integer operand, but would not be for a 64-bit one. - - int64_t Imm = MO.getImm(); switch (OperandType) { case AMDGPU::OPERAND_REG_IMM_INT32: case AMDGPU::OPERAND_REG_IMM_FP32: @@ -4300,8 +4306,7 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &M
[llvm-branch-commits] [llvm] [ReleaseNotes][RemoveDIs] Add release note for deprecated insertion methods (PR #127493)
https://github.com/jmorse created https://github.com/llvm/llvm-project/pull/127493 I've stuck this under "LLVM Infrastructure" as the IR plumbing methods feel like infrastructure. The LLVM17 release notes stuck similar notes in that section too. >From 7a3b7dd0acc441be19f232f6f44baa239b7d94c4 Mon Sep 17 00:00:00 2001 From: Jeremy Morse Date: Mon, 17 Feb 2025 13:38:41 + Subject: [PATCH] [ReleaseNotes][RemoveDIs] Add release note for deprecated insertion methods --- llvm/docs/ReleaseNotes.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index b42e111dc4283..abba2f2257d3d 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -116,6 +116,8 @@ Changes to the LLVM IR Changes to LLVM infrastructure -- + * Several methods that use Instruction pointers as insertion positions (moveBefore, getFirstNonPHI) have been deprecated in favour of overloads and variants that use `BasicBlock::iterator`s instead. The instruction-flavoured methods will be removed in a future release. This work is part of the [RemoveDIs](https://llvm.org/docs/RemoveDIsDebugInfo.html) project, the documentation for which contains instructions for updating call-sites using the deprecated methods. + Changes to building LLVM ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [ReleaseNotes][RemoveDIs] Add release note for deprecated insertion methods (PR #127493)
https://github.com/jmorse edited https://github.com/llvm/llvm-project/pull/127493 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [ReleaseNotes][RemoveDIs] Add release note for deprecated insertion methods (PR #127493)
https://github.com/OCHyams commented: Couple of nits from me (sorry). https://github.com/llvm/llvm-project/pull/127493 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [ReleaseNotes][RemoveDIs] Add release note for deprecated insertion methods (PR #127493)
@@ -116,6 +116,8 @@ Changes to the LLVM IR Changes to LLVM infrastructure -- + * Several methods that use Instruction pointers as insertion positions (moveBefore, getFirstNonPHI) have been deprecated in favour of overloads and variants that use `BasicBlock::iterator`s instead. The instruction-flavoured methods will be removed in a future release. This work is part of the [RemoveDIs](https://llvm.org/docs/RemoveDIsDebugInfo.html) project, the documentation for which contains instructions for updating call-sites using the deprecated methods. OCHyams wrote: > Couple of nits from me (sorry). ... that github ate. I think there's some unnecessary ambiguity around whether `moveBefore` + `getFirstNonPHI` is the complete list of affected functions or not (I think both the word "several" and then having those two in parens is causing it). Can we be more precise? Secondly, IMO I think changing "instruction-flavoured" to "pointer-flavoured" slightly improves clarity. YMMV though. https://github.com/llvm/llvm-project/pull/127493 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [mlir] [MLIR][OpenMP] Add LLVM translation support for OpenMP UserDefinedMappers (PR #124746)
@@ -3745,17 +3842,33 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder, return builder.saveIP(); }; + auto customMapperCB = + [&](unsigned int i) -> llvm::Expected { +llvm::Function *mapperFunc = nullptr; +if (combinedInfo.Mappers[i]) { + info.HasMapper = true; + llvm::Expected newFn = getOrCreateUserDefinedMapperFunc( + combinedInfo.Mappers[i], builder, moduleTranslation); + if (!newFn) +return newFn.takeError(); + mapperFunc = *newFn; +} +return mapperFunc; skatrak wrote: Nit: You can probably simplify this a bit. ```suggestion if (!combinedInfo.Mappers[i]) return nullptr; info.HasMapper = true; return getOrCreateUserDefinedMapperFunc( combinedInfo.Mappers[i], builder, moduleTranslation); ``` https://github.com/llvm/llvm-project/pull/124746 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [mlir] [MLIR][OpenMP] Add LLVM translation support for OpenMP UserDefinedMappers (PR #124746)
@@ -3529,6 +3549,86 @@ static void genMapInfos(llvm::IRBuilderBase &builder, } } +static llvm::Expected +emitUserDefinedMapper(Operation *declMapperOp, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation); + +static llvm::Expected +getOrCreateUserDefinedMapperFunc(Operation *op, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { + auto declMapperOp = cast(op); + std::string mapperFuncName = + moduleTranslation.getOpenMPBuilder()->createPlatformSpecificName( + {"omp_mapper", declMapperOp.getSymName()}); + if (auto *lookupFunc = moduleTranslation.lookupFunction(mapperFuncName)) +return lookupFunc; + + llvm::Expected mapperFunc = + emitUserDefinedMapper(declMapperOp, builder, moduleTranslation); + if (!mapperFunc) +return mapperFunc.takeError(); + moduleTranslation.mapFunction(mapperFuncName, *mapperFunc); skatrak wrote: I think this should be moved to `emitUserDefinedMapper`. Even though it looks unlikely that there would be any callers to that function from anywhere else, I think the right place to register the new function is where it's created. That way, this can also be simplified by returning directly the result of the `emitUserDefinedMapper` call. https://github.com/llvm/llvm-project/pull/124746 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [mlir] [MLIR][OpenMP] Add LLVM translation support for OpenMP UserDefinedMappers (PR #124746)
https://github.com/TIFitis updated https://github.com/llvm/llvm-project/pull/124746 >From 7b5c918249a9c29ae586d9f1ccae6b7359fcd793 Mon Sep 17 00:00:00 2001 From: Akash Banerjee Date: Tue, 28 Jan 2025 13:38:13 + Subject: [PATCH 1/8] [MLIR][OpenMP] Add LLVM translation support for OpenMP UserDefinedMappers This patch adds OpenMPToLLVMIRTranslation support for the OpenMP Declare Mapper directive. Since both MLIR and Clang now support custom mappers, I've made the relative params required instead of optional as well. Depends on #121005 --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 11 +- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 31 +-- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 70 +++--- .../Frontend/OpenMPIRBuilderTest.cpp | 46 ++-- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 215 +++--- mlir/test/Target/LLVMIR/omptarget-llvm.mlir | 117 ++ .../fortran/target-custom-mapper.f90 | 46 7 files changed, 437 insertions(+), 99 deletions(-) create mode 100644 offload/test/offloading/fortran/target-custom-mapper.f90 diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index cafaaa364cb76..b919c1f6ac627 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -8889,8 +8889,8 @@ static void emitOffloadingArraysAndArgs( return MFunc; }; OMPBuilder.emitOffloadingArraysAndArgs( - AllocaIP, CodeGenIP, Info, Info.RTArgs, CombinedInfo, IsNonContiguous, - ForEndCall, DeviceAddrCB, CustomMapperCB); + AllocaIP, CodeGenIP, Info, Info.RTArgs, CombinedInfo, CustomMapperCB, + IsNonContiguous, ForEndCall, DeviceAddrCB); } /// Check for inner distribute directive. 
@@ -9099,9 +9099,10 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out); std::string Name = getName({"omp_mapper", TyStr, D->getName()}); - auto *NewFn = OMPBuilder.emitUserDefinedMapper(PrivatizeAndGenMapInfoCB, - ElemTy, Name, CustomMapperCB); - UDMMap.try_emplace(D, NewFn); + llvm::Expected NewFn = OMPBuilder.emitUserDefinedMapper( + PrivatizeAndGenMapInfoCB, ElemTy, Name, CustomMapperCB); + assert(NewFn && "Unexpected error in emitUserDefinedMapper"); + UDMMap.try_emplace(D, *NewFn); if (CGF) FunctionUDMMap[CGF->CurFn].push_back(D); } diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index d25077cae63e4..151bd36aadaf0 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -2399,6 +2399,7 @@ class OpenMPIRBuilder { CurInfo.NonContigInfo.Strides.end()); } }; + using MapInfosOrErrorTy = Expected; /// Callback function type for functions emitting the host fallback code that /// is executed when the kernel launch fails. It takes an insertion point as @@ -2475,9 +2476,9 @@ class OpenMPIRBuilder { /// including base pointers, pointers, sizes, map types, user-defined mappers. void emitOffloadingArrays( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, - TargetDataInfo &Info, bool IsNonContiguous = false, - function_ref DeviceAddrCB = nullptr, - function_ref CustomMapperCB = nullptr); + TargetDataInfo &Info, function_ref CustomMapperCB, + bool IsNonContiguous = false, + function_ref DeviceAddrCB = nullptr); /// Allocates memory for and populates the arrays required for offloading /// (offload_{baseptrs|ptrs|mappers|sizes|maptypes|mapnames}). 
Then, it @@ -2488,9 +2489,9 @@ class OpenMPIRBuilder { void emitOffloadingArraysAndArgs( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info, TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo, + function_ref CustomMapperCB, bool IsNonContiguous = false, bool ForEndCall = false, - function_ref DeviceAddrCB = nullptr, - function_ref CustomMapperCB = nullptr); + function_ref DeviceAddrCB = nullptr); /// Creates offloading entry for the provided entry ID \a ID, address \a /// Addr, size \a Size, and flags \a Flags. @@ -2950,12 +2951,12 @@ class OpenMPIRBuilder { /// \param FuncName Optional param to specify mapper function name. /// \param CustomMapperCB Optional callback to generate code related to /// custom mappers. - Function *emitUserDefinedMapper( - function_ref + Expected emitUserDefinedMapper( + function_ref PrivAndGenMapInfoCB, llvm::Type *ElemTy, StringRef FuncName, - function_ref CustomMapperCB = nullptr); + function_ref CustomMapperCB); /// Generator for '#omp target data' /// @@ -2969,21 +2970,21 @@ class OpenMPIRBuilder { /// \param IfCond Value which corresponds to the if clause
[llvm-branch-commits] [clang] [llvm] [mlir] [MLIR][OpenMP] Add LLVM translation support for OpenMP UserDefinedMappers (PR #124746)
@@ -4673,7 +4804,8 @@ convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder, .Case([&](omp::TaskwaitOp op) { return convertOmpTaskwaitOp(op, builder, moduleTranslation); }) - .Casehttps://github.com/llvm/llvm-project/pull/124746 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Push amdgpu-preload-kern-arg-prolog after livedebugvalues (PR #126148)
slinder1 wrote: ### Merge activity * **Feb 17, 1:26 PM EST**: A user started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/126148). https://github.com/llvm/llvm-project/pull/126148 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] release/20.x: [libc++][TZDB] Fixes mapping of nonexisting time. (#127330) (PR #127531)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/127531 Backport 941f7cbf5a3e7aa9f36b002dc22cfdb4ff50fea8 Requested by: @mordante >From 2b96bbb64e0de22f9ac41aca6a3249cfbe8e8fa0 Mon Sep 17 00:00:00 2001 From: Mark de Wever Date: Mon, 17 Feb 2025 19:08:07 +0100 Subject: [PATCH] [libc++][TZDB] Fixes mapping of nonexisting time. (#127330) All non-existing local times in a contiguous range should map to the same time point. This fixes a bug, were the times inside the range were mapped to the wrong time. Fixes: #113654 (cherry picked from commit 941f7cbf5a3e7aa9f36b002dc22cfdb4ff50fea8) --- libcxx/include/__chrono/time_zone.h | 8 ++-- .../time.zone.members/to_sys_choose.pass.cpp| 17 +++-- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/libcxx/include/__chrono/time_zone.h b/libcxx/include/__chrono/time_zone.h index ab5c22eceaaf1..d18d59d2736bf 100644 --- a/libcxx/include/__chrono/time_zone.h +++ b/libcxx/include/__chrono/time_zone.h @@ -103,10 +103,14 @@ class _LIBCPP_AVAILABILITY_TZDB time_zone { to_sys(const local_time<_Duration>& __time, choose __z) const { local_info __info = get_info(__time); switch (__info.result) { -case local_info::unique: -case local_info::nonexistent: // first and second are the same +case local_info::unique: // first and second are the same return sys_time>{__time.time_since_epoch() - __info.first.offset}; +case local_info::nonexistent: + // first and second are the same + // All non-existing values are converted to the same time. 
+ return sys_time>{__info.first.end}; + case local_info::ambiguous: switch (__z) { case choose::earliest: diff --git a/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/to_sys_choose.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/to_sys_choose.pass.cpp index bad4ef352e9b9..1147c9fadf9ae 100644 --- a/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/to_sys_choose.pass.cpp +++ b/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/to_sys_choose.pass.cpp @@ -88,7 +88,7 @@ static void test_nonexistent() { // Pick an historic date where it's well known what the time zone rules were. // This makes it unlikely updates to the database change these rules. std::chrono::local_time time{ - (std::chrono::sys_days{std::chrono::March / 30 / 1986} + 2h + 30min).time_since_epoch()}; + (std::chrono::sys_days{std::chrono::March / 30 / 1986} + 2h).time_since_epoch()}; std::chrono::sys_seconds expected{time.time_since_epoch() - 1h}; @@ -100,6 +100,13 @@ static void test_nonexistent() { assert(tz->to_sys(time + 0us, std::chrono::choose::latest) == expected); assert(tz->to_sys(time + 0ms, std::chrono::choose::earliest) == expected); assert(tz->to_sys(time + 0s, std::chrono::choose::latest) == expected); + + // The entire nonexisting hour should map to the same time. + // For nonexistant the value of std::chrono::choose has no effect. + assert(tz->to_sys(time + 1s, std::chrono::choose::earliest) == expected); + assert(tz->to_sys(time + 1min, std::chrono::choose::latest) == expected); + assert(tz->to_sys(time + 30min, std::chrono::choose::earliest) == expected); + assert(tz->to_sys(time + 59min + 59s, std::chrono::choose::latest) == expected); } // Tests ambiguous conversions. @@ -120,7 +127,7 @@ static void test_ambiguous() { // Pick an historic date where it's well known what the time zone rules were. // This makes it unlikely updates to the database change these rules. 
std::chrono::local_time time{ - (std::chrono::sys_days{std::chrono::September / 28 / 1986} + 2h + 30min).time_since_epoch()}; + (std::chrono::sys_days{std::chrono::September / 28 / 1986} + 2h).time_since_epoch()}; std::chrono::sys_seconds earlier{time.time_since_epoch() - 2h}; std::chrono::sys_seconds later{time.time_since_epoch() - 1h}; @@ -133,6 +140,12 @@ static void test_ambiguous() { assert(tz->to_sys(time + 0us, std::chrono::choose::latest) == later); assert(tz->to_sys(time + 0ms, std::chrono::choose::earliest) == earlier); assert(tz->to_sys(time + 0s, std::chrono::choose::latest) == later); + + // Test times in the ambigious hour + assert(tz->to_sys(time + 1s, std::chrono::choose::earliest) == earlier + 1s); + assert(tz->to_sys(time + 1min, std::chrono::choose::latest) == later + 1min); + assert(tz->to_sys(time + 30min, std::chrono::choose::earliest) == earlier + 30min); + assert(tz->to_sys(time + 59min + 59s, std::chrono::choose::latest) == later + 59min + 59s); } // This test does the basic validations of this function. The library function ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Respect MBB alignment in the getFunctionCodeSize() (PR #127142)
rampitec wrote: Which one do you prefer, this or https://github.com/llvm/llvm-project/pull/127246? They are mutually exclusive. https://github.com/llvm/llvm-project/pull/127142 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [mlir] [MLIR][OpenMP] Add LLVM translation support for OpenMP UserDefinedMappers (PR #124746)
@@ -3529,6 +3549,86 @@ static void genMapInfos(llvm::IRBuilderBase &builder, } } +static llvm::Expected +emitUserDefinedMapper(Operation *declMapperOp, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation); + +static llvm::Expected +getOrCreateUserDefinedMapperFunc(Operation *op, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { + auto declMapperOp = cast(op); + std::string mapperFuncName = + moduleTranslation.getOpenMPBuilder()->createPlatformSpecificName( + {"omp_mapper", declMapperOp.getSymName()}); + if (auto *lookupFunc = moduleTranslation.lookupFunction(mapperFuncName)) +return lookupFunc; + + llvm::Expected mapperFunc = + emitUserDefinedMapper(declMapperOp, builder, moduleTranslation); + if (!mapperFunc) +return mapperFunc.takeError(); + moduleTranslation.mapFunction(mapperFuncName, *mapperFunc); + return mapperFunc; +} + +static llvm::Expected +emitUserDefinedMapper(Operation *op, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { + auto declMapperOp = cast(op); + auto declMapperInfoOp = declMapperOp.getDeclareMapperInfo(); + DataLayout dl = DataLayout(declMapperOp->getParentOfType()); + llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); + llvm::Type *varType = moduleTranslation.convertType(declMapperOp.getType()); + std::string mapperName = ompBuilder->createPlatformSpecificName( + {"omp_mapper", declMapperOp.getSymName()}); + SmallVector mapVars = declMapperInfoOp.getMapVars(); + + using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; + + // Fill up the arrays with all the mapped variables. 
+ MapInfosTy combinedInfo; + auto genMapInfoCB = + [&](InsertPointTy codeGenIP, llvm::Value *ptrPHI, + llvm::Value *unused2) -> llvm::OpenMPIRBuilder::MapInfosOrErrorTy { +builder.restoreIP(codeGenIP); +moduleTranslation.mapValue(declMapperOp.getSymVal(), ptrPHI); +moduleTranslation.mapBlock(&declMapperOp.getRegion().front(), + builder.GetInsertBlock()); +if (failed(moduleTranslation.convertBlock(declMapperOp.getRegion().front(), + /*ignoreArguments=*/true, + builder))) + return llvm::make_error(); +MapInfoData mapData; +collectMapDataFromMapOperands(mapData, mapVars, moduleTranslation, dl, + builder); +genMapInfos(builder, moduleTranslation, dl, combinedInfo, mapData); + +// Drop the mapping that is no longer necessary so that the same region can +// be processed multiple times. +moduleTranslation.forgetMapping(declMapperOp.getRegion()); +return combinedInfo; + }; + + auto customMapperCB = [&](unsigned i) -> llvm::Expected { +llvm::Function *mapperFunc = nullptr; +if (combinedInfo.Mappers[i]) { + // Call the corresponding mapper function. + llvm::Expected newFn = getOrCreateUserDefinedMapperFunc( + combinedInfo.Mappers[i], builder, moduleTranslation); + if (!newFn) +return newFn.takeError(); + mapperFunc = *newFn; +} +return mapperFunc; skatrak wrote: ```suggestion if (!combinedInfo.Mappers[i]) return nullptr; // Call the corresponding mapper function. return getOrCreateUserDefinedMapperFunc( combinedInfo.Mappers[i], builder, moduleTranslation); ``` https://github.com/llvm/llvm-project/pull/124746 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [mlir] [MLIR][OpenMP] Add LLVM translation support for OpenMP UserDefinedMappers (PR #124746)
https://github.com/skatrak commented: Thanks again Akash. I have a couple of small code simplification suggestions and nits, but otherwise LGTM. https://github.com/llvm/llvm-project/pull/124746 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [mlir] [MLIR][OpenMP] Add LLVM translation support for OpenMP UserDefinedMappers (PR #124746)
@@ -4673,7 +4804,8 @@ convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder, .Case([&](omp::TaskwaitOp op) { return convertOmpTaskwaitOp(op, builder, moduleTranslation); }) - .Casehttps://github.com/llvm/llvm-project/pull/124746 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [RegAlloc][NewPM] Plug Greedy RA in codegen pipeline (PR #120557)
https://github.com/cdevadas approved this pull request. https://github.com/llvm/llvm-project/pull/120557 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [ReleaseNotes][RemoveDIs] Add release note for deprecated insertion methods (PR #127493)
https://github.com/jmorse updated https://github.com/llvm/llvm-project/pull/127493 >From 7a3b7dd0acc441be19f232f6f44baa239b7d94c4 Mon Sep 17 00:00:00 2001 From: Jeremy Morse Date: Mon, 17 Feb 2025 13:38:41 + Subject: [PATCH 1/2] [ReleaseNotes][RemoveDIs] Add release note for deprecated insertion methods --- llvm/docs/ReleaseNotes.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index b42e111dc4283..abba2f2257d3d 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -116,6 +116,8 @@ Changes to the LLVM IR Changes to LLVM infrastructure -- + * Several methods that use Instruction pointers as insertion positions (moveBefore, getFirstNonPHI) have been deprecated in favour of overloads and variants that use `BasicBlock::iterator`s instead. The instruction-flavoured methods will be removed in a future release. This work is part of the [RemoveDIs](https://llvm.org/docs/RemoveDIsDebugInfo.html) project, the documentation for which contains instructions for updating call-sites using the deprecated methods. + Changes to building LLVM >From 07a18c7738dc894c45085745d0bfa055ae5354a7 Mon Sep 17 00:00:00 2001 From: Jeremy Morse Date: Mon, 17 Feb 2025 14:45:03 + Subject: [PATCH 2/2] Reword --- llvm/docs/ReleaseNotes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index abba2f2257d3d..c80aecfdea084 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -116,7 +116,7 @@ Changes to the LLVM IR Changes to LLVM infrastructure -- - * Several methods that use Instruction pointers as insertion positions (moveBefore, getFirstNonPHI) have been deprecated in favour of overloads and variants that use `BasicBlock::iterator`s instead. The instruction-flavoured methods will be removed in a future release. 
This work is part of the [RemoveDIs](https://llvm.org/docs/RemoveDIsDebugInfo.html) project, the documentation for which contains instructions for updating call-sites using the deprecated methods. + * Two methods that use Instruction pointers as code positions (moveBefore, getFirstNonPHI) have been deprecated in favour of overloads and variants that use `BasicBlock::iterator`s instead. The pointer-flavoured methods will be removed in a future release. This work is part of the [RemoveDIs](https://llvm.org/docs/RemoveDIsDebugInfo.html) project, the documentation for which contains instructions for updating call-sites using the deprecated methods. Changes to building LLVM ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: AMDGPU: Stop emitting an error on illegal addrspacecasts (#127487) (PR #127496)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: None (llvmbot) Changes Backport 18ea6c9 Requested by: @arsenm --- Patch is 33.93 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/127496.diff 4 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp (+2-5) - (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+2-5) - (modified) llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll (+646) - (modified) llvm/test/CodeGen/AMDGPU/invalid-addrspacecast.ll (+37-7) ``diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index e9e47eaadd557..e84f0f5fa615a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -2426,11 +2426,8 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast( return true; } - DiagnosticInfoUnsupported InvalidAddrSpaceCast( - MF.getFunction(), "invalid addrspacecast", B.getDebugLoc()); - - LLVMContext &Ctx = MF.getFunction().getContext(); - Ctx.diagnose(InvalidAddrSpaceCast); + // Invalid casts are poison. + // TODO: Should return poison B.buildUndef(Dst); MI.eraseFromParent(); return true; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index b632c50dae0e3..e09df53995d61 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -7340,11 +7340,8 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op, // global <-> flat are no-ops and never emitted. - const MachineFunction &MF = DAG.getMachineFunction(); - DiagnosticInfoUnsupported InvalidAddrSpaceCast( - MF.getFunction(), "invalid addrspacecast", SL.getDebugLoc()); - DAG.getContext()->diagnose(InvalidAddrSpaceCast); - + // Invalid casts are poison. 
+ // TODO: Should return poison return DAG.getUNDEF(Op->getValueType(0)); } diff --git a/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll b/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll index f5c9b1a79b476..5c62730fdfe8e 100644 --- a/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll +++ b/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll @@ -444,6 +444,652 @@ define float @no_unsafe(ptr %addr, float %val) { ret float %res } +@global = hidden addrspace(1) global i64 0, align 8 + +; Make sure there is no error on an invalid addrspacecast without optimizations +define i64 @optnone_atomicrmw_add_i64_expand(i64 %val) #1 { +; GFX908-LABEL: optnone_atomicrmw_add_i64_expand: +; GFX908: ; %bb.0: +; GFX908-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX908-NEXT:s_mov_b64 s[4:5], src_private_base +; GFX908-NEXT:s_mov_b32 s6, 32 +; GFX908-NEXT:s_lshr_b64 s[4:5], s[4:5], s6 +; GFX908-NEXT:s_getpc_b64 s[6:7] +; GFX908-NEXT:s_add_u32 s6, s6, global@rel32@lo+4 +; GFX908-NEXT:s_addc_u32 s7, s7, global@rel32@hi+12 +; GFX908-NEXT:s_cmp_eq_u32 s7, s4 +; GFX908-NEXT:s_cselect_b64 s[4:5], -1, 0 +; GFX908-NEXT:v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX908-NEXT:s_mov_b64 s[4:5], -1 +; GFX908-NEXT:s_mov_b32 s6, 1 +; GFX908-NEXT:v_cmp_ne_u32_e64 s[6:7], v2, s6 +; GFX908-NEXT:s_and_b64 vcc, exec, s[6:7] +; GFX908-NEXT:; implicit-def: $vgpr3_vgpr4 +; GFX908-NEXT:s_cbranch_vccnz .LBB4_3 +; GFX908-NEXT: .LBB4_1: ; %Flow +; GFX908-NEXT:v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GFX908-NEXT:s_mov_b32 s4, 1 +; GFX908-NEXT:v_cmp_ne_u32_e64 s[4:5], v2, s4 +; GFX908-NEXT:s_and_b64 vcc, exec, s[4:5] +; GFX908-NEXT:s_cbranch_vccnz .LBB4_4 +; GFX908-NEXT: ; %bb.2: ; %atomicrmw.private +; GFX908-NEXT:s_waitcnt lgkmcnt(0) +; GFX908-NEXT:buffer_load_dword v3, v0, s[0:3], 0 offen +; GFX908-NEXT:s_waitcnt vmcnt(0) +; GFX908-NEXT:v_mov_b32_e32 v4, v3 +; GFX908-NEXT:v_add_co_u32_e64 v0, s[4:5], v3, v0 +; GFX908-NEXT:v_addc_co_u32_e64 v1, s[4:5], v4, v1, s[4:5] +; GFX908-NEXT:buffer_store_dword v1, v0, s[0:3], 0 offen +; 
GFX908-NEXT:buffer_store_dword v0, v0, s[0:3], 0 offen +; GFX908-NEXT:s_branch .LBB4_4 +; GFX908-NEXT: .LBB4_3: ; %atomicrmw.global +; GFX908-NEXT:s_getpc_b64 s[4:5] +; GFX908-NEXT:s_add_u32 s4, s4, global@rel32@lo+4 +; GFX908-NEXT:s_addc_u32 s5, s5, global@rel32@hi+12 +; GFX908-NEXT:v_mov_b32_e32 v2, s4 +; GFX908-NEXT:v_mov_b32_e32 v3, s5 +; GFX908-NEXT:flat_atomic_add_x2 v[3:4], v[2:3], v[0:1] glc +; GFX908-NEXT:s_mov_b64 s[4:5], 0 +; GFX908-NEXT:s_branch .LBB4_1 +; GFX908-NEXT: .LBB4_4: ; %atomicrmw.phi +; GFX908-NEXT: ; %bb.5: ; %atomicrmw.end +; GFX908-NEXT:s_mov_b32 s4, 32 +; GFX908-NEXT:s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX908-NEXT:v_lshrrev_b64 v[1:2], s4, v[3:4] +; GFX908-NEXT:v_mov_b32_e32 v0, v3 +; GFX908-NEXT:s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: optnone_atomicrmw_add_i64_expand: +; GFX90A: ; %bb.0: +; GFX90A-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT:s_m
[llvm-branch-commits] [llvm] [ReleaseNotes][RemoveDIs] Add release note for deprecated insertion methods (PR #127493)
https://github.com/OCHyams approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/127493 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port RegAllocGreedy to NPM (PR #119540)
https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/119540 >From 38a3dab96c301bd21eaa7586e49484f35a237f71 Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Wed, 11 Dec 2024 08:51:55 + Subject: [PATCH 1/8] [CodeGen][NewPM] Port RegAllocGreedy to NPM --- llvm/include/llvm/CodeGen/MachineFunction.h | 1 + llvm/include/llvm/CodeGen/Passes.h| 2 +- llvm/include/llvm/InitializePasses.h | 2 +- .../llvm/Passes/MachinePassRegistry.def | 9 + llvm/lib/CodeGen/CodeGen.cpp | 2 +- llvm/lib/CodeGen/RegAllocGreedy.cpp | 185 ++ llvm/lib/CodeGen/RegAllocGreedy.h | 57 +++--- llvm/lib/Passes/PassBuilder.cpp | 1 + 8 files changed, 196 insertions(+), 63 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h index f1e595cde54e3..7fd0994883fe8 100644 --- a/llvm/include/llvm/CodeGen/MachineFunction.h +++ b/llvm/include/llvm/CodeGen/MachineFunction.h @@ -927,6 +927,7 @@ class LLVM_ABI MachineFunction { /// Run the current MachineFunction through the machine code verifier, useful /// for debugger use. + /// TODO: Add the param LiveStks /// \returns true if no problems were found. bool verify(LiveIntervals *LiveInts, SlotIndexes *Indexes, const char *Banner = nullptr, raw_ostream *OS = nullptr, diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index b5d2a7e6bf035..0182f21bee5f5 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -171,7 +171,7 @@ namespace llvm { extern char &LiveRangeShrinkID; /// Greedy register allocator. - extern char &RAGreedyID; + extern char &RAGreedyLegacyID; /// Basic register allocator. 
extern char &RABasicID; diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 5b30eb53208a8..69c9e14541907 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -248,7 +248,7 @@ void initializeProfileSummaryInfoWrapperPassPass(PassRegistry &); void initializePromoteLegacyPassPass(PassRegistry &); void initializeRABasicPass(PassRegistry &); void initializePseudoProbeInserterPass(PassRegistry &); -void initializeRAGreedyPass(PassRegistry &); +void initializeRAGreedyLegacyPass(PassRegistry &); void initializeReachingDefAnalysisPass(PassRegistry &); void initializeReassociateLegacyPassPass(PassRegistry &); void initializeRegAllocEvictionAdvisorAnalysisLegacyPass(PassRegistry &); diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index 373bd047e2395..78b4c8153e26b 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -194,6 +194,15 @@ MACHINE_FUNCTION_PASS_WITH_PARAMS( return parseRegAllocFastPassOptions(*PB, Params); }, "filter=reg-filter;no-clear-vregs") + +MACHINE_FUNCTION_PASS_WITH_PARAMS( +"regallocgreedy", "RAGreedy", +[](RegAllocFilterFunc F) { return RAGreedyPass(F); }, +[PB = this](StringRef Params) { + // TODO: parseRegAllocFilter(*PB, Params); + return Expected(nullptr); +}, "" +) #undef MACHINE_FUNCTION_PASS_WITH_PARAMS // After a pass is converted to new pass manager, its entry should be moved from diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index 35df2a479a545..21f76bdb2ad6b 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -112,7 +112,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializePreISelIntrinsicLoweringLegacyPassPass(Registry); initializeProcessImplicitDefsPass(Registry); initializeRABasicPass(Registry); - initializeRAGreedyPass(Registry); + 
initializeRAGreedyLegacyPass(Registry); initializeRegAllocFastPass(Registry); initializeRegUsageInfoCollectorLegacyPass(Registry); initializeRegUsageInfoPropagationLegacyPass(Registry); diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index bd81d630f9d1f..f4cc80c751350 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -43,8 +43,10 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" +#include "llvm/CodeGen/MachinePassManager.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegAllocEvictionAdvisor.h" +#include "llvm/CodeGen/RegAllocGreedyPass.h" #include "llvm/CodeGen/RegAllocPriorityAdvisor.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/RegisterClassInfo.h" @@ -55,6 +57,7 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/IR/Analysis.h" #include "llvm/IR/Debug
[llvm-branch-commits] [llvm] AMDGPU: Handle brev and not cases in getConstValDefinedInReg (PR #127483)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack href="https://app.graphite.dev/github/pr/llvm/llvm-project/127483?utm_source=stack-comment-downstack-mergeability-warning"; > >on Graphite. > https://graphite.dev/docs/merge-pull-requests";>Learn more * **#127485** https://app.graphite.dev/github/pr/llvm/llvm-project/127485?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#127484** https://app.graphite.dev/github/pr/llvm/llvm-project/127484?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#127483** https://app.graphite.dev/github/pr/llvm/llvm-project/127483?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> 👈 https://app.graphite.dev/github/pr/llvm/llvm-project/127483?utm_source=stack-comment-view-in-graphite"; target="_blank">(View in Graphite) * **#127482** https://app.graphite.dev/github/pr/llvm/llvm-project/127482?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#127481** https://app.graphite.dev/github/pr/llvm/llvm-project/127481?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#127480** https://app.graphite.dev/github/pr/llvm/llvm-project/127480?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * `main` This stack of pull requests is managed by https://graphite.dev?utm-source=stack-comment";>Graphite. 
Learn more about https://stacking.dev/?utm_source=stack-comment";>stacking. https://github.com/llvm/llvm-project/pull/127483 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [ReleaseNotes][RemoveDIs] Add release note for deprecated insertion methods (PR #127493)
https://github.com/jmorse milestoned https://github.com/llvm/llvm-project/pull/127493 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [ReleaseNotes][RemoveDIs] Add release note for deprecated insertion methods (PR #127493)
@@ -116,6 +116,8 @@ Changes to the LLVM IR Changes to LLVM infrastructure -- + * Several methods that use Instruction pointers as insertion positions (moveBefore, getFirstNonPHI) have been deprecated in favour of overloads and variants that use `BasicBlock::iterator`s instead. The instruction-flavoured methods will be removed in a future release. This work is part of the [RemoveDIs](https://llvm.org/docs/RemoveDIsDebugInfo.html) project, the documentation for which contains instructions for updating call-sites using the deprecated methods. jmorse wrote: Added some rewording to "Two methods", "pointer-flavoured", and stating that the methods use pointers as "code positions" because technically `getFirstNonPHI` isn't about insertion. (It's a bit tricky to correctly abstract the wording over "this is about insertions but not all the functions are inserters".) https://github.com/llvm/llvm-project/pull/127493 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Implement getConstValDefinedInReg and use in foldImmediate (NFC) (PR #127482)
https://github.com/shiltian approved this pull request. https://github.com/llvm/llvm-project/pull/127482 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Implement getConstValDefinedInReg and use in foldImmediate (NFC) (PR #127482)
https://github.com/shiltian edited https://github.com/llvm/llvm-project/pull/127482 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Extract lambda used in foldImmediate into a helper function (PR #127484)
@@ -3437,6 +3437,30 @@ void SIInstrInfo::removeModOperands(MachineInstr &MI) const { } } +std::optional SIInstrInfo::extractSubregFromImm(int64_t Imm, + unsigned SubRegIndex) { + switch (SubRegIndex) { + case AMDGPU::NoSubRegister: +return Imm; + case AMDGPU::sub0: +return Lo_32(Imm); + case AMDGPU::sub1: +return Hi_32(Imm); + case AMDGPU::lo16: +return SignExtend64<16>(Imm); + case AMDGPU::hi16: +return SignExtend64<16>(Imm >> 16); + case AMDGPU::sub1_lo16: +return SignExtend64<16>(Imm >> 32); + case AMDGPU::sub1_hi16: +return SignExtend64<16>(Imm >> 48); + default: +return std::nullopt; + } + + llvm_unreachable("covered subregister switch"); shiltian wrote: do we really need this to avoid compiler warning? https://github.com/llvm/llvm-project/pull/127484 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Extract lambda used in foldImmediate into a helper function (PR #127484)
@@ -3446,25 +3470,6 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, if (!getConstValDefinedInReg(DefMI, Reg, Imm)) shiltian wrote: [Re: line +3469] nit: I'd still prefer to initialize it even though if `getConstValDefinedInReg` returns `true`, it will be initialized. See this comment inline on https://app.graphite.dev/github/pr/llvm/llvm-project/127484?utm_source=unchanged-line-comment";>Graphite. https://github.com/llvm/llvm-project/pull/127484 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Extract lambda used in foldImmediate into a helper function (PR #127484)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/127484 >From b59c65f9ae4d20211cc01e05743505a5f493ff81 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 17 Feb 2025 17:12:22 +0700 Subject: [PATCH 1/2] AMDGPU: Extract lambda used in foldImmediate into a helper function It was also too permissive for a more general utilty, only return the original immediate if there is no subregister. --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 59 -- llvm/lib/Target/AMDGPU/SIInstrInfo.h | 9 2 files changed, 45 insertions(+), 23 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 4cb07b1df04ce..b5f36f67a37ac 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3437,6 +3437,30 @@ void SIInstrInfo::removeModOperands(MachineInstr &MI) const { } } +std::optional SIInstrInfo::extractSubregFromImm(int64_t Imm, + unsigned SubRegIndex) { + switch (SubRegIndex) { + case AMDGPU::NoSubRegister: +return Imm; + case AMDGPU::sub0: +return Lo_32(Imm); + case AMDGPU::sub1: +return Hi_32(Imm); + case AMDGPU::lo16: +return SignExtend64<16>(Imm); + case AMDGPU::hi16: +return SignExtend64<16>(Imm >> 16); + case AMDGPU::sub1_lo16: +return SignExtend64<16>(Imm >> 32); + case AMDGPU::sub1_hi16: +return SignExtend64<16>(Imm >> 48); + default: +return std::nullopt; + } + + llvm_unreachable("covered subregister switch"); +} + bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const { if (!MRI->hasOneNonDBGUse(Reg)) @@ -3446,25 +3470,6 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, if (!getConstValDefinedInReg(DefMI, Reg, Imm)) return false; - auto getImmFor = [=](const MachineOperand &UseOp) -> int64_t { -switch (UseOp.getSubReg()) { -default: - return Imm; -case AMDGPU::sub0: - return Lo_32(Imm); -case AMDGPU::sub1: - return Hi_32(Imm); -case AMDGPU::lo16: - return 
SignExtend64<16>(Imm); -case AMDGPU::hi16: - return SignExtend64<16>(Imm >> 16); -case AMDGPU::sub1_lo16: - return SignExtend64<16>(Imm >> 32); -case AMDGPU::sub1_hi16: - return SignExtend64<16>(Imm >> 48); -} - }; - assert(!DefMI.getOperand(0).getSubReg() && "Expected SSA form"); unsigned Opc = UseMI.getOpcode(); @@ -3480,7 +3485,11 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, : AMDGPU::V_MOV_B32_e32 : Is64Bit ? AMDGPU::S_MOV_B64_IMM_PSEUDO : AMDGPU::S_MOV_B32; -APInt Imm(Is64Bit ? 64 : 32, getImmFor(UseMI.getOperand(1)), + +std::optional SubRegImm = +extractSubregFromImm(Imm, UseMI.getOperand(1).getSubReg()); + +APInt Imm(Is64Bit ? 64 : 32, *SubRegImm, /*isSigned=*/true, /*implicitTrunc=*/true); if (RI.isAGPR(*MRI, DstReg)) { @@ -3591,7 +3600,8 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, if (NewOpc == AMDGPU::V_FMAMK_F16_fake16) return false; - const int64_t Imm = getImmFor(RegSrc == Src1 ? *Src0 : *Src1); + const std::optional SubRegImm = extractSubregFromImm( + Imm, RegSrc == Src1 ? Src0->getSubReg() : Src1->getSubReg()); // FIXME: This would be a lot easier if we could return a new instruction // instead of having to modify in place. @@ -3608,7 +3618,7 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, UseMI.untieRegOperand( AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); - Src1->ChangeToImmediate(Imm); + Src1->ChangeToImmediate(*SubRegImm); removeModOperands(UseMI); UseMI.setDesc(get(NewOpc)); @@ -3679,8 +3689,11 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, UseMI.untieRegOperand( AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); + const std::optional SubRegImm = + extractSubregFromImm(Imm, Src2->getSubReg()); + // ChangingToImmediate adds Src2 back to the instruction. - Src2->ChangeToImmediate(getImmFor(*Src2)); + Src2->ChangeToImmediate(*SubRegImm); // These come before src2. 
removeModOperands(UseMI); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index ddd15e1766f70..06dbdf65e458f 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -401,6 +401,15 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { void removeModOperands(MachineInstr &MI) const; + /// Return the extracted immediate value in a subregister use f
[llvm-branch-commits] [llvm] AMDGPU: Extract lambda used in foldImmediate into a helper function (PR #127484)
@@ -3437,6 +3437,30 @@ void SIInstrInfo::removeModOperands(MachineInstr &MI) const { } } +std::optional SIInstrInfo::extractSubregFromImm(int64_t Imm, + unsigned SubRegIndex) { + switch (SubRegIndex) { + case AMDGPU::NoSubRegister: +return Imm; + case AMDGPU::sub0: +return Lo_32(Imm); + case AMDGPU::sub1: +return Hi_32(Imm); + case AMDGPU::lo16: +return SignExtend64<16>(Imm); + case AMDGPU::hi16: +return SignExtend64<16>(Imm >> 16); + case AMDGPU::sub1_lo16: +return SignExtend64<16>(Imm >> 32); + case AMDGPU::sub1_hi16: +return SignExtend64<16>(Imm >> 48); + default: +return std::nullopt; + } + + llvm_unreachable("covered subregister switch"); arsenm wrote: Yes, this should be the version that makes msvc, gcc, and clang happy https://github.com/llvm/llvm-project/pull/127484 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Extract lambda used in foldImmediate into a helper function (PR #127484)
@@ -401,6 +401,15 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { void removeModOperands(MachineInstr &MI) const; + /// Return the extracted immediate value in a subregister use from a constant + /// materialized in a super register. + /// + /// e.g. %imm = S_MOV_B64 K[0:63] + /// USE %imm.sub1 + /// This will return k[32:63] shiltian wrote: ```suggestion /// This will return K[32:63] ``` https://github.com/llvm/llvm-project/pull/127484 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [mlir] [MLIR][OpenMP] Add LLVM translation support for OpenMP UserDefinedMappers (PR #124746)
@@ -2709,13 +2709,23 @@ getRefPtrIfDeclareTarget(mlir::Value value, } namespace { +// Append customMappers information to existing MapInfosTy +struct MapInfosTy : llvm::OpenMPIRBuilder::MapInfosTy { + SmallVector Mappers; ergawy wrote: We gain better readability in this already complex file. I am not fully on board with the memory footprint argument since I don't think a module will have that many instances of that struct that need to be created. Not a blocker from my side though since other reviewers are fine with it. https://github.com/llvm/llvm-project/pull/124746 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [mlir] [MLIR][OpenMP] Add LLVM translation support for OpenMP UserDefinedMappers (PR #124746)
@@ -3529,6 +3549,84 @@ static void genMapInfos(llvm::IRBuilderBase &builder, } } +static llvm::Expected +emitUserDefinedMapper(Operation *declMapperOp, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation); + +static llvm::Expected +getOrCreateUserDefinedMapperFunc(Operation *declMapperOp, + llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { + static llvm::DenseMap userDefMapperMap; + auto iter = userDefMapperMap.find(declMapperOp); + if (iter != userDefMapperMap.end()) +return iter->second; + llvm::Expected mapperFunc = + emitUserDefinedMapper(declMapperOp, builder, moduleTranslation); + if (!mapperFunc) +return mapperFunc.takeError(); + userDefMapperMap.try_emplace(declMapperOp, *mapperFunc); + return mapperFunc; +} + +static llvm::Expected +emitUserDefinedMapper(Operation *op, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { + auto declMapperOp = cast(op); + auto declMapperInfoOp = + *declMapperOp.getOps().begin(); + DataLayout dl = DataLayout(declMapperOp->getParentOfType()); + llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); + llvm::Type *varType = moduleTranslation.convertType(declMapperOp.getType()); + std::string mapperName = ompBuilder->createPlatformSpecificName( + {"omp_mapper", declMapperOp.getSymName()}); + SmallVector mapVars = declMapperInfoOp.getMapVars(); + + using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; + + // Fill up the arrays with all the mapped variables. 
+ MapInfosTy combinedInfo; + auto genMapInfoCB = + [&](InsertPointTy codeGenIP, llvm::Value *ptrPHI, + llvm::Value *unused2) -> llvm::OpenMPIRBuilder::MapInfosOrErrorTy { +builder.restoreIP(codeGenIP); +moduleTranslation.mapValue(declMapperOp.getRegion().getArgument(0), ptrPHI); +moduleTranslation.mapBlock(&declMapperOp.getRegion().front(), + builder.GetInsertBlock()); +if (failed(moduleTranslation.convertBlock(declMapperOp.getRegion().front(), + /*ignoreArguments=*/true, + builder))) + return llvm::make_error(); +MapInfoData mapData; +collectMapDataFromMapOperands(mapData, mapVars, moduleTranslation, dl, + builder); +genMapInfos(builder, moduleTranslation, dl, combinedInfo, mapData); + +// Drop the mapping that is no longer necessary so that the same region can +// be processed multiple times. +moduleTranslation.forgetMapping(declMapperOp.getRegion()); +return combinedInfo; + }; + + auto customMapperCB = [&](unsigned i, llvm::Function **mapperFunc) { +if (combinedInfo.Mappers[i]) { + // Call the corresponding mapper function. + llvm::Expected newFn = getOrCreateUserDefinedMapperFunc( + combinedInfo.Mappers[i], builder, moduleTranslation); + assert(newFn && "Expect a valid mapper function is available"); skatrak wrote: Thank you for taking the time to do this. https://github.com/llvm/llvm-project/pull/124746 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [mlir] [MLIR][OpenMP] Add LLVM translation support for OpenMP UserDefinedMappers (PR #124746)
@@ -0,0 +1,47 @@ +! Offloading test checking lowering of arrays with dynamic extents. +! REQUIRES: flang, amdgpu + +! RUN: %libomptarget-compile-fortran-run-and-check-generic + +program test_openmp_mapper + implicit none + integer, parameter :: n = 1024 + type :: mytype + integer :: data(n) + end type mytype + + ! Declare custom mappers for the derived type `mytype` + !$omp declare mapper(my_mapper1 : mytype :: t) map(to: t%data) + !$omp declare mapper(my_mapper2 : mytype :: t) map(mapper(my_mapper1): t%data) skatrak wrote: Thank you, this works for me. https://github.com/llvm/llvm-project/pull/124746 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [mlir] [MLIR][OpenMP] Add LLVM translation support for OpenMP UserDefinedMappers (PR #124746)
@@ -8130,17 +8135,19 @@ Function *OpenMPIRBuilder::emitUserDefinedMapper( Value *OffloadingArgs[] = {MapperHandle, CurBaseArg, CurBeginArg, CurSizeArg, CurMapType, CurNameArg}; -Function *ChildMapperFn = nullptr; -if (CustomMapperCB && CustomMapperCB(I, &ChildMapperFn)) { + +auto ChildMapperFn = CustomMapperCB(I); +if (!ChildMapperFn) + return ChildMapperFn.takeError(); +if (*ChildMapperFn) // Call the corresponding mapper function. - Builder.CreateCall(ChildMapperFn, OffloadingArgs)->setDoesNotThrow(); -} else { + Builder.CreateCall(*ChildMapperFn, OffloadingArgs)->setDoesNotThrow(); +else skatrak wrote: Nit: We need braces here because of the comments inside both branches. https://github.com/llvm/llvm-project/pull/124746 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [mlir] [MLIR][OpenMP] Add LLVM translation support for OpenMP UserDefinedMappers (PR #124746)
@@ -4438,15 +4551,33 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, findAllocaInsertPoint(builder, moduleTranslation); llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); + llvm::OpenMPIRBuilder::TargetDataInfo info( + /*RequiresDevicePointerInfo=*/false, + /*SeparateBeginEndCalls=*/true); + + auto customMapperCB = + [&](unsigned int i) -> llvm::Expected { +llvm::Function *mapperFunc = nullptr; +if (combinedInfos.Mappers[i]) { + info.HasMapper = true; + llvm::Expected newFn = getOrCreateUserDefinedMapperFunc( + combinedInfos.Mappers[i], builder, moduleTranslation); + if (!newFn) +return newFn.takeError(); + mapperFunc = *newFn; +} +return mapperFunc; skatrak wrote: ```suggestion if (!combinedInfos.Mappers[i]) return nullptr; info.HasMapper = true; return getOrCreateUserDefinedMapperFunc( combinedInfos.Mappers[i], builder, moduleTranslation); ``` https://github.com/llvm/llvm-project/pull/124746 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [mlir] [MLIR][OpenMP] Add LLVM translation support for OpenMP UserDefinedMappers (PR #124746)
https://github.com/skatrak edited https://github.com/llvm/llvm-project/pull/124746 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [mlir] [MLIR][OpenMP] Add LLVM translation support for OpenMP UserDefinedMappers (PR #124746)
https://github.com/ergawy approved this pull request. https://github.com/llvm/llvm-project/pull/124746 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Fix foldImmediate breaking register class constraints (PR #127481)
@@ -419,25 +419,30 @@ body: | ... -# FIXME: -# --- -# name:fold_v_mov_b64_64_to_unaligned -# body: | -# bb.0: -# %0:vreg_64_align2 = V_MOV_B64_e32 1311768467750121200, implicit $exec -# %1:vreg_64 = COPY killed %0 -# SI_RETURN_TO_EPILOG implicit %1 -# ... - -# FIXME: -# --- -# name:fold_v_mov_b64_pseudo_64_to_unaligned -# body: | -# bb.0: -# %0:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec -# %1:vreg_64 = COPY killed %0 -# SI_RETURN_TO_EPILOG implicit %1 -# ... +--- +name:fold_v_mov_b64_64_to_unaligned +body: | + bb.0: +; GCN-LABEL: name: fold_v_mov_b64_64_to_unaligned +; GCN: [[V_MOV_B64_e32_:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_e32 1311768467750121200, implicit $exec +; GCN-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec cdevadas wrote: %1 has been forced to *_align2 after the transformation. I assume it is because the src register in the original copy was of *_align2. https://github.com/llvm/llvm-project/pull/127481 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Fix foldImmediate breaking register class constraints (PR #127481)
@@ -3473,14 +3473,19 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, assert(UseMI.getOperand(1).getReg().isVirtual()); } +MachineFunction *MF = UseMI.getParent()->getParent(); cdevadas wrote: ```suggestion MachineFunction *MF = UseMI.getMF(); ``` https://github.com/llvm/llvm-project/pull/127481 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Fix foldImmediate breaking register class constraints (PR #127481)
@@ -419,25 +419,30 @@ body: | ... -# FIXME: -# --- -# name:fold_v_mov_b64_64_to_unaligned -# body: | -# bb.0: -# %0:vreg_64_align2 = V_MOV_B64_e32 1311768467750121200, implicit $exec -# %1:vreg_64 = COPY killed %0 -# SI_RETURN_TO_EPILOG implicit %1 -# ... - -# FIXME: -# --- -# name:fold_v_mov_b64_pseudo_64_to_unaligned -# body: | -# bb.0: -# %0:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec -# %1:vreg_64 = COPY killed %0 -# SI_RETURN_TO_EPILOG implicit %1 -# ... +--- +name:fold_v_mov_b64_64_to_unaligned +body: | + bb.0: +; GCN-LABEL: name: fold_v_mov_b64_64_to_unaligned +; GCN: [[V_MOV_B64_e32_:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_e32 1311768467750121200, implicit $exec +; GCN-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec arsenm wrote: yes https://github.com/llvm/llvm-project/pull/127481 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Fix foldImmediate breaking register class constraints (PR #127481)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/127481 >From 19351f47142d05f5845e3d6b12764b6b574e9a7e Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 17 Feb 2025 16:38:57 +0700 Subject: [PATCH 1/2] AMDGPU: Fix foldImmediate breaking register class constraints This fixes a verifier error when folding an immediate materialized into an aligned vgpr class into a copy to an unaligned virtual register. --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp| 11 +++-- .../test/CodeGen/AMDGPU/peephole-fold-imm.mir | 43 +++ 2 files changed, 32 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 8481c6333f479..0dafa527f722a 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3473,14 +3473,19 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, assert(UseMI.getOperand(1).getReg().isVirtual()); } +MachineFunction *MF = UseMI.getParent()->getParent(); const MCInstrDesc &NewMCID = get(NewOpc); -if (DstReg.isPhysical() && -!RI.getRegClass(NewMCID.operands()[0].RegClass)->contains(DstReg)) +const TargetRegisterClass *NewDefRC = getRegClass(NewMCID, 0, &RI, *MF); + +if (DstReg.isPhysical()) { + if (!NewDefRC->contains(DstReg)) +return false; +} else if (!MRI->constrainRegClass(DstReg, NewDefRC)) return false; UseMI.setDesc(NewMCID); UseMI.getOperand(1).ChangeToImmediate(Imm.getSExtValue()); -UseMI.addImplicitDefUseOperands(*UseMI.getParent()->getParent()); +UseMI.addImplicitDefUseOperands(*MF); return true; } diff --git a/llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir b/llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir index cceed6fd008e4..227af34f3fa6f 100644 --- a/llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir +++ b/llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir @@ -419,25 +419,30 @@ body: | ... 
-# FIXME: -# --- -# name:fold_v_mov_b64_64_to_unaligned -# body: | -# bb.0: -# %0:vreg_64_align2 = V_MOV_B64_e32 1311768467750121200, implicit $exec -# %1:vreg_64 = COPY killed %0 -# SI_RETURN_TO_EPILOG implicit %1 -# ... - -# FIXME: -# --- -# name:fold_v_mov_b64_pseudo_64_to_unaligned -# body: | -# bb.0: -# %0:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec -# %1:vreg_64 = COPY killed %0 -# SI_RETURN_TO_EPILOG implicit %1 -# ... +--- +name:fold_v_mov_b64_64_to_unaligned +body: | + bb.0: +; GCN-LABEL: name: fold_v_mov_b64_64_to_unaligned +; GCN: [[V_MOV_B64_e32_:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_e32 1311768467750121200, implicit $exec +; GCN-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec +; GCN-NEXT: SI_RETURN_TO_EPILOG implicit [[V_MOV_B]] +%0:vreg_64_align2 = V_MOV_B64_e32 1311768467750121200, implicit $exec +%1:vreg_64 = COPY killed %0 +SI_RETURN_TO_EPILOG implicit %1 +... + +--- +name:fold_v_mov_b64_pseudo_64_to_unaligned +body: | + bb.0: +; GCN-LABEL: name: fold_v_mov_b64_pseudo_64_to_unaligned +; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec +; GCN-NEXT: SI_RETURN_TO_EPILOG implicit [[V_MOV_B]] +%0:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec +%1:vreg_64 = COPY killed %0 +SI_RETURN_TO_EPILOG implicit %1 +... 
--- name:fold_s_brev_b32_simm_virtual_0 >From 86471a9825bd429fcb2a6c5f5c175351c04af22b Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 17 Feb 2025 23:15:46 +0700 Subject: [PATCH 2/2] Update llvm/lib/Target/AMDGPU/SIInstrInfo.cpp Co-authored-by: Christudasan Devadasan --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 0dafa527f722a..f51527d0eb148 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3473,7 +3473,7 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, assert(UseMI.getOperand(1).getReg().isVirtual()); } -MachineFunction *MF = UseMI.getParent()->getParent(); +MachineFunction *MF = UseMI.getMF(); const MCInstrDesc &NewMCID = get(NewOpc); const TargetRegisterClass *NewDefRC = getRegClass(NewMCID, 0, &RI, *MF); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Fix foldImmediate breaking register class constraints (PR #127481)
https://github.com/cdevadas approved this pull request. https://github.com/llvm/llvm-project/pull/127481 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Handle subregister uses in SIFoldOperands constant folding (PR #127485)
https://github.com/rampitec approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/127485 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Handle brev and not cases in getConstValDefinedInReg (PR #127483)
https://github.com/rampitec approved this pull request. https://github.com/llvm/llvm-project/pull/127483 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [Hexagon] Explicitly truncate constant in UAddSubO (#127360) (PR #127527)
https://github.com/topperc approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/127527 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [clang][CodeGen] `sret` args should always point to the `alloca` AS, so use that (#114062) (PR #127552)
llvmbot wrote: @llvm/pr-subscribers-clang Author: None (llvmbot) Changes Backport 39ec9de Requested by: @arsenm --- Patch is 79.56 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/127552.diff 35 Files Affected: - (modified) clang/include/clang/CodeGen/CGFunctionInfo.h (+6-5) - (modified) clang/lib/CodeGen/ABIInfo.cpp (+4-4) - (modified) clang/lib/CodeGen/ABIInfo.h (+2-1) - (modified) clang/lib/CodeGen/ABIInfoImpl.cpp (+9-6) - (modified) clang/lib/CodeGen/CGCall.cpp (+20-12) - (modified) clang/lib/CodeGen/CGExprAgg.cpp (+13-6) - (modified) clang/lib/CodeGen/ItaniumCXXABI.cpp (+3-1) - (modified) clang/lib/CodeGen/MicrosoftCXXABI.cpp (+3-1) - (modified) clang/lib/CodeGen/SwiftCallingConv.cpp (+11-5) - (modified) clang/lib/CodeGen/Targets/AArch64.cpp (+15-9) - (modified) clang/lib/CodeGen/Targets/AMDGPU.cpp (+2-1) - (modified) clang/lib/CodeGen/Targets/ARC.cpp (+7-4) - (modified) clang/lib/CodeGen/Targets/ARM.cpp (+21-11) - (modified) clang/lib/CodeGen/Targets/AVR.cpp (+1-1) - (modified) clang/lib/CodeGen/Targets/BPF.cpp (+8-4) - (modified) clang/lib/CodeGen/Targets/CSKY.cpp (+5-3) - (modified) clang/lib/CodeGen/Targets/Hexagon.cpp (+12-6) - (modified) clang/lib/CodeGen/Targets/Lanai.cpp (+9-5) - (modified) clang/lib/CodeGen/Targets/LoongArch.cpp (+9-4) - (modified) clang/lib/CodeGen/Targets/Mips.cpp (+6-4) - (modified) clang/lib/CodeGen/Targets/NVPTX.cpp (+6-2) - (modified) clang/lib/CodeGen/Targets/PNaCl.cpp (+7-5) - (modified) clang/lib/CodeGen/Targets/PPC.cpp (+22-13) - (modified) clang/lib/CodeGen/Targets/RISCV.cpp (+9-4) - (modified) clang/lib/CodeGen/Targets/SPIR.cpp (+6-3) - (modified) clang/lib/CodeGen/Targets/Sparc.cpp (+5-2) - (modified) clang/lib/CodeGen/Targets/SystemZ.cpp (+9-5) - (modified) clang/lib/CodeGen/Targets/WebAssembly.cpp (+2-1) - (modified) clang/lib/CodeGen/Targets/X86.cpp (+40-18) - (modified) clang/test/CodeGen/partial-reinitialization2.c (+2-2) - (modified) clang/test/CodeGen/sret.c (+11) - 
(modified) clang/test/CodeGenCXX/no-elide-constructors.cpp (+6) - (modified) clang/test/CodeGenOpenCL/addr-space-struct-arg.cl (+6-8) - (modified) clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl (+6-8) - (added) clang/test/CodeGenOpenCL/implicit-addrspacecast-function-parameter.cl (+68) ``diff diff --git a/clang/include/clang/CodeGen/CGFunctionInfo.h b/clang/include/clang/CodeGen/CGFunctionInfo.h index 9d785d878b61d..040ee025afaa8 100644 --- a/clang/include/clang/CodeGen/CGFunctionInfo.h +++ b/clang/include/clang/CodeGen/CGFunctionInfo.h @@ -206,8 +206,8 @@ class ABIArgInfo { static ABIArgInfo getIgnore() { return ABIArgInfo(Ignore); } - static ABIArgInfo getIndirect(CharUnits Alignment, bool ByVal = true, -bool Realign = false, + static ABIArgInfo getIndirect(CharUnits Alignment, unsigned AddrSpace, +bool ByVal = true, bool Realign = false, llvm::Type *Padding = nullptr) { auto AI = ABIArgInfo(Indirect); AI.setIndirectAlign(Alignment); @@ -215,6 +215,7 @@ class ABIArgInfo { AI.setIndirectRealign(Realign); AI.setSRetAfterThis(false); AI.setPaddingType(Padding); +AI.setIndirectAddrSpace(AddrSpace); return AI; } @@ -232,7 +233,7 @@ class ABIArgInfo { static ABIArgInfo getIndirectInReg(CharUnits Alignment, bool ByVal = true, bool Realign = false) { -auto AI = getIndirect(Alignment, ByVal, Realign); +auto AI = getIndirect(Alignment, 0, ByVal, Realign); AI.setInReg(true); return AI; } @@ -422,12 +423,12 @@ class ABIArgInfo { } unsigned getIndirectAddrSpace() const { -assert(isIndirectAliased() && "Invalid kind!"); +assert((isIndirect() || isIndirectAliased()) && "Invalid kind!"); return IndirectAttr.AddrSpace; } void setIndirectAddrSpace(unsigned AddrSpace) { -assert(isIndirectAliased() && "Invalid kind!"); +assert((isIndirect() || isIndirectAliased()) && "Invalid kind!"); IndirectAttr.AddrSpace = AddrSpace; } diff --git a/clang/lib/CodeGen/ABIInfo.cpp b/clang/lib/CodeGen/ABIInfo.cpp index cda8a494f6c27..d981d69913632 100644 --- a/clang/lib/CodeGen/ABIInfo.cpp 
+++ b/clang/lib/CodeGen/ABIInfo.cpp @@ -171,11 +171,11 @@ bool ABIInfo::isPromotableIntegerTypeForABI(QualType Ty) const { return false; } -ABIArgInfo ABIInfo::getNaturalAlignIndirect(QualType Ty, bool ByVal, -bool Realign, +ABIArgInfo ABIInfo::getNaturalAlignIndirect(QualType Ty, unsigned AddrSpace, +bool ByVal, bool Realign, llvm::Type *Padding) const { - return ABIArgInfo::getIndirect(getContext().getTypeAlignInChars(Ty), ByVal, - Realign, Padding); + return ABIArgInfo::getIndirect(getContext().getTypeAl
[llvm-branch-commits] [llvm] AMDGPU: Do not try to commute instruction with same input register (PR #127562)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/127562 There's little point to trying to commute an instruction if the two operands are already the same. This avoids an assertion in a future patch, but this likely isn't the correct fix. The worklist management in SIFoldOperands is dodgy, and we should probably fix it to work like PeepholeOpt (i.e. stop looking at use lists, and fold from users). This is an extension of the already handled special case which it's trying to avoid folding an instruction which is already being folded. >From b28280b92e8c3d8861e4f6e1bb924742e0c78f49 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 18 Feb 2025 10:05:30 +0700 Subject: [PATCH] AMDGPU: Do not try to commute instruction with same input register There's little point to trying to commute an instruction if the two operands are already the same. This avoids an assertion in a future patch, but this likely isn't the correct fix. The worklist management in SIFoldOperands is dodgy, and we should probably fix it to work like PeepholeOpt (i.e. stop looking at use lists, and fold from users). This is an extension of the already handled special case which it's trying to avoid folding an instruction which is already being folded. 
--- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 12 +++- llvm/test/CodeGen/AMDGPU/dag-divergence.ll| 4 +- llvm/test/CodeGen/AMDGPU/div_i128.ll | 8 +-- llvm/test/CodeGen/AMDGPU/div_v2i128.ll| 32 - llvm/test/CodeGen/AMDGPU/rem_i128.ll | 8 +-- ...-operands-commute-same-operands-assert.mir | 65 +++ 6 files changed, 102 insertions(+), 27 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/si-fold-operands-commute-same-operands-assert.mir diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 8492bb2c3518b..84773349e0ca0 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -691,11 +691,21 @@ bool SIFoldOperandsImpl::tryAddToFoldList( if (!CanCommute) return false; +MachineOperand &Op = MI->getOperand(OpNo); +MachineOperand &CommutedOp = MI->getOperand(CommuteOpNo); + // One of operands might be an Imm operand, and OpNo may refer to it after // the call of commuteInstruction() below. Such situations are avoided // here explicitly as OpNo must be a register operand to be a candidate // for memory folding. -if (!MI->getOperand(OpNo).isReg() || !MI->getOperand(CommuteOpNo).isReg()) +if (!Op.isReg() || !CommutedOp.isReg()) + return false; + +// The same situation with an immediate could reproduce if both inputs are +// the same register. 
+if (Op.isReg() && CommutedOp.isReg() && +(Op.getReg() == CommutedOp.getReg() && + Op.getSubReg() == CommutedOp.getSubReg())) return false; if (!TII->commuteInstruction(*MI, false, OpNo, CommuteOpNo)) diff --git a/llvm/test/CodeGen/AMDGPU/dag-divergence.ll b/llvm/test/CodeGen/AMDGPU/dag-divergence.ll index dfc28539ea814..0f573fcc6deaa 100644 --- a/llvm/test/CodeGen/AMDGPU/dag-divergence.ll +++ b/llvm/test/CodeGen/AMDGPU/dag-divergence.ll @@ -37,8 +37,8 @@ define amdgpu_kernel void @flat_load_maybe_divergent(ptr addrspace(4) %k, ptr %f ; GCN-LABEL: {{^}}wide_carry_divergence_error: ; GCN: v_sub_u32_e32 ; GCN: v_subb_u32_e32 -; GCN: v_subbrev_u32_e32 -; GCN: v_subbrev_u32_e32 +; GCN: v_subb_u32_e32 +; GCN: v_subb_u32_e32 define <2 x i128> @wide_carry_divergence_error(i128 %arg) { %i = call i128 @llvm.ctlz.i128(i128 %arg, i1 false) %i1 = sub i128 0, %i diff --git a/llvm/test/CodeGen/AMDGPU/div_i128.ll b/llvm/test/CodeGen/AMDGPU/div_i128.ll index 59bc7f332bf1e..3d9043d30c1ce 100644 --- a/llvm/test/CodeGen/AMDGPU/div_i128.ll +++ b/llvm/test/CodeGen/AMDGPU/div_i128.ll @@ -65,8 +65,8 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-NEXT:v_cndmask_b32_e64 v7, v7, 0, vcc ; GFX9-NEXT:v_sub_co_u32_e32 v2, vcc, v2, v3 ; GFX9-NEXT:v_subb_co_u32_e32 v3, vcc, v4, v7, vcc -; GFX9-NEXT:v_subbrev_co_u32_e32 v4, vcc, 0, v5, vcc -; GFX9-NEXT:v_subbrev_co_u32_e32 v5, vcc, 0, v5, vcc +; GFX9-NEXT:v_subb_co_u32_e32 v4, vcc, 0, v5, vcc +; GFX9-NEXT:v_subb_co_u32_e32 v5, vcc, 0, v5, vcc ; GFX9-NEXT:s_mov_b64 s[6:7], 0x7f ; GFX9-NEXT:v_cmp_lt_u64_e32 vcc, s[6:7], v[2:3] ; GFX9-NEXT:v_mov_b32_e32 v18, v16 @@ -2355,8 +2355,8 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-NEXT:v_sub_co_u32_e32 v12, vcc, v8, v9 ; GFX9-NEXT:v_subb_co_u32_e32 v13, vcc, v10, v13, vcc ; GFX9-NEXT:v_mov_b32_e32 v8, 0 -; GFX9-NEXT:v_subbrev_co_u32_e32 v14, vcc, 0, v8, vcc -; GFX9-NEXT:v_subbrev_co_u32_e32 v15, vcc, 0, v8, vcc +; GFX9-NEXT:v_subb_co_u32_e32 v14, vcc, 0, v8, vcc +; 
GFX9-NEXT:v_subb_co_u32_e32 v15, vcc, 0, v8, vcc ; GFX9-NEXT:v_cmp_lt_u64_e32 vcc, s[6:7], v[12:13] ; GFX9-NEXT:v_o
[llvm-branch-commits] [llvm] AMDGPU: Fix overly conservative immediate operand check (PR #127563)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/127563 The real legality check is performed later anyway, so this was unnecessarily blocking immediate folds in handled cases. This also stops folding s_fmac_f32 to s_fmamk_f32 in a few tests, but that seems better. The globalisel changes look suspicious, it may be mishandling constants for VOP3P instructions. >From 3dd61c69e1cd3cab752cac624c0a5be42b0ca193 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 17 Feb 2025 22:31:48 +0700 Subject: [PATCH] AMDGPU: Fix overly conservative immediate operand check The real legality check is performed later anyway, so this was unnecessarily blocking immediate folds in handled cases. This also stops folding s_fmac_f32 to s_fmamk_f32 in a few tests, but that seems better. The globalisel changes look suspicious, it may be mishandling constants for VOP3P instructions. --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp| 3 ++- llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll | 16 llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll | 16 llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll | 4 +--- llvm/test/CodeGen/AMDGPU/bug-cselect-b64.ll | 6 ++ llvm/test/CodeGen/AMDGPU/constrained-shift.ll| 6 ++ .../CodeGen/AMDGPU/fold-operands-scalar-fmac.mir | 4 ++-- llvm/test/CodeGen/AMDGPU/global-saddr-load.ll| 5 + llvm/test/CodeGen/AMDGPU/packed-fp32.ll | 10 +- llvm/test/CodeGen/AMDGPU/scalar-float-sop2.ll| 4 ++-- 10 files changed, 25 insertions(+), 49 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 84773349e0ca0..cbd858b9002ee 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -830,7 +830,8 @@ bool SIFoldOperandsImpl::tryToFoldACImm( if (UseOpIdx >= Desc.getNumOperands()) return false; - if (!AMDGPU::isSISrcInlinableOperand(Desc, UseOpIdx)) + // Filter out unhandled pseudos. 
+ if (!AMDGPU::isSISrcOperand(Desc, UseOpIdx)) return false; uint8_t OpTy = Desc.operands()[UseOpIdx].OperandType; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll index 4be00fedb972e..89078f20f1d47 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll @@ -920,9 +920,7 @@ define amdgpu_ps i64 @s_andn2_v4i16(<4 x i16> inreg %src0, <4 x i16> inreg %src1 ; GFX6-NEXT:s_lshl_b32 s3, s9, 16 ; GFX6-NEXT:s_and_b32 s4, s8, 0x ; GFX6-NEXT:s_or_b32 s3, s3, s4 -; GFX6-NEXT:s_mov_b32 s4, -1 -; GFX6-NEXT:s_mov_b32 s5, s4 -; GFX6-NEXT:s_xor_b64 s[2:3], s[2:3], s[4:5] +; GFX6-NEXT:s_xor_b64 s[2:3], s[2:3], -1 ; GFX6-NEXT:s_and_b64 s[0:1], s[0:1], s[2:3] ; GFX6-NEXT:; return to shader part epilog ; @@ -962,9 +960,7 @@ define amdgpu_ps i64 @s_andn2_v4i16_commute(<4 x i16> inreg %src0, <4 x i16> inr ; GFX6-NEXT:s_lshl_b32 s3, s9, 16 ; GFX6-NEXT:s_and_b32 s4, s8, 0x ; GFX6-NEXT:s_or_b32 s3, s3, s4 -; GFX6-NEXT:s_mov_b32 s4, -1 -; GFX6-NEXT:s_mov_b32 s5, s4 -; GFX6-NEXT:s_xor_b64 s[2:3], s[2:3], s[4:5] +; GFX6-NEXT:s_xor_b64 s[2:3], s[2:3], -1 ; GFX6-NEXT:s_and_b64 s[0:1], s[2:3], s[0:1] ; GFX6-NEXT:; return to shader part epilog ; @@ -1004,9 +1000,7 @@ define amdgpu_ps { i64, i64 } @s_andn2_v4i16_multi_use(<4 x i16> inreg %src0, <4 ; GFX6-NEXT:s_lshl_b32 s3, s9, 16 ; GFX6-NEXT:s_and_b32 s4, s8, 0x ; GFX6-NEXT:s_or_b32 s3, s3, s4 -; GFX6-NEXT:s_mov_b32 s4, -1 -; GFX6-NEXT:s_mov_b32 s5, s4 -; GFX6-NEXT:s_xor_b64 s[2:3], s[2:3], s[4:5] +; GFX6-NEXT:s_xor_b64 s[2:3], s[2:3], -1 ; GFX6-NEXT:s_and_b64 s[0:1], s[0:1], s[2:3] ; GFX6-NEXT:; return to shader part epilog ; @@ -1060,9 +1054,7 @@ define amdgpu_ps { i64, i64 } @s_andn2_v4i16_multi_foldable_use(<4 x i16> inreg ; GFX6-NEXT:s_lshl_b32 s5, s13, 16 ; GFX6-NEXT:s_and_b32 s6, s12, 0x ; GFX6-NEXT:s_or_b32 s5, s5, s6 -; GFX6-NEXT:s_mov_b32 s6, -1 -; GFX6-NEXT:s_mov_b32 s7, s6 -; GFX6-NEXT:s_xor_b64 s[4:5], s[4:5], s[6:7] +; 
GFX6-NEXT:s_xor_b64 s[4:5], s[4:5], -1 ; GFX6-NEXT:s_and_b64 s[0:1], s[0:1], s[4:5] ; GFX6-NEXT:s_and_b64 s[2:3], s[2:3], s[4:5] ; GFX6-NEXT:; return to shader part epilog diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll index e7119c89ac06c..065fadf3b5ef3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll @@ -919,9 +919,7 @@ define amdgpu_ps i64 @s_orn2_v4i16(<4 x i16> inreg %src0, <4 x i16> inreg %src1) ; GFX6-NEXT:s_lshl_b32 s3, s9, 16 ; GFX6-NEXT:s_and_b32 s4, s8, 0x ; GFX6-NEXT:s_or_b32 s3, s3, s4 -; GFX6-NEXT:s_mov_b32 s4, -1 -; GFX6-NEXT:s_mov_b32 s5, s4 -; GFX6-NEXT:s_xor_b64 s[2:3], s[2:3], s[4:
[llvm-branch-commits] [llvm] AMDGPU: Handle subregister uses in SIFoldOperands constant folding (PR #127485)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/127485 >From d978a9636ea12626dd7650efffba63fe8a91e1a4 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 17 Feb 2025 17:18:27 +0700 Subject: [PATCH] AMDGPU: Handle subregister uses in SIFoldOperands constant folding --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 57 +++ .../AMDGPU/constant-fold-imm-immreg.mir | 34 +++ 2 files changed, 67 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 999553bfaff38..8492bb2c3518b 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -123,7 +123,7 @@ class SIFoldOperandsImpl { SmallVectorImpl &FoldList, SmallVectorImpl &CopiesToReplace) const; - MachineOperand *getImmOrMaterializedImm(MachineOperand &Op) const; + std::optional getImmOrMaterializedImm(MachineOperand &Op) const; bool tryConstantFoldOp(MachineInstr *MI) const; bool tryFoldCndMask(MachineInstr &MI) const; bool tryFoldZeroHighBits(MachineInstr &MI) const; @@ -1298,21 +1298,22 @@ static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc) { MI.removeOperand(I); } -MachineOperand * +std::optional SIFoldOperandsImpl::getImmOrMaterializedImm(MachineOperand &Op) const { - // If this has a subregister, it obviously is a register source. 
- if (!Op.isReg() || Op.getSubReg() != AMDGPU::NoSubRegister || - !Op.getReg().isVirtual()) -return &Op; + if (Op.isImm()) +return Op.getImm(); - MachineInstr *Def = MRI->getVRegDef(Op.getReg()); + if (!Op.isReg() || !Op.getReg().isVirtual()) +return std::nullopt; + + const MachineInstr *Def = MRI->getVRegDef(Op.getReg()); if (Def && Def->isMoveImmediate()) { -MachineOperand &ImmSrc = Def->getOperand(1); +const MachineOperand &ImmSrc = Def->getOperand(1); if (ImmSrc.isImm()) - return &ImmSrc; + return TII->extractSubregFromImm(ImmSrc.getImm(), Op.getSubReg()); } - return &Op; + return std::nullopt; } // Try to simplify operations with a constant that may appear after instruction @@ -1327,12 +1328,14 @@ bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const { int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); if (Src0Idx == -1) return false; - MachineOperand *Src0 = getImmOrMaterializedImm(MI->getOperand(Src0Idx)); + + MachineOperand *Src0 = &MI->getOperand(Src0Idx); + std::optional Src0Imm = getImmOrMaterializedImm(*Src0); if ((Opc == AMDGPU::V_NOT_B32_e64 || Opc == AMDGPU::V_NOT_B32_e32 || Opc == AMDGPU::S_NOT_B32) && - Src0->isImm()) { -MI->getOperand(1).ChangeToImmediate(~Src0->getImm()); + Src0Imm) { +MI->getOperand(1).ChangeToImmediate(~*Src0Imm); mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_NOT_B32))); return true; } @@ -1340,17 +1343,19 @@ bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const { int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); if (Src1Idx == -1) return false; - MachineOperand *Src1 = getImmOrMaterializedImm(MI->getOperand(Src1Idx)); - if (!Src0->isImm() && !Src1->isImm()) + MachineOperand *Src1 = &MI->getOperand(Src1Idx); + std::optional Src1Imm = getImmOrMaterializedImm(*Src1); + + if (!Src0Imm && !Src1Imm) return false; // and k0, k1 -> v_mov_b32 (k0 & k1) // or k0, k1 -> v_mov_b32 (k0 | k1) // xor k0, k1 -> v_mov_b32 (k0 ^ k1) - if (Src0->isImm() && Src1->isImm()) { + 
if (Src0Imm && Src1Imm) { int32_t NewImm; -if (!evalBinaryInstruction(Opc, NewImm, Src0->getImm(), Src1->getImm())) +if (!evalBinaryInstruction(Opc, NewImm, *Src0Imm, *Src1Imm)) return false; bool IsSGPR = TRI->isSGPRReg(*MRI, MI->getOperand(0).getReg()); @@ -1366,12 +1371,13 @@ bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const { if (!MI->isCommutable()) return false; - if (Src0->isImm() && !Src1->isImm()) { + if (Src0Imm && !Src1Imm) { std::swap(Src0, Src1); std::swap(Src0Idx, Src1Idx); +std::swap(Src0Imm, Src1Imm); } - int32_t Src1Val = static_cast(Src1->getImm()); + int32_t Src1Val = static_cast(*Src1Imm); if (Opc == AMDGPU::V_OR_B32_e64 || Opc == AMDGPU::V_OR_B32_e32 || Opc == AMDGPU::S_OR_B32) { @@ -1428,9 +1434,12 @@ bool SIFoldOperandsImpl::tryFoldCndMask(MachineInstr &MI) const { MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); if (!Src1->isIdenticalTo(*Src0)) { -auto *Src0Imm = getImmOrMaterializedImm(*Src0); -auto *Src1Imm = getImmOrMaterializedImm(*Src1); -if (!Src1Imm->isIdenticalTo(*Src0Imm)) +std::optional Src1Imm = getImmOrMaterializedImm(*Src1); +if (!Src1Imm) + return false; + +std::optional Src0I
[llvm-branch-commits] [llvm] AMDGPU: Handle brev and not cases in getConstValDefinedInReg (PR #127483)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/127483 >From aa2d8fa644299f58b2593f2f3c5cf532fdf4cdae Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 17 Feb 2025 15:25:29 +0700 Subject: [PATCH] AMDGPU: Handle brev and not cases in getConstValDefinedInReg We should not encounter these cases in the peephole-opt use today, but get the common helper function to handle these. --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp| 24 +-- .../test/CodeGen/AMDGPU/peephole-fold-imm.mir | 24 +-- 2 files changed, 34 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 9e99df7524f4d..4ee5ebd7681b8 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -1330,8 +1330,6 @@ Register SIInstrInfo::insertNE(MachineBasicBlock *MBB, bool SIInstrInfo::getConstValDefinedInReg(const MachineInstr &MI, const Register Reg, int64_t &ImmVal) const { - // TODO: Handle all the special cases handled in SIShrinkInstructions - // (e.g. 
s_brev_b32 imm -> reverse(imm)) switch (MI.getOpcode()) { case AMDGPU::V_MOV_B32_e32: case AMDGPU::S_MOV_B32: @@ -1349,6 +1347,28 @@ bool SIInstrInfo::getConstValDefinedInReg(const MachineInstr &MI, return false; } + case AMDGPU::S_BREV_B32: + case AMDGPU::V_BFREV_B32_e32: + case AMDGPU::V_BFREV_B32_e64: { +const MachineOperand &Src0 = MI.getOperand(1); +if (Src0.isImm()) { + ImmVal = static_cast(reverseBits(Src0.getImm())); + return MI.getOperand(0).getReg() == Reg; +} + +return false; + } + case AMDGPU::S_NOT_B32: + case AMDGPU::V_NOT_B32_e32: + case AMDGPU::V_NOT_B32_e64: { +const MachineOperand &Src0 = MI.getOperand(1); +if (Src0.isImm()) { + ImmVal = static_cast(~static_cast(Src0.getImm())); + return MI.getOperand(0).getReg() == Reg; +} + +return false; + } default: return false; } diff --git a/llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir b/llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir index 227af34f3fa6f..ddeb45a48a6ee 100644 --- a/llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir +++ b/llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir @@ -451,7 +451,7 @@ body: | ; GCN-LABEL: name: fold_s_brev_b32_simm_virtual_0 ; GCN: [[S_BREV_B32_:%[0-9]+]]:sreg_32 = S_BREV_B32 1 -; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY killed [[S_BREV_B32_]] +; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 ; GCN-NEXT: SI_RETURN_TO_EPILOG %0:sreg_32 = S_BREV_B32 1 %1:sreg_32 = COPY killed %0 @@ -466,7 +466,7 @@ body: | ; GCN-LABEL: name: fold_s_brev_b32_simm_virtual_1 ; GCN: [[S_BREV_B32_:%[0-9]+]]:sreg_32 = S_BREV_B32 -64 -; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY killed [[S_BREV_B32_]] +; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 67108863 ; GCN-NEXT: SI_RETURN_TO_EPILOG %0:sreg_32 = S_BREV_B32 -64 %1:sreg_32 = COPY killed %0 @@ -481,8 +481,8 @@ body: | ; GCN-LABEL: name: fold_v_bfrev_b32_e32_imm ; GCN: [[V_BFREV_B32_e32_:%[0-9]+]]:vgpr_32 = V_BFREV_B32_e32 1, implicit $exec -; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed [[V_BFREV_B32_e32_]] -; 
GCN-NEXT: SI_RETURN_TO_EPILOG [[COPY]] +; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec +; GCN-NEXT: SI_RETURN_TO_EPILOG [[V_MOV_B32_e32_]] %0:vgpr_32 = V_BFREV_B32_e32 1, implicit $exec %1:vgpr_32 = COPY killed %0 SI_RETURN_TO_EPILOG %1 @@ -496,8 +496,8 @@ body: | ; GCN-LABEL: name: fold_v_bfrev_b32_e64_imm ; GCN: [[V_BFREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_BFREV_B32_e64 1, implicit $exec -; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed [[V_BFREV_B32_e64_]] -; GCN-NEXT: SI_RETURN_TO_EPILOG [[COPY]] +; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec +; GCN-NEXT: SI_RETURN_TO_EPILOG [[V_MOV_B32_e32_]] %0:vgpr_32 = V_BFREV_B32_e64 1, implicit $exec %1:vgpr_32 = COPY killed %0 SI_RETURN_TO_EPILOG %1 @@ -511,7 +511,7 @@ body: | ; GCN-LABEL: name: fold_s_not_b32_simm_virtual_0 ; GCN: [[S_NOT_B32_:%[0-9]+]]:sreg_32 = S_NOT_B32 1, implicit-def $scc -; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY killed [[S_NOT_B32_]] +; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2 ; GCN-NEXT: SI_RETURN_TO_EPILOG %0:sreg_32 = S_NOT_B32 1, implicit-def $scc %1:sreg_32 = COPY killed %0 @@ -526,7 +526,7 @@ body: | ; GCN-LABEL: name: fold_s_not_b32_simm_virtual_1 ; GCN: [[S_NOT_B32_:%[0-9]+]]:sreg_32 = S_NOT_B32 -64, implicit-def $scc -; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY killed [[S_NOT_B32_]] +; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg
[llvm-branch-commits] [llvm] AMDGPU: Handle subregister uses in SIFoldOperands constant folding (PR #127485)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/127485 >From d978a9636ea12626dd7650efffba63fe8a91e1a4 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 17 Feb 2025 17:18:27 +0700 Subject: [PATCH] AMDGPU: Handle subregister uses in SIFoldOperands constant folding --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 57 +++ .../AMDGPU/constant-fold-imm-immreg.mir | 34 +++ 2 files changed, 67 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 999553bfaff38..8492bb2c3518b 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -123,7 +123,7 @@ class SIFoldOperandsImpl { SmallVectorImpl &FoldList, SmallVectorImpl &CopiesToReplace) const; - MachineOperand *getImmOrMaterializedImm(MachineOperand &Op) const; + std::optional getImmOrMaterializedImm(MachineOperand &Op) const; bool tryConstantFoldOp(MachineInstr *MI) const; bool tryFoldCndMask(MachineInstr &MI) const; bool tryFoldZeroHighBits(MachineInstr &MI) const; @@ -1298,21 +1298,22 @@ static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc) { MI.removeOperand(I); } -MachineOperand * +std::optional SIFoldOperandsImpl::getImmOrMaterializedImm(MachineOperand &Op) const { - // If this has a subregister, it obviously is a register source. 
- if (!Op.isReg() || Op.getSubReg() != AMDGPU::NoSubRegister || - !Op.getReg().isVirtual()) -return &Op; + if (Op.isImm()) +return Op.getImm(); - MachineInstr *Def = MRI->getVRegDef(Op.getReg()); + if (!Op.isReg() || !Op.getReg().isVirtual()) +return std::nullopt; + + const MachineInstr *Def = MRI->getVRegDef(Op.getReg()); if (Def && Def->isMoveImmediate()) { -MachineOperand &ImmSrc = Def->getOperand(1); +const MachineOperand &ImmSrc = Def->getOperand(1); if (ImmSrc.isImm()) - return &ImmSrc; + return TII->extractSubregFromImm(ImmSrc.getImm(), Op.getSubReg()); } - return &Op; + return std::nullopt; } // Try to simplify operations with a constant that may appear after instruction @@ -1327,12 +1328,14 @@ bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const { int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); if (Src0Idx == -1) return false; - MachineOperand *Src0 = getImmOrMaterializedImm(MI->getOperand(Src0Idx)); + + MachineOperand *Src0 = &MI->getOperand(Src0Idx); + std::optional Src0Imm = getImmOrMaterializedImm(*Src0); if ((Opc == AMDGPU::V_NOT_B32_e64 || Opc == AMDGPU::V_NOT_B32_e32 || Opc == AMDGPU::S_NOT_B32) && - Src0->isImm()) { -MI->getOperand(1).ChangeToImmediate(~Src0->getImm()); + Src0Imm) { +MI->getOperand(1).ChangeToImmediate(~*Src0Imm); mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_NOT_B32))); return true; } @@ -1340,17 +1343,19 @@ bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const { int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); if (Src1Idx == -1) return false; - MachineOperand *Src1 = getImmOrMaterializedImm(MI->getOperand(Src1Idx)); - if (!Src0->isImm() && !Src1->isImm()) + MachineOperand *Src1 = &MI->getOperand(Src1Idx); + std::optional Src1Imm = getImmOrMaterializedImm(*Src1); + + if (!Src0Imm && !Src1Imm) return false; // and k0, k1 -> v_mov_b32 (k0 & k1) // or k0, k1 -> v_mov_b32 (k0 | k1) // xor k0, k1 -> v_mov_b32 (k0 ^ k1) - if (Src0->isImm() && Src1->isImm()) { + 
if (Src0Imm && Src1Imm) { int32_t NewImm; -if (!evalBinaryInstruction(Opc, NewImm, Src0->getImm(), Src1->getImm())) +if (!evalBinaryInstruction(Opc, NewImm, *Src0Imm, *Src1Imm)) return false; bool IsSGPR = TRI->isSGPRReg(*MRI, MI->getOperand(0).getReg()); @@ -1366,12 +1371,13 @@ bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const { if (!MI->isCommutable()) return false; - if (Src0->isImm() && !Src1->isImm()) { + if (Src0Imm && !Src1Imm) { std::swap(Src0, Src1); std::swap(Src0Idx, Src1Idx); +std::swap(Src0Imm, Src1Imm); } - int32_t Src1Val = static_cast(Src1->getImm()); + int32_t Src1Val = static_cast(*Src1Imm); if (Opc == AMDGPU::V_OR_B32_e64 || Opc == AMDGPU::V_OR_B32_e32 || Opc == AMDGPU::S_OR_B32) { @@ -1428,9 +1434,12 @@ bool SIFoldOperandsImpl::tryFoldCndMask(MachineInstr &MI) const { MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); if (!Src1->isIdenticalTo(*Src0)) { -auto *Src0Imm = getImmOrMaterializedImm(*Src0); -auto *Src1Imm = getImmOrMaterializedImm(*Src1); -if (!Src1Imm->isIdenticalTo(*Src0Imm)) +std::optional Src1Imm = getImmOrMaterializedImm(*Src1); +if (!Src1Imm) + return false; + +std::optional Src0I
[llvm-branch-commits] [llvm] AMDGPU: Handle brev and not cases in getConstValDefinedInReg (PR #127483)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/127483 >From aa2d8fa644299f58b2593f2f3c5cf532fdf4cdae Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 17 Feb 2025 15:25:29 +0700 Subject: [PATCH] AMDGPU: Handle brev and not cases in getConstValDefinedInReg We should not encounter these cases in the peephole-opt use today, but get the common helper function to handle these. --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp| 24 +-- .../test/CodeGen/AMDGPU/peephole-fold-imm.mir | 24 +-- 2 files changed, 34 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 9e99df7524f4d..4ee5ebd7681b8 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -1330,8 +1330,6 @@ Register SIInstrInfo::insertNE(MachineBasicBlock *MBB, bool SIInstrInfo::getConstValDefinedInReg(const MachineInstr &MI, const Register Reg, int64_t &ImmVal) const { - // TODO: Handle all the special cases handled in SIShrinkInstructions - // (e.g. 
s_brev_b32 imm -> reverse(imm)) switch (MI.getOpcode()) { case AMDGPU::V_MOV_B32_e32: case AMDGPU::S_MOV_B32: @@ -1349,6 +1347,28 @@ bool SIInstrInfo::getConstValDefinedInReg(const MachineInstr &MI, return false; } + case AMDGPU::S_BREV_B32: + case AMDGPU::V_BFREV_B32_e32: + case AMDGPU::V_BFREV_B32_e64: { +const MachineOperand &Src0 = MI.getOperand(1); +if (Src0.isImm()) { + ImmVal = static_cast(reverseBits(Src0.getImm())); + return MI.getOperand(0).getReg() == Reg; +} + +return false; + } + case AMDGPU::S_NOT_B32: + case AMDGPU::V_NOT_B32_e32: + case AMDGPU::V_NOT_B32_e64: { +const MachineOperand &Src0 = MI.getOperand(1); +if (Src0.isImm()) { + ImmVal = static_cast(~static_cast(Src0.getImm())); + return MI.getOperand(0).getReg() == Reg; +} + +return false; + } default: return false; } diff --git a/llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir b/llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir index 227af34f3fa6f..ddeb45a48a6ee 100644 --- a/llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir +++ b/llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir @@ -451,7 +451,7 @@ body: | ; GCN-LABEL: name: fold_s_brev_b32_simm_virtual_0 ; GCN: [[S_BREV_B32_:%[0-9]+]]:sreg_32 = S_BREV_B32 1 -; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY killed [[S_BREV_B32_]] +; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 ; GCN-NEXT: SI_RETURN_TO_EPILOG %0:sreg_32 = S_BREV_B32 1 %1:sreg_32 = COPY killed %0 @@ -466,7 +466,7 @@ body: | ; GCN-LABEL: name: fold_s_brev_b32_simm_virtual_1 ; GCN: [[S_BREV_B32_:%[0-9]+]]:sreg_32 = S_BREV_B32 -64 -; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY killed [[S_BREV_B32_]] +; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 67108863 ; GCN-NEXT: SI_RETURN_TO_EPILOG %0:sreg_32 = S_BREV_B32 -64 %1:sreg_32 = COPY killed %0 @@ -481,8 +481,8 @@ body: | ; GCN-LABEL: name: fold_v_bfrev_b32_e32_imm ; GCN: [[V_BFREV_B32_e32_:%[0-9]+]]:vgpr_32 = V_BFREV_B32_e32 1, implicit $exec -; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed [[V_BFREV_B32_e32_]] -; 
GCN-NEXT: SI_RETURN_TO_EPILOG [[COPY]] +; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec +; GCN-NEXT: SI_RETURN_TO_EPILOG [[V_MOV_B32_e32_]] %0:vgpr_32 = V_BFREV_B32_e32 1, implicit $exec %1:vgpr_32 = COPY killed %0 SI_RETURN_TO_EPILOG %1 @@ -496,8 +496,8 @@ body: | ; GCN-LABEL: name: fold_v_bfrev_b32_e64_imm ; GCN: [[V_BFREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_BFREV_B32_e64 1, implicit $exec -; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed [[V_BFREV_B32_e64_]] -; GCN-NEXT: SI_RETURN_TO_EPILOG [[COPY]] +; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec +; GCN-NEXT: SI_RETURN_TO_EPILOG [[V_MOV_B32_e32_]] %0:vgpr_32 = V_BFREV_B32_e64 1, implicit $exec %1:vgpr_32 = COPY killed %0 SI_RETURN_TO_EPILOG %1 @@ -511,7 +511,7 @@ body: | ; GCN-LABEL: name: fold_s_not_b32_simm_virtual_0 ; GCN: [[S_NOT_B32_:%[0-9]+]]:sreg_32 = S_NOT_B32 1, implicit-def $scc -; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY killed [[S_NOT_B32_]] +; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2 ; GCN-NEXT: SI_RETURN_TO_EPILOG %0:sreg_32 = S_NOT_B32 1, implicit-def $scc %1:sreg_32 = COPY killed %0 @@ -526,7 +526,7 @@ body: | ; GCN-LABEL: name: fold_s_not_b32_simm_virtual_1 ; GCN: [[S_NOT_B32_:%[0-9]+]]:sreg_32 = S_NOT_B32 -64, implicit-def $scc -; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY killed [[S_NOT_B32_]] +; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg
[llvm-branch-commits] [llvm] AMDGPU: Extract lambda used in foldImmediate into a helper function (PR #127484)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/127484 >From 3a2b041e192d8ec5f45734d8ec7321e77e62145c Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 17 Feb 2025 17:12:22 +0700 Subject: [PATCH 1/2] AMDGPU: Extract lambda used in foldImmediate into a helper function It was also too permissive for a more general utilty, only return the original immediate if there is no subregister. --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 59 -- llvm/lib/Target/AMDGPU/SIInstrInfo.h | 9 2 files changed, 45 insertions(+), 23 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 4ee5ebd7681b8..07addb38b8711 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3437,6 +3437,30 @@ void SIInstrInfo::removeModOperands(MachineInstr &MI) const { } } +std::optional SIInstrInfo::extractSubregFromImm(int64_t Imm, + unsigned SubRegIndex) { + switch (SubRegIndex) { + case AMDGPU::NoSubRegister: +return Imm; + case AMDGPU::sub0: +return Lo_32(Imm); + case AMDGPU::sub1: +return Hi_32(Imm); + case AMDGPU::lo16: +return SignExtend64<16>(Imm); + case AMDGPU::hi16: +return SignExtend64<16>(Imm >> 16); + case AMDGPU::sub1_lo16: +return SignExtend64<16>(Imm >> 32); + case AMDGPU::sub1_hi16: +return SignExtend64<16>(Imm >> 48); + default: +return std::nullopt; + } + + llvm_unreachable("covered subregister switch"); +} + bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const { if (!MRI->hasOneNonDBGUse(Reg)) @@ -3446,25 +3470,6 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, if (!getConstValDefinedInReg(DefMI, Reg, Imm)) return false; - auto getImmFor = [=](const MachineOperand &UseOp) -> int64_t { -switch (UseOp.getSubReg()) { -default: - return Imm; -case AMDGPU::sub0: - return Lo_32(Imm); -case AMDGPU::sub1: - return Hi_32(Imm); -case AMDGPU::lo16: - return 
SignExtend64<16>(Imm); -case AMDGPU::hi16: - return SignExtend64<16>(Imm >> 16); -case AMDGPU::sub1_lo16: - return SignExtend64<16>(Imm >> 32); -case AMDGPU::sub1_hi16: - return SignExtend64<16>(Imm >> 48); -} - }; - assert(!DefMI.getOperand(0).getSubReg() && "Expected SSA form"); unsigned Opc = UseMI.getOpcode(); @@ -3480,7 +3485,11 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, : AMDGPU::V_MOV_B32_e32 : Is64Bit ? AMDGPU::S_MOV_B64_IMM_PSEUDO : AMDGPU::S_MOV_B32; -APInt Imm(Is64Bit ? 64 : 32, getImmFor(UseMI.getOperand(1)), + +std::optional SubRegImm = +extractSubregFromImm(Imm, UseMI.getOperand(1).getSubReg()); + +APInt Imm(Is64Bit ? 64 : 32, *SubRegImm, /*isSigned=*/true, /*implicitTrunc=*/true); if (RI.isAGPR(*MRI, DstReg)) { @@ -3591,7 +3600,8 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, if (NewOpc == AMDGPU::V_FMAMK_F16_fake16) return false; - const int64_t Imm = getImmFor(RegSrc == Src1 ? *Src0 : *Src1); + const std::optional SubRegImm = extractSubregFromImm( + Imm, RegSrc == Src1 ? Src0->getSubReg() : Src1->getSubReg()); // FIXME: This would be a lot easier if we could return a new instruction // instead of having to modify in place. @@ -3608,7 +3618,7 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, UseMI.untieRegOperand( AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); - Src1->ChangeToImmediate(Imm); + Src1->ChangeToImmediate(*SubRegImm); removeModOperands(UseMI); UseMI.setDesc(get(NewOpc)); @@ -3679,8 +3689,11 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, UseMI.untieRegOperand( AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); + const std::optional SubRegImm = + extractSubregFromImm(Imm, Src2->getSubReg()); + // ChangingToImmediate adds Src2 back to the instruction. - Src2->ChangeToImmediate(getImmFor(*Src2)); + Src2->ChangeToImmediate(*SubRegImm); // These come before src2. 
removeModOperands(UseMI); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index ddd15e1766f70..06dbdf65e458f 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -401,6 +401,15 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { void removeModOperands(MachineInstr &MI) const; + /// Return the extracted immediate value in a subregister use f
[llvm-branch-commits] [llvm] AMDGPU: Extract lambda used in foldImmediate into a helper function (PR #127484)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/127484 >From 3a2b041e192d8ec5f45734d8ec7321e77e62145c Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 17 Feb 2025 17:12:22 +0700 Subject: [PATCH 1/2] AMDGPU: Extract lambda used in foldImmediate into a helper function It was also too permissive for a more general utilty, only return the original immediate if there is no subregister. --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 59 -- llvm/lib/Target/AMDGPU/SIInstrInfo.h | 9 2 files changed, 45 insertions(+), 23 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 4ee5ebd7681b8..07addb38b8711 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3437,6 +3437,30 @@ void SIInstrInfo::removeModOperands(MachineInstr &MI) const { } } +std::optional SIInstrInfo::extractSubregFromImm(int64_t Imm, + unsigned SubRegIndex) { + switch (SubRegIndex) { + case AMDGPU::NoSubRegister: +return Imm; + case AMDGPU::sub0: +return Lo_32(Imm); + case AMDGPU::sub1: +return Hi_32(Imm); + case AMDGPU::lo16: +return SignExtend64<16>(Imm); + case AMDGPU::hi16: +return SignExtend64<16>(Imm >> 16); + case AMDGPU::sub1_lo16: +return SignExtend64<16>(Imm >> 32); + case AMDGPU::sub1_hi16: +return SignExtend64<16>(Imm >> 48); + default: +return std::nullopt; + } + + llvm_unreachable("covered subregister switch"); +} + bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const { if (!MRI->hasOneNonDBGUse(Reg)) @@ -3446,25 +3470,6 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, if (!getConstValDefinedInReg(DefMI, Reg, Imm)) return false; - auto getImmFor = [=](const MachineOperand &UseOp) -> int64_t { -switch (UseOp.getSubReg()) { -default: - return Imm; -case AMDGPU::sub0: - return Lo_32(Imm); -case AMDGPU::sub1: - return Hi_32(Imm); -case AMDGPU::lo16: - return 
SignExtend64<16>(Imm); -case AMDGPU::hi16: - return SignExtend64<16>(Imm >> 16); -case AMDGPU::sub1_lo16: - return SignExtend64<16>(Imm >> 32); -case AMDGPU::sub1_hi16: - return SignExtend64<16>(Imm >> 48); -} - }; - assert(!DefMI.getOperand(0).getSubReg() && "Expected SSA form"); unsigned Opc = UseMI.getOpcode(); @@ -3480,7 +3485,11 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, : AMDGPU::V_MOV_B32_e32 : Is64Bit ? AMDGPU::S_MOV_B64_IMM_PSEUDO : AMDGPU::S_MOV_B32; -APInt Imm(Is64Bit ? 64 : 32, getImmFor(UseMI.getOperand(1)), + +std::optional SubRegImm = +extractSubregFromImm(Imm, UseMI.getOperand(1).getSubReg()); + +APInt Imm(Is64Bit ? 64 : 32, *SubRegImm, /*isSigned=*/true, /*implicitTrunc=*/true); if (RI.isAGPR(*MRI, DstReg)) { @@ -3591,7 +3600,8 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, if (NewOpc == AMDGPU::V_FMAMK_F16_fake16) return false; - const int64_t Imm = getImmFor(RegSrc == Src1 ? *Src0 : *Src1); + const std::optional SubRegImm = extractSubregFromImm( + Imm, RegSrc == Src1 ? Src0->getSubReg() : Src1->getSubReg()); // FIXME: This would be a lot easier if we could return a new instruction // instead of having to modify in place. @@ -3608,7 +3618,7 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, UseMI.untieRegOperand( AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); - Src1->ChangeToImmediate(Imm); + Src1->ChangeToImmediate(*SubRegImm); removeModOperands(UseMI); UseMI.setDesc(get(NewOpc)); @@ -3679,8 +3689,11 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, UseMI.untieRegOperand( AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); + const std::optional SubRegImm = + extractSubregFromImm(Imm, Src2->getSubReg()); + // ChangingToImmediate adds Src2 back to the instruction. - Src2->ChangeToImmediate(getImmFor(*Src2)); + Src2->ChangeToImmediate(*SubRegImm); // These come before src2. 
removeModOperands(UseMI); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index ddd15e1766f70..06dbdf65e458f 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -401,6 +401,15 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { void removeModOperands(MachineInstr &MI) const; + /// Return the extracted immediate value in a subregister use f
[llvm-branch-commits] [llvm] AMDGPU: Do not try to commute instruction with same input register (PR #127562)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/127562 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Do not try to commute instruction with same input register (PR #127562)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack href="https://app.graphite.dev/github/pr/llvm/llvm-project/127562?utm_source=stack-comment-downstack-mergeability-warning"; > >on Graphite. > https://graphite.dev/docs/merge-pull-requests";>Learn more * **#127563** https://app.graphite.dev/github/pr/llvm/llvm-project/127563?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#127562** https://app.graphite.dev/github/pr/llvm/llvm-project/127562?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> 👈 https://app.graphite.dev/github/pr/llvm/llvm-project/127562?utm_source=stack-comment-view-in-graphite"; target="_blank">(View in Graphite) * **#127485** https://app.graphite.dev/github/pr/llvm/llvm-project/127485?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#127484** https://app.graphite.dev/github/pr/llvm/llvm-project/127484?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#127483** https://app.graphite.dev/github/pr/llvm/llvm-project/127483?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#127482** https://app.graphite.dev/github/pr/llvm/llvm-project/127482?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#127481** https://app.graphite.dev/github/pr/llvm/llvm-project/127481?utm_source=stack-comment-icon"; 
target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#127480** https://app.graphite.dev/github/pr/llvm/llvm-project/127480?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * `main` This stack of pull requests is managed by https://graphite.dev?utm-source=stack-comment";>Graphite. Learn more about https://stacking.dev/?utm_source=stack-comment";>stacking. https://github.com/llvm/llvm-project/pull/127562 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Do not try to commute instruction with same input register (PR #127562)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes There's little point to trying to commute an instruction if the two operands are already the same. This avoids an assertion in a future patch, but this likely isn't the correct fix. The worklist management in SIFoldOperands is dodgy, and we should probably fix it to work like PeepholeOpt (i.e. stop looking at use lists, and fold from users). This is an extension of the already handled special case which it's trying to avoid folding an instruction which is already being folded. --- Full diff: https://github.com/llvm/llvm-project/pull/127562.diff 6 Files Affected: - (modified) llvm/lib/Target/AMDGPU/SIFoldOperands.cpp (+11-1) - (modified) llvm/test/CodeGen/AMDGPU/dag-divergence.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/div_i128.ll (+4-4) - (modified) llvm/test/CodeGen/AMDGPU/div_v2i128.ll (+16-16) - (modified) llvm/test/CodeGen/AMDGPU/rem_i128.ll (+4-4) - (added) llvm/test/CodeGen/AMDGPU/si-fold-operands-commute-same-operands-assert.mir (+65) ``diff diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 8492bb2c3518b..84773349e0ca0 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -691,11 +691,21 @@ bool SIFoldOperandsImpl::tryAddToFoldList( if (!CanCommute) return false; +MachineOperand &Op = MI->getOperand(OpNo); +MachineOperand &CommutedOp = MI->getOperand(CommuteOpNo); + // One of operands might be an Imm operand, and OpNo may refer to it after // the call of commuteInstruction() below. Such situations are avoided // here explicitly as OpNo must be a register operand to be a candidate // for memory folding. -if (!MI->getOperand(OpNo).isReg() || !MI->getOperand(CommuteOpNo).isReg()) +if (!Op.isReg() || !CommutedOp.isReg()) + return false; + +// The same situation with an immediate could reproduce if both inputs are +// the same register. 
+if (Op.isReg() && CommutedOp.isReg() && +(Op.getReg() == CommutedOp.getReg() && + Op.getSubReg() == CommutedOp.getSubReg())) return false; if (!TII->commuteInstruction(*MI, false, OpNo, CommuteOpNo)) diff --git a/llvm/test/CodeGen/AMDGPU/dag-divergence.ll b/llvm/test/CodeGen/AMDGPU/dag-divergence.ll index dfc28539ea814..0f573fcc6deaa 100644 --- a/llvm/test/CodeGen/AMDGPU/dag-divergence.ll +++ b/llvm/test/CodeGen/AMDGPU/dag-divergence.ll @@ -37,8 +37,8 @@ define amdgpu_kernel void @flat_load_maybe_divergent(ptr addrspace(4) %k, ptr %f ; GCN-LABEL: {{^}}wide_carry_divergence_error: ; GCN: v_sub_u32_e32 ; GCN: v_subb_u32_e32 -; GCN: v_subbrev_u32_e32 -; GCN: v_subbrev_u32_e32 +; GCN: v_subb_u32_e32 +; GCN: v_subb_u32_e32 define <2 x i128> @wide_carry_divergence_error(i128 %arg) { %i = call i128 @llvm.ctlz.i128(i128 %arg, i1 false) %i1 = sub i128 0, %i diff --git a/llvm/test/CodeGen/AMDGPU/div_i128.ll b/llvm/test/CodeGen/AMDGPU/div_i128.ll index 59bc7f332bf1e..3d9043d30c1ce 100644 --- a/llvm/test/CodeGen/AMDGPU/div_i128.ll +++ b/llvm/test/CodeGen/AMDGPU/div_i128.ll @@ -65,8 +65,8 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-NEXT:v_cndmask_b32_e64 v7, v7, 0, vcc ; GFX9-NEXT:v_sub_co_u32_e32 v2, vcc, v2, v3 ; GFX9-NEXT:v_subb_co_u32_e32 v3, vcc, v4, v7, vcc -; GFX9-NEXT:v_subbrev_co_u32_e32 v4, vcc, 0, v5, vcc -; GFX9-NEXT:v_subbrev_co_u32_e32 v5, vcc, 0, v5, vcc +; GFX9-NEXT:v_subb_co_u32_e32 v4, vcc, 0, v5, vcc +; GFX9-NEXT:v_subb_co_u32_e32 v5, vcc, 0, v5, vcc ; GFX9-NEXT:s_mov_b64 s[6:7], 0x7f ; GFX9-NEXT:v_cmp_lt_u64_e32 vcc, s[6:7], v[2:3] ; GFX9-NEXT:v_mov_b32_e32 v18, v16 @@ -2355,8 +2355,8 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-NEXT:v_sub_co_u32_e32 v12, vcc, v8, v9 ; GFX9-NEXT:v_subb_co_u32_e32 v13, vcc, v10, v13, vcc ; GFX9-NEXT:v_mov_b32_e32 v8, 0 -; GFX9-NEXT:v_subbrev_co_u32_e32 v14, vcc, 0, v8, vcc -; GFX9-NEXT:v_subbrev_co_u32_e32 v15, vcc, 0, v8, vcc +; GFX9-NEXT:v_subb_co_u32_e32 v14, vcc, 0, v8, vcc +; 
GFX9-NEXT:v_subb_co_u32_e32 v15, vcc, 0, v8, vcc ; GFX9-NEXT:v_cmp_lt_u64_e32 vcc, s[6:7], v[12:13] ; GFX9-NEXT:v_or_b32_e32 v10, v13, v15 ; GFX9-NEXT:v_cndmask_b32_e64 v8, 0, 1, vcc diff --git a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll index 41999b249a0e8..a58c1e7883b0b 100644 --- a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll +++ b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll @@ -66,10 +66,10 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) { ; SDAG-NEXT:v_sub_i32_e32 v2, vcc, v2, v10 ; SDAG-NEXT:v_subb_u32_e32 v3, vcc, v8, v9, vcc ; SDAG-NEXT:v_xor_b32_e32 v8, 0x7f, v2 -; SDAG-NEXT:v_subbrev_u32_e32 v10, vcc, 0, v18, vcc +; SDAG-NEXT:v_subb_u32_e32 v10, vcc, 0, v18, vcc ; SDAG-NEXT:v_cmp_lt_u64_e64 s[4:5], s[
[llvm-branch-commits] [llvm] AMDGPU: Fix overly conservative immediate operand check (PR #127563)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes The real legality check is peformed later anyway, so this was unnecessarily blocking immediate folds in handled cases. This also stops folding s_fmac_f32 to s_fmamk_f32 in a few tests, but that seems better. The globalisel changes look suspicious, it may be mishandling constants for VOP3P instructions. --- Full diff: https://github.com/llvm/llvm-project/pull/127563.diff 10 Files Affected: - (modified) llvm/lib/Target/AMDGPU/SIFoldOperands.cpp (+2-1) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll (+4-12) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll (+4-12) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll (+1-3) - (modified) llvm/test/CodeGen/AMDGPU/bug-cselect-b64.ll (+2-4) - (modified) llvm/test/CodeGen/AMDGPU/constrained-shift.ll (+2-4) - (modified) llvm/test/CodeGen/AMDGPU/fold-operands-scalar-fmac.mir (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/global-saddr-load.ll (+1-4) - (modified) llvm/test/CodeGen/AMDGPU/packed-fp32.ll (+5-5) - (modified) llvm/test/CodeGen/AMDGPU/scalar-float-sop2.ll (+2-2) ``diff diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 84773349e0ca0..cbd858b9002ee 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -830,7 +830,8 @@ bool SIFoldOperandsImpl::tryToFoldACImm( if (UseOpIdx >= Desc.getNumOperands()) return false; - if (!AMDGPU::isSISrcInlinableOperand(Desc, UseOpIdx)) + // Filter out unhandled pseudos. 
+ if (!AMDGPU::isSISrcOperand(Desc, UseOpIdx)) return false; uint8_t OpTy = Desc.operands()[UseOpIdx].OperandType; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll index 4be00fedb972e..89078f20f1d47 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll @@ -920,9 +920,7 @@ define amdgpu_ps i64 @s_andn2_v4i16(<4 x i16> inreg %src0, <4 x i16> inreg %src1 ; GFX6-NEXT:s_lshl_b32 s3, s9, 16 ; GFX6-NEXT:s_and_b32 s4, s8, 0x ; GFX6-NEXT:s_or_b32 s3, s3, s4 -; GFX6-NEXT:s_mov_b32 s4, -1 -; GFX6-NEXT:s_mov_b32 s5, s4 -; GFX6-NEXT:s_xor_b64 s[2:3], s[2:3], s[4:5] +; GFX6-NEXT:s_xor_b64 s[2:3], s[2:3], -1 ; GFX6-NEXT:s_and_b64 s[0:1], s[0:1], s[2:3] ; GFX6-NEXT:; return to shader part epilog ; @@ -962,9 +960,7 @@ define amdgpu_ps i64 @s_andn2_v4i16_commute(<4 x i16> inreg %src0, <4 x i16> inr ; GFX6-NEXT:s_lshl_b32 s3, s9, 16 ; GFX6-NEXT:s_and_b32 s4, s8, 0x ; GFX6-NEXT:s_or_b32 s3, s3, s4 -; GFX6-NEXT:s_mov_b32 s4, -1 -; GFX6-NEXT:s_mov_b32 s5, s4 -; GFX6-NEXT:s_xor_b64 s[2:3], s[2:3], s[4:5] +; GFX6-NEXT:s_xor_b64 s[2:3], s[2:3], -1 ; GFX6-NEXT:s_and_b64 s[0:1], s[2:3], s[0:1] ; GFX6-NEXT:; return to shader part epilog ; @@ -1004,9 +1000,7 @@ define amdgpu_ps { i64, i64 } @s_andn2_v4i16_multi_use(<4 x i16> inreg %src0, <4 ; GFX6-NEXT:s_lshl_b32 s3, s9, 16 ; GFX6-NEXT:s_and_b32 s4, s8, 0x ; GFX6-NEXT:s_or_b32 s3, s3, s4 -; GFX6-NEXT:s_mov_b32 s4, -1 -; GFX6-NEXT:s_mov_b32 s5, s4 -; GFX6-NEXT:s_xor_b64 s[2:3], s[2:3], s[4:5] +; GFX6-NEXT:s_xor_b64 s[2:3], s[2:3], -1 ; GFX6-NEXT:s_and_b64 s[0:1], s[0:1], s[2:3] ; GFX6-NEXT:; return to shader part epilog ; @@ -1060,9 +1054,7 @@ define amdgpu_ps { i64, i64 } @s_andn2_v4i16_multi_foldable_use(<4 x i16> inreg ; GFX6-NEXT:s_lshl_b32 s5, s13, 16 ; GFX6-NEXT:s_and_b32 s6, s12, 0x ; GFX6-NEXT:s_or_b32 s5, s5, s6 -; GFX6-NEXT:s_mov_b32 s6, -1 -; GFX6-NEXT:s_mov_b32 s7, s6 -; GFX6-NEXT:s_xor_b64 s[4:5], s[4:5], s[6:7] +; 
GFX6-NEXT:s_xor_b64 s[4:5], s[4:5], -1 ; GFX6-NEXT:s_and_b64 s[0:1], s[0:1], s[4:5] ; GFX6-NEXT:s_and_b64 s[2:3], s[2:3], s[4:5] ; GFX6-NEXT:; return to shader part epilog diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll index e7119c89ac06c..065fadf3b5ef3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll @@ -919,9 +919,7 @@ define amdgpu_ps i64 @s_orn2_v4i16(<4 x i16> inreg %src0, <4 x i16> inreg %src1) ; GFX6-NEXT:s_lshl_b32 s3, s9, 16 ; GFX6-NEXT:s_and_b32 s4, s8, 0x ; GFX6-NEXT:s_or_b32 s3, s3, s4 -; GFX6-NEXT:s_mov_b32 s4, -1 -; GFX6-NEXT:s_mov_b32 s5, s4 -; GFX6-NEXT:s_xor_b64 s[2:3], s[2:3], s[4:5] +; GFX6-NEXT:s_xor_b64 s[2:3], s[2:3], -1 ; GFX6-NEXT:s_or_b64 s[0:1], s[0:1], s[2:3] ; GFX6-NEXT:; return to shader part epilog ; @@ -961,9 +959,7 @@ define amdgpu_ps i64 @s_orn2_v4i16_commute(<4 x i16> inreg %src0, <4 x i16> inre ; GFX6-NEXT:s_lshl_b32 s3, s9, 16 ; GFX6-NEXT:s_and_b32 s4, s8, 0x ; GFX6-NEXT:s_or_b32 s3, s3, s4 -; GFX6-NEXT:s_mov_b32 s4, -1 -; GFX6-NEXT:
[llvm-branch-commits] [llvm] AMDGPU: Fix overly conservative immediate operand check (PR #127563)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack href="https://app.graphite.dev/github/pr/llvm/llvm-project/127563?utm_source=stack-comment-downstack-mergeability-warning"; > >on Graphite. > https://graphite.dev/docs/merge-pull-requests";>Learn more * **#127563** https://app.graphite.dev/github/pr/llvm/llvm-project/127563?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> 👈 https://app.graphite.dev/github/pr/llvm/llvm-project/127563?utm_source=stack-comment-view-in-graphite"; target="_blank">(View in Graphite) * **#127562** https://app.graphite.dev/github/pr/llvm/llvm-project/127562?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#127485** https://app.graphite.dev/github/pr/llvm/llvm-project/127485?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#127484** https://app.graphite.dev/github/pr/llvm/llvm-project/127484?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#127483** https://app.graphite.dev/github/pr/llvm/llvm-project/127483?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#127482** https://app.graphite.dev/github/pr/llvm/llvm-project/127482?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#127481** https://app.graphite.dev/github/pr/llvm/llvm-project/127481?utm_source=stack-comment-icon"; 
target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#127480** https://app.graphite.dev/github/pr/llvm/llvm-project/127480?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * `main` This stack of pull requests is managed by https://graphite.dev?utm-source=stack-comment";>Graphite. Learn more about https://stacking.dev/?utm_source=stack-comment";>stacking. https://github.com/llvm/llvm-project/pull/127563 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Fix overly conservative immediate operand check (PR #127563)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/127563 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Handle subregister uses in SIFoldOperands constant folding (PR #127485)
https://github.com/arsenm commented: Later patch exposes miscompiles with this one https://github.com/llvm/llvm-project/pull/127485 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang-tools-extra] [clangd] Add clangd 20 release notes (PR #127358)
https://github.com/HighCommander4 updated https://github.com/llvm/llvm-project/pull/127358 >From 8359f75adaacd9d2b7247e8887151cce59e7c086 Mon Sep 17 00:00:00 2001 From: Nathan Ridge Date: Sat, 15 Feb 2025 01:30:48 -0500 Subject: [PATCH] [clangd] Add clangd 20 release notes --- clang-tools-extra/docs/ReleaseNotes.rst | 53 +++-- 1 file changed, 50 insertions(+), 3 deletions(-) diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index cc5f64a3f9fa3..316ac1743ccb7 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -56,7 +56,8 @@ Improvements to clangd Inlay hints ^^^ -- Added `DefaultArguments` Inlay Hints option. +- Added support for inlay hints for default arguments, enabled using the + `DefaultArguments` config option (#GH95712) Diagnostics ^^^ @@ -67,21 +68,42 @@ Semantic Highlighting Compile flags ^ +- Fixed a bug where clangd would unnecessarily reparse open files whose + compile command did not change when receiving a new compile command + via an LSP `workspace/configuration` request (#GH115438) + Hover ^ +- Hovering over a function name now shows the function's documentation + comment even if the comment is written above the function's out-of-line + definition in a different source file (#GH67802) + Code completion ^^^ +- Added an `ArgumentLists` config option under `Completion`. This is a more + flexible version of the `--function-arg-placeholders` command line flag, + allowing users more detailed control of what is inserted in argument list + position when clangd completes the name of a function in a function call + context. (#GH111322) +- Clangd now supports configuring which headers should be inserted using + `<>` vs. `""` syntax using the `QuotedHeaders` and `AngledHeaders` config + options under `Style` (#GH67749) - Added completion for C++20 keywords. 
+- Improved code completion behaviour in dependent/templated code +- Completion items now include documentation comments in more cases (#GH120099) Code actions - Added `Swap operands` tweak for certain binary operators. - - Improved the extract-to-function code action to allow extracting statements with overloaded operators like ``<<`` of ``std::ostream``. +- `Define outline` now handles member functions of class templates, and + member function templates. +- `Extract variable` can now operate on the top-level expression in an + expression statement (#GH112525) Signature help ^^ @@ -89,13 +111,38 @@ Signature help Cross-references +- Clangd now supports the "outgoing calls" direction of call hierarchy + (#GH77556) +- Call hierarchy can now be invoked on fields and namespace-scope + variables (#GH113900) +- Improved heuristics for filtering out generated Protobuf symbol names + during indexing (#GH110091) +- Compiler intrinsics defined in `*intrin.h` system headers are now + indexed even if they have reserved names (#GH119735) +- Various improvements to go-to-definition in templated code + Objective-C ^^^ +Clang-tidy integration +^^ + +- Improved robustness in handling clang-tidy check names (#GH109421) + +C++20 Modules Support +^ + +- Support code completion for symbols defined in modules (#GH110083) +- Improve performance when opening files that import modules (#GH106683) +- Compile commands for modules now respect modifications specified in `.clangd` + files (#GH122606) + Miscellaneous ^ -- The DefineOutline tweak now handles member functions of class templates. +- Fixed an OOM affecting some versions of libcxx headers compiled in C++20 + mode (#GH108866) +- Various other stability improvements, e.g. crash fixes Improvements to clang-doc - ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang-tools-extra] [clangd] Add clangd 20 release notes (PR #127358)
HighCommander4 wrote: Thanks all for the reviews! Adding @tstellar to request merging this to the llvm 20 branch. https://github.com/llvm/llvm-project/pull/127358 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Fix foldImmediate breaking register class constraints (PR #127481)
arsenm wrote: ### Merge activity * **Feb 17, 10:28 PM EST**: A user started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/127481). https://github.com/llvm/llvm-project/pull/127481 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] release/20.x: [ELF] Refine isExported/isPreemptible condition (PR #126848)
https://github.com/llvmbot updated https://github.com/llvm/llvm-project/pull/126848 >From a2b502050302a4cf8a9c4e623331810eed51bb81 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 30 Jan 2025 19:03:38 -0800 Subject: [PATCH 1/3] [ELF] ICF: replace includeInDynsym with isExported Similar to the change to MarkLive.cpp when isExported was introduced. includeInDynsym might return true even when isExported is false for statically linked executables. (cherry picked from commit 45f538ecba1a51768002a5bc0c194b5af4cd9c27) --- lld/ELF/Driver.cpp | 2 +- lld/test/ELF/icf-safe.s | 9 ++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 2d8a5ade2fece..6121a4254453c 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -2434,7 +2434,7 @@ static void findKeepUniqueSections(Ctx &ctx, opt::InputArgList &args) { // or DSOs, so we conservatively mark them as address-significant. bool icfSafe = ctx.arg.icf == ICFLevel::Safe; for (Symbol *sym : ctx.symtab->getSymbols()) -if (sym->includeInDynsym(ctx)) +if (sym->isExported) markAddrsig(icfSafe, sym); // Visit the address-significance table in each object file and mark each diff --git a/lld/test/ELF/icf-safe.s b/lld/test/ELF/icf-safe.s index 96776feccbc67..5381532609938 100644 --- a/lld/test/ELF/icf-safe.s +++ b/lld/test/ELF/icf-safe.s @@ -1,16 +1,19 @@ # REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64 %S/Inputs/shared.s -o %ta.o +# RUN: ld.lld -shared -soname=ta %ta.o -o %ta.so # RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t1.o # RUN: llvm-objcopy %t1.o %t1copy.o # RUN: llvm-objcopy --localize-symbol=h1 %t1.o %t1changed.o # RUN: ld.lld -r %t1.o -o %t1reloc.o # RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %S/Inputs/icf-safe.s -o %t2.o -# RUN: ld.lld %t1.o %t2.o -o %t2 --icf=safe --print-icf-sections | FileCheck %s +# RUN: ld.lld %t1.o %t2.o -o %t2 --icf=safe --print-icf-sections --export-dynamic | FileCheck %s # RUN: ld.lld 
%t1copy.o %t2.o -o %t2 --icf=safe --print-icf-sections | FileCheck %s # RUN: ld.lld %t1.o %t2.o -o %t3 --icf=safe --print-icf-sections -shared | FileCheck --check-prefix=EXPORT %s -# RUN: ld.lld %t1.o %t2.o -o %t3 --icf=safe --print-icf-sections --export-dynamic | FileCheck --check-prefix=EXPORT %s +## Exported symbols are suppressed for ICF when dynamic linking is enabled. +# RUN: ld.lld %t1.o %t2.o %ta.so -o %t3 --icf=safe --print-icf-sections --export-dynamic | FileCheck --check-prefix=EXPORT %s # RUN: ld.lld %t1.o %t2.o -o %t2 --icf=all --print-icf-sections | FileCheck --check-prefix=ALL %s -# RUN: ld.lld %t1.o %t2.o -o %t2 --icf=all --print-icf-sections --export-dynamic | FileCheck --check-prefix=ALL-EXPORT %s +# RUN: ld.lld %t1.o %t2.o %ta.so -o %t2 --icf=all --print-icf-sections --export-dynamic | FileCheck --check-prefix=ALL-EXPORT %s # RUN: ld.lld %t1changed.o -o %t4 --icf=safe 2>&1 | FileCheck --check-prefix=SH_LINK_0 %s # RUN: ld.lld %t1reloc.o -o %t4 --icf=safe 2>&1 | FileCheck --check-prefix=SH_LINK_0 %s >From 02a511e42c6783f14ac45d71c5278dd031c1bcf7 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 30 Jan 2025 22:24:04 -0800 Subject: [PATCH 2/3] [ELF] Merge exportDynamic/isExported and remove Symbol::includeInDynsym Commit 3733ed6f1c6b0eef1e13e175ac81ad309fc0b080 introduced isExported to cache includeInDynsym. If we don't unnecessarily set isExported for undefined symbols, exportDynamic/includeInDynsym can be replaced with isExported. 
(cherry picked from commit d6fa74ab3d4cc77005836e72a2d6fe222bab4c59) --- lld/ELF/Driver.cpp| 3 ++- lld/ELF/InputFiles.cpp| 4 ++-- lld/ELF/SymbolTable.cpp | 8 +++- lld/ELF/Symbols.cpp | 27 +-- lld/ELF/Symbols.h | 15 +-- lld/ELF/SyntheticSections.cpp | 4 ++-- lld/ELF/Writer.cpp| 13 +++-- 7 files changed, 34 insertions(+), 40 deletions(-) diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 6121a4254453c..391140bce7394 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -2575,7 +2575,8 @@ void LinkerDriver::compileBitcodeFiles(bool skipLinkedOutput) { for (Symbol *sym : obj->getGlobalSymbols()) { if (!sym->isDefined()) continue; -if (ctx.hasDynsym && sym->includeInDynsym(ctx)) +if (ctx.hasDynsym && ctx.arg.exportDynamic && +sym->computeBinding(ctx) != STB_LOCAL) sym->isExported = true; if (sym->hasVersionSuffix) sym->parseSymbolVersion(ctx); diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index caee72cf31955..d43de8ce6dfef 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -1581,7 +1581,7 @@ template void SharedFile::parse() { } Symbol *s = ctx.symtab->addSymbol( Undefined{this, name, sym.getBinding(), sym.st_other, sym.getType()}); - s->exportDyna
[llvm-branch-commits] [lld] 02a511e - [ELF] Merge exportDynamic/isExported and remove Symbol::includeInDynsym
Author: Fangrui Song Date: 2025-02-17T16:14:33-08:00 New Revision: 02a511e42c6783f14ac45d71c5278dd031c1bcf7 URL: https://github.com/llvm/llvm-project/commit/02a511e42c6783f14ac45d71c5278dd031c1bcf7 DIFF: https://github.com/llvm/llvm-project/commit/02a511e42c6783f14ac45d71c5278dd031c1bcf7.diff LOG: [ELF] Merge exportDynamic/isExported and remove Symbol::includeInDynsym Commit 3733ed6f1c6b0eef1e13e175ac81ad309fc0b080 introduced isExported to cache includeInDynsym. If we don't unnecessarily set isExported for undefined symbols, exportDynamic/includeInDynsym can be replaced with isExported. (cherry picked from commit d6fa74ab3d4cc77005836e72a2d6fe222bab4c59) Added: Modified: lld/ELF/Driver.cpp lld/ELF/InputFiles.cpp lld/ELF/SymbolTable.cpp lld/ELF/Symbols.cpp lld/ELF/Symbols.h lld/ELF/SyntheticSections.cpp lld/ELF/Writer.cpp Removed: diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 6121a4254453c..391140bce7394 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -2575,7 +2575,8 @@ void LinkerDriver::compileBitcodeFiles(bool skipLinkedOutput) { for (Symbol *sym : obj->getGlobalSymbols()) { if (!sym->isDefined()) continue; -if (ctx.hasDynsym && sym->includeInDynsym(ctx)) +if (ctx.hasDynsym && ctx.arg.exportDynamic && +sym->computeBinding(ctx) != STB_LOCAL) sym->isExported = true; if (sym->hasVersionSuffix) sym->parseSymbolVersion(ctx); diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index caee72cf31955..d43de8ce6dfef 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -1581,7 +1581,7 @@ template void SharedFile::parse() { } Symbol *s = ctx.symtab->addSymbol( Undefined{this, name, sym.getBinding(), sym.st_other, sym.getType()}); - s->exportDynamic = true; + s->isExported = true; if (sym.getBinding() != STB_WEAK && ctx.arg.unresolvedSymbolsInShlib != UnresolvedPolicy::Ignore) requiredSymbols.push_back(s); @@ -1778,7 +1778,7 @@ static void createBitcodeSymbol(Ctx &ctx, Symbol *&sym, nullptr); // The definition can be omitted if 
all bitcode definitions satisfy // `canBeOmittedFromSymbolTable()` and isUsedInRegularObj is false. -// The latter condition is tested in Symbol::includeInDynsym. +// The latter condition is tested in parseVersionAndComputeIsPreemptible. sym->ltoCanOmit = objSym.canBeOmittedFromSymbolTable() && (!sym->isDefined() || sym->ltoCanOmit); sym->resolve(ctx, newSym); diff --git a/lld/ELF/SymbolTable.cpp b/lld/ELF/SymbolTable.cpp index 975700505facb..b8a70d4e898fc 100644 --- a/lld/ELF/SymbolTable.cpp +++ b/lld/ELF/SymbolTable.cpp @@ -203,7 +203,7 @@ void SymbolTable::handleDynamicList() { syms = findByVersion(ver); for (Symbol *sym : syms) - sym->exportDynamic = sym->inDynamicList = true; + sym->isExported = sym->inDynamicList = true; } } @@ -350,10 +350,8 @@ void SymbolTable::scanVersionScript() { assignAsterisk(pat, &v, true); } - // isPreemptible is false at this point. To correctly compute the binding of a - // Defined (which is used by includeInDynsym(ctx)), we need to know if it is - // VER_NDX_LOCAL or not. Compute symbol versions before handling - // --dynamic-list. + // Handle --dynamic-list. If a specified symbol is also matched by local: in a + // version script, the version script takes precedence. handleDynamicList(); } diff --git a/lld/ELF/Symbols.cpp b/lld/ELF/Symbols.cpp index b10391c65dfdc..890877cb1bc04 100644 --- a/lld/ELF/Symbols.cpp +++ b/lld/ELF/Symbols.cpp @@ -268,16 +268,6 @@ uint8_t Symbol::computeBinding(Ctx &ctx) const { return binding; } -bool Symbol::includeInDynsym(Ctx &ctx) const { - if (computeBinding(ctx) == STB_LOCAL) -return false; - if (!isDefined() && !isCommon()) -return true; - - return exportDynamic || - (ctx.arg.exportDynamic && (isUsedInRegularObj || !ltoCanOmit)); -} - // Print out a log message for --trace-symbol. 
void elf::printTraceSymbol(const Symbol &sym, StringRef name) { std::string s; @@ -374,9 +364,18 @@ void elf::parseVersionAndComputeIsPreemptible(Ctx &ctx) { for (Symbol *sym : ctx.symtab->getSymbols()) { if (sym->hasVersionSuffix) sym->parseSymbolVersion(ctx); -if (hasDynsym) { - sym->isExported = sym->includeInDynsym(ctx); - sym->isPreemptible = sym->isExported && computeIsPreemptible(ctx, *sym); +if (!hasDynsym) + continue; +if (sym->computeBinding(ctx) == STB_LOCAL) { + sym->isExported = false; + continue; +} +if (!sym->isDefined() && !sym->isCommon()) { + sym->isPreemptible = computeIsPreemptible(ctx, *sym); +} else if (ctx.arg.exportDynamic && + (sym->isUsedInRegularObj |
[llvm-branch-commits] [lld] 9bcc825 - [ELF] Refine isExported/isPreemptible condition
Author: Fangrui Song Date: 2025-02-17T16:14:33-08:00 New Revision: 9bcc825ee491a85c2f7b1573d6a3abf6d5cf0c8a URL: https://github.com/llvm/llvm-project/commit/9bcc825ee491a85c2f7b1573d6a3abf6d5cf0c8a DIFF: https://github.com/llvm/llvm-project/commit/9bcc825ee491a85c2f7b1573d6a3abf6d5cf0c8a.diff LOG: [ELF] Refine isExported/isPreemptible condition Reland 994cea3f0a2d0caf4d66321ad5a06ab330144d89 after bolt tests no longer rely on -pie --unresolved-symbols=ignore-all with no input DSO generating PLT entries. --- Commit f10441ad003236ef3b9e5415a571d2be0c0ce5ce , while dropping a special case for isUndefWeak and --no-dynamic-linking, made --export-dynamic ineffective when -pie is used without any input DSO. This change restores --export-dynamic and unifies -pie and -pie --no-dynamic-linker when there is no input DSO. * -pie with no input DSO suppresses undefined symbols in .dynsym. Previously this only appied to -pie --no-dynamic-linker. * As a side effect, -pie with no input DSO suppresses PLT. 
(cherry picked from commit 52fc6ffcda0895c0c7b976ad1f5cb5a282b571d2) Added: Modified: lld/ELF/Config.h lld/ELF/Driver.cpp lld/ELF/Symbols.cpp lld/ELF/SyntheticSections.cpp lld/ELF/Writer.cpp lld/test/ELF/executable-undefined-ignoreall.s lld/test/ELF/ppc32-weak-undef-call.s lld/test/ELF/ppc64-undefined-weak.s lld/test/ELF/riscv-gp.s lld/test/ELF/weak-undef-lib.s lld/test/ELF/weak-undef-no-dynamic-linker.s lld/test/ELF/weak-undef-rw.s Removed: diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h index 98e52b52ea46a..9826ed0517337 100644 --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -292,7 +292,6 @@ struct Config { bool gdbIndex; bool gnuHash = false; bool gnuUnique; - bool hasDynSymTab; bool ignoreDataAddressEquality; bool ignoreFunctionAddressEquality; bool ltoCSProfileGenerate; @@ -306,7 +305,6 @@ struct Config { bool mipsN32Abi = false; bool mmapOutputFile; bool nmagic; - bool noDynamicLinker = false; bool noinhibitExec; bool nostdlib; bool oFormatBinary; diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 391140bce7394..a1e9ecae08557 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -781,11 +781,8 @@ static StringRef getDynamicLinker(Ctx &ctx, opt::InputArgList &args) { auto *arg = args.getLastArg(OPT_dynamic_linker, OPT_no_dynamic_linker); if (!arg) return ""; - if (arg->getOption().getID() == OPT_no_dynamic_linker) { -// --no-dynamic-linker suppresses undefined weak symbols in .dynsym -ctx.arg.noDynamicLinker = true; + if (arg->getOption().getID() == OPT_no_dynamic_linker) return ""; - } return arg->getValue(); } @@ -2921,12 +2918,8 @@ template void LinkerDriver::link(opt::InputArgList &args) { parseFiles(ctx, files); - // Dynamic linking is used if there is an input DSO, - // or -shared or non-static pie is specified. - ctx.hasDynsym = !ctx.sharedFiles.empty() || ctx.arg.shared || - (ctx.arg.pie && !ctx.arg.noDynamicLinker); // Create dynamic sections for dynamic linking and static PIE. 
- ctx.arg.hasDynSymTab = ctx.hasDynsym || ctx.arg.isPic; + ctx.hasDynsym = !ctx.sharedFiles.empty() || ctx.arg.isPic; // If an entry symbol is in a static archive, pull out that file now. if (Symbol *sym = ctx.symtab->find(ctx.arg.entry)) diff --git a/lld/ELF/Symbols.cpp b/lld/ELF/Symbols.cpp index 890877cb1bc04..80b0691428007 100644 --- a/lld/ELF/Symbols.cpp +++ b/lld/ELF/Symbols.cpp @@ -360,7 +360,9 @@ void elf::parseVersionAndComputeIsPreemptible(Ctx &ctx) { // Symbol themselves might know their versions because symbols // can contain versions in the form of @. // Let them parse and update their names to exclude version suffix. + // In addition, compute isExported and isPreemptible. bool hasDynsym = ctx.hasDynsym; + bool maybePreemptible = ctx.sharedFiles.size() || ctx.arg.shared; for (Symbol *sym : ctx.symtab->getSymbols()) { if (sym->hasVersionSuffix) sym->parseSymbolVersion(ctx); @@ -371,7 +373,7 @@ void elf::parseVersionAndComputeIsPreemptible(Ctx &ctx) { continue; } if (!sym->isDefined() && !sym->isCommon()) { - sym->isPreemptible = computeIsPreemptible(ctx, *sym); + sym->isPreemptible = maybePreemptible && computeIsPreemptible(ctx, *sym); } else if (ctx.arg.exportDynamic && (sym->isUsedInRegularObj || !sym->ltoCanOmit)) { sym->isExported = true; diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index ffa6e3c008c48..b03c4282ab1aa 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -4740,7 +4740,7 @@ template void elf::createSyntheticSections(Ctx &ctx) { // Add MIPS-specific sections. if (ctx.arg.emachine == EM_MIPS) { -if (!ctx.arg.shared && ctx.arg.hasDynSymTab) { +if (!ctx.arg.sh
[llvm-branch-commits] [lld] release/20.x: [ELF] Refine isExported/isPreemptible condition (PR #126848)
github-actions[bot] wrote: @MaskRay (or anyone else). If you would like to add a note about this fix in the release notes (completely optional), please reply to this comment with a one or two sentence description of the fix. When you are done, please add the release:note label to this PR. https://github.com/llvm/llvm-project/pull/126848 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] release/20.x: [ELF] Refine isExported/isPreemptible condition (PR #126848)
https://github.com/tstellar closed https://github.com/llvm/llvm-project/pull/126848 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [clang] StmtPrinter: Handle DeclRefExpr to a Decomposition (#125001) (PR #126659)
tstellar wrote: @AaronBallman What do you think about backporting this? https://github.com/llvm/llvm-project/pull/126659 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [CSKY] Default to unsigned char (PR #126436)
tstellar wrote: ping @zixuan-wu https://github.com/llvm/llvm-project/pull/126436 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Handle brev and not cases in getConstValDefinedInReg (PR #127483)
arsenm wrote: ### Merge activity * **Feb 17, 10:28 PM EST**: A user started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/127483). https://github.com/llvm/llvm-project/pull/127483 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Implement getConstValDefinedInReg and use in foldImmediate (NFC) (PR #127482)
arsenm wrote: ### Merge activity * **Feb 17, 10:28 PM EST**: A user started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/127482). https://github.com/llvm/llvm-project/pull/127482 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Fix overly conservative immediate operand check (PR #127563)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/127563 >From 2f31f251066060c70674499b28be8ab4f438aee5 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 17 Feb 2025 22:31:48 +0700 Subject: [PATCH] AMDGPU: Fix overly conservative immediate operand check The real legality check is peformed later anyway, so this was unnecessarily blocking immediate folds in handled cases. This also stops folding s_fmac_f32 to s_fmamk_f32 in a few tests, but that seems better. The globalisel changes look suspicious, it may be mishandling constants for VOP3P instructions. --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp| 3 ++- llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll | 16 llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll | 16 llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll | 4 +--- llvm/test/CodeGen/AMDGPU/bug-cselect-b64.ll | 6 ++ llvm/test/CodeGen/AMDGPU/constrained-shift.ll| 6 ++ .../CodeGen/AMDGPU/fold-operands-scalar-fmac.mir | 4 ++-- llvm/test/CodeGen/AMDGPU/global-saddr-load.ll| 5 + llvm/test/CodeGen/AMDGPU/packed-fp32.ll | 10 +- llvm/test/CodeGen/AMDGPU/scalar-float-sop2.ll| 4 ++-- 10 files changed, 25 insertions(+), 49 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 84773349e0ca0..cbd858b9002ee 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -830,7 +830,8 @@ bool SIFoldOperandsImpl::tryToFoldACImm( if (UseOpIdx >= Desc.getNumOperands()) return false; - if (!AMDGPU::isSISrcInlinableOperand(Desc, UseOpIdx)) + // Filter out unhandled pseudos. 
+ if (!AMDGPU::isSISrcOperand(Desc, UseOpIdx)) return false; uint8_t OpTy = Desc.operands()[UseOpIdx].OperandType; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll index 4be00fedb972e..89078f20f1d47 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll @@ -920,9 +920,7 @@ define amdgpu_ps i64 @s_andn2_v4i16(<4 x i16> inreg %src0, <4 x i16> inreg %src1 ; GFX6-NEXT:s_lshl_b32 s3, s9, 16 ; GFX6-NEXT:s_and_b32 s4, s8, 0x ; GFX6-NEXT:s_or_b32 s3, s3, s4 -; GFX6-NEXT:s_mov_b32 s4, -1 -; GFX6-NEXT:s_mov_b32 s5, s4 -; GFX6-NEXT:s_xor_b64 s[2:3], s[2:3], s[4:5] +; GFX6-NEXT:s_xor_b64 s[2:3], s[2:3], -1 ; GFX6-NEXT:s_and_b64 s[0:1], s[0:1], s[2:3] ; GFX6-NEXT:; return to shader part epilog ; @@ -962,9 +960,7 @@ define amdgpu_ps i64 @s_andn2_v4i16_commute(<4 x i16> inreg %src0, <4 x i16> inr ; GFX6-NEXT:s_lshl_b32 s3, s9, 16 ; GFX6-NEXT:s_and_b32 s4, s8, 0x ; GFX6-NEXT:s_or_b32 s3, s3, s4 -; GFX6-NEXT:s_mov_b32 s4, -1 -; GFX6-NEXT:s_mov_b32 s5, s4 -; GFX6-NEXT:s_xor_b64 s[2:3], s[2:3], s[4:5] +; GFX6-NEXT:s_xor_b64 s[2:3], s[2:3], -1 ; GFX6-NEXT:s_and_b64 s[0:1], s[2:3], s[0:1] ; GFX6-NEXT:; return to shader part epilog ; @@ -1004,9 +1000,7 @@ define amdgpu_ps { i64, i64 } @s_andn2_v4i16_multi_use(<4 x i16> inreg %src0, <4 ; GFX6-NEXT:s_lshl_b32 s3, s9, 16 ; GFX6-NEXT:s_and_b32 s4, s8, 0x ; GFX6-NEXT:s_or_b32 s3, s3, s4 -; GFX6-NEXT:s_mov_b32 s4, -1 -; GFX6-NEXT:s_mov_b32 s5, s4 -; GFX6-NEXT:s_xor_b64 s[2:3], s[2:3], s[4:5] +; GFX6-NEXT:s_xor_b64 s[2:3], s[2:3], -1 ; GFX6-NEXT:s_and_b64 s[0:1], s[0:1], s[2:3] ; GFX6-NEXT:; return to shader part epilog ; @@ -1060,9 +1054,7 @@ define amdgpu_ps { i64, i64 } @s_andn2_v4i16_multi_foldable_use(<4 x i16> inreg ; GFX6-NEXT:s_lshl_b32 s5, s13, 16 ; GFX6-NEXT:s_and_b32 s6, s12, 0x ; GFX6-NEXT:s_or_b32 s5, s5, s6 -; GFX6-NEXT:s_mov_b32 s6, -1 -; GFX6-NEXT:s_mov_b32 s7, s6 -; GFX6-NEXT:s_xor_b64 s[4:5], s[4:5], s[6:7] +; 
GFX6-NEXT:s_xor_b64 s[4:5], s[4:5], -1 ; GFX6-NEXT:s_and_b64 s[0:1], s[0:1], s[4:5] ; GFX6-NEXT:s_and_b64 s[2:3], s[2:3], s[4:5] ; GFX6-NEXT:; return to shader part epilog diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll index e7119c89ac06c..065fadf3b5ef3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll @@ -919,9 +919,7 @@ define amdgpu_ps i64 @s_orn2_v4i16(<4 x i16> inreg %src0, <4 x i16> inreg %src1) ; GFX6-NEXT:s_lshl_b32 s3, s9, 16 ; GFX6-NEXT:s_and_b32 s4, s8, 0x ; GFX6-NEXT:s_or_b32 s3, s3, s4 -; GFX6-NEXT:s_mov_b32 s4, -1 -; GFX6-NEXT:s_mov_b32 s5, s4 -; GFX6-NEXT:s_xor_b64 s[2:3], s[2:3], s[4:5] +; GFX6-NEXT:s_xor_b64 s[2:3], s[2:3], -1 ; GFX6-NEXT:s_or_b64 s[0:1], s[0:1], s[2:3] ; GFX6-NEXT:; return to shader part epilog ; @@ -961,9 +959,7 @@ define amdgpu_ps i64 @s_orn2_v4i16_commute(<4 x i16> inreg %src0, <4 x i16> inre ; GFX6-NEXT:s_lshl_b32 s3, s9, 16 ; GFX6-NEXT:
[llvm-branch-commits] [llvm] AMDGPU: Do not try to commute instruction with same input register (PR #127562)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/127562 >From 2f11ad086fafc06a1bc1e24ec89cb5a66c9c5eba Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 18 Feb 2025 10:05:30 +0700 Subject: [PATCH] AMDGPU: Do not try to commute instruction with same input register There's little point to trying to commute an instruction if the two operands are already the same. This avoids an assertion in a future patch, but this likely isn't the correct fix. The worklist management in SIFoldOperands is dodgy, and we should probably fix it to work like PeepholeOpt (i.e. stop looking at use lists, and fold from users). This is an extension of the already handled special case which it's trying to avoid folding an instruction which is already being folded. --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 12 +++- llvm/test/CodeGen/AMDGPU/dag-divergence.ll| 4 +- llvm/test/CodeGen/AMDGPU/div_i128.ll | 8 +-- llvm/test/CodeGen/AMDGPU/div_v2i128.ll| 32 - llvm/test/CodeGen/AMDGPU/rem_i128.ll | 8 +-- ...-operands-commute-same-operands-assert.mir | 65 +++ 6 files changed, 102 insertions(+), 27 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/si-fold-operands-commute-same-operands-assert.mir diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 8492bb2c3518b..84773349e0ca0 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -691,11 +691,21 @@ bool SIFoldOperandsImpl::tryAddToFoldList( if (!CanCommute) return false; +MachineOperand &Op = MI->getOperand(OpNo); +MachineOperand &CommutedOp = MI->getOperand(CommuteOpNo); + // One of operands might be an Imm operand, and OpNo may refer to it after // the call of commuteInstruction() below. Such situations are avoided // here explicitly as OpNo must be a register operand to be a candidate // for memory folding. 
-if (!MI->getOperand(OpNo).isReg() || !MI->getOperand(CommuteOpNo).isReg()) +if (!Op.isReg() || !CommutedOp.isReg()) + return false; + +// The same situation with an immediate could reproduce if both inputs are +// the same register. +if (Op.isReg() && CommutedOp.isReg() && +(Op.getReg() == CommutedOp.getReg() && + Op.getSubReg() == CommutedOp.getSubReg())) return false; if (!TII->commuteInstruction(*MI, false, OpNo, CommuteOpNo)) diff --git a/llvm/test/CodeGen/AMDGPU/dag-divergence.ll b/llvm/test/CodeGen/AMDGPU/dag-divergence.ll index dfc28539ea814..0f573fcc6deaa 100644 --- a/llvm/test/CodeGen/AMDGPU/dag-divergence.ll +++ b/llvm/test/CodeGen/AMDGPU/dag-divergence.ll @@ -37,8 +37,8 @@ define amdgpu_kernel void @flat_load_maybe_divergent(ptr addrspace(4) %k, ptr %f ; GCN-LABEL: {{^}}wide_carry_divergence_error: ; GCN: v_sub_u32_e32 ; GCN: v_subb_u32_e32 -; GCN: v_subbrev_u32_e32 -; GCN: v_subbrev_u32_e32 +; GCN: v_subb_u32_e32 +; GCN: v_subb_u32_e32 define <2 x i128> @wide_carry_divergence_error(i128 %arg) { %i = call i128 @llvm.ctlz.i128(i128 %arg, i1 false) %i1 = sub i128 0, %i diff --git a/llvm/test/CodeGen/AMDGPU/div_i128.ll b/llvm/test/CodeGen/AMDGPU/div_i128.ll index 59bc7f332bf1e..3d9043d30c1ce 100644 --- a/llvm/test/CodeGen/AMDGPU/div_i128.ll +++ b/llvm/test/CodeGen/AMDGPU/div_i128.ll @@ -65,8 +65,8 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-NEXT:v_cndmask_b32_e64 v7, v7, 0, vcc ; GFX9-NEXT:v_sub_co_u32_e32 v2, vcc, v2, v3 ; GFX9-NEXT:v_subb_co_u32_e32 v3, vcc, v4, v7, vcc -; GFX9-NEXT:v_subbrev_co_u32_e32 v4, vcc, 0, v5, vcc -; GFX9-NEXT:v_subbrev_co_u32_e32 v5, vcc, 0, v5, vcc +; GFX9-NEXT:v_subb_co_u32_e32 v4, vcc, 0, v5, vcc +; GFX9-NEXT:v_subb_co_u32_e32 v5, vcc, 0, v5, vcc ; GFX9-NEXT:s_mov_b64 s[6:7], 0x7f ; GFX9-NEXT:v_cmp_lt_u64_e32 vcc, s[6:7], v[2:3] ; GFX9-NEXT:v_mov_b32_e32 v18, v16 @@ -2355,8 +2355,8 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-NEXT:v_sub_co_u32_e32 v12, vcc, v8, v9 ; 
GFX9-NEXT:v_subb_co_u32_e32 v13, vcc, v10, v13, vcc ; GFX9-NEXT:v_mov_b32_e32 v8, 0 -; GFX9-NEXT:v_subbrev_co_u32_e32 v14, vcc, 0, v8, vcc -; GFX9-NEXT:v_subbrev_co_u32_e32 v15, vcc, 0, v8, vcc +; GFX9-NEXT:v_subb_co_u32_e32 v14, vcc, 0, v8, vcc +; GFX9-NEXT:v_subb_co_u32_e32 v15, vcc, 0, v8, vcc ; GFX9-NEXT:v_cmp_lt_u64_e32 vcc, s[6:7], v[12:13] ; GFX9-NEXT:v_or_b32_e32 v10, v13, v15 ; GFX9-NEXT:v_cndmask_b32_e64 v8, 0, 1, vcc diff --git a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll index 41999b249a0e8..a58c1e7883b0b 100644 --- a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll +++ b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll @@ -66,10 +66,10 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) { ; SDAG-NEXT:v_sub_i32_e32 v2, vcc, v2, v10 ; SDAG-NEXT:v_subb_u32_e32 v3, vcc, v8,
[llvm-branch-commits] [llvm] AMDGPU: Handle subregister uses in SIFoldOperands constant folding (PR #127485)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/127485 >From e5b8e8eb6030e99fef6f3a9e410dbcaa163f9e16 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 17 Feb 2025 17:18:27 +0700 Subject: [PATCH] AMDGPU: Handle subregister uses in SIFoldOperands constant folding --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 57 +++ .../AMDGPU/constant-fold-imm-immreg.mir | 34 +++ 2 files changed, 67 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 999553bfaff38..8492bb2c3518b 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -123,7 +123,7 @@ class SIFoldOperandsImpl { SmallVectorImpl &FoldList, SmallVectorImpl &CopiesToReplace) const; - MachineOperand *getImmOrMaterializedImm(MachineOperand &Op) const; + std::optional getImmOrMaterializedImm(MachineOperand &Op) const; bool tryConstantFoldOp(MachineInstr *MI) const; bool tryFoldCndMask(MachineInstr &MI) const; bool tryFoldZeroHighBits(MachineInstr &MI) const; @@ -1298,21 +1298,22 @@ static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc) { MI.removeOperand(I); } -MachineOperand * +std::optional SIFoldOperandsImpl::getImmOrMaterializedImm(MachineOperand &Op) const { - // If this has a subregister, it obviously is a register source. 
- if (!Op.isReg() || Op.getSubReg() != AMDGPU::NoSubRegister || - !Op.getReg().isVirtual()) -return &Op; + if (Op.isImm()) +return Op.getImm(); - MachineInstr *Def = MRI->getVRegDef(Op.getReg()); + if (!Op.isReg() || !Op.getReg().isVirtual()) +return std::nullopt; + + const MachineInstr *Def = MRI->getVRegDef(Op.getReg()); if (Def && Def->isMoveImmediate()) { -MachineOperand &ImmSrc = Def->getOperand(1); +const MachineOperand &ImmSrc = Def->getOperand(1); if (ImmSrc.isImm()) - return &ImmSrc; + return TII->extractSubregFromImm(ImmSrc.getImm(), Op.getSubReg()); } - return &Op; + return std::nullopt; } // Try to simplify operations with a constant that may appear after instruction @@ -1327,12 +1328,14 @@ bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const { int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); if (Src0Idx == -1) return false; - MachineOperand *Src0 = getImmOrMaterializedImm(MI->getOperand(Src0Idx)); + + MachineOperand *Src0 = &MI->getOperand(Src0Idx); + std::optional Src0Imm = getImmOrMaterializedImm(*Src0); if ((Opc == AMDGPU::V_NOT_B32_e64 || Opc == AMDGPU::V_NOT_B32_e32 || Opc == AMDGPU::S_NOT_B32) && - Src0->isImm()) { -MI->getOperand(1).ChangeToImmediate(~Src0->getImm()); + Src0Imm) { +MI->getOperand(1).ChangeToImmediate(~*Src0Imm); mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_NOT_B32))); return true; } @@ -1340,17 +1343,19 @@ bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const { int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); if (Src1Idx == -1) return false; - MachineOperand *Src1 = getImmOrMaterializedImm(MI->getOperand(Src1Idx)); - if (!Src0->isImm() && !Src1->isImm()) + MachineOperand *Src1 = &MI->getOperand(Src1Idx); + std::optional Src1Imm = getImmOrMaterializedImm(*Src1); + + if (!Src0Imm && !Src1Imm) return false; // and k0, k1 -> v_mov_b32 (k0 & k1) // or k0, k1 -> v_mov_b32 (k0 | k1) // xor k0, k1 -> v_mov_b32 (k0 ^ k1) - if (Src0->isImm() && Src1->isImm()) { + 
if (Src0Imm && Src1Imm) { int32_t NewImm; -if (!evalBinaryInstruction(Opc, NewImm, Src0->getImm(), Src1->getImm())) +if (!evalBinaryInstruction(Opc, NewImm, *Src0Imm, *Src1Imm)) return false; bool IsSGPR = TRI->isSGPRReg(*MRI, MI->getOperand(0).getReg()); @@ -1366,12 +1371,13 @@ bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const { if (!MI->isCommutable()) return false; - if (Src0->isImm() && !Src1->isImm()) { + if (Src0Imm && !Src1Imm) { std::swap(Src0, Src1); std::swap(Src0Idx, Src1Idx); +std::swap(Src0Imm, Src1Imm); } - int32_t Src1Val = static_cast(Src1->getImm()); + int32_t Src1Val = static_cast(*Src1Imm); if (Opc == AMDGPU::V_OR_B32_e64 || Opc == AMDGPU::V_OR_B32_e32 || Opc == AMDGPU::S_OR_B32) { @@ -1428,9 +1434,12 @@ bool SIFoldOperandsImpl::tryFoldCndMask(MachineInstr &MI) const { MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); if (!Src1->isIdenticalTo(*Src0)) { -auto *Src0Imm = getImmOrMaterializedImm(*Src0); -auto *Src1Imm = getImmOrMaterializedImm(*Src1); -if (!Src1Imm->isIdenticalTo(*Src0Imm)) +std::optional Src1Imm = getImmOrMaterializedImm(*Src1); +if (!Src1Imm) + return false; + +std::optional Src0I
[llvm-branch-commits] [lld] a2b5020 - [ELF] ICF: replace includeInDynsym with isExported
Author: Fangrui Song Date: 2025-02-17T16:14:33-08:00 New Revision: a2b502050302a4cf8a9c4e623331810eed51bb81 URL: https://github.com/llvm/llvm-project/commit/a2b502050302a4cf8a9c4e623331810eed51bb81 DIFF: https://github.com/llvm/llvm-project/commit/a2b502050302a4cf8a9c4e623331810eed51bb81.diff LOG: [ELF] ICF: replace includeInDynsym with isExported Similar to the change to MarkLive.cpp when isExported was introduced. includeInDynsym might return true even when isExported is false for statically linked executables. (cherry picked from commit 45f538ecba1a51768002a5bc0c194b5af4cd9c27) Added: Modified: lld/ELF/Driver.cpp lld/test/ELF/icf-safe.s Removed: diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 2d8a5ade2fece..6121a4254453c 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -2434,7 +2434,7 @@ static void findKeepUniqueSections(Ctx &ctx, opt::InputArgList &args) { // or DSOs, so we conservatively mark them as address-significant. bool icfSafe = ctx.arg.icf == ICFLevel::Safe; for (Symbol *sym : ctx.symtab->getSymbols()) -if (sym->includeInDynsym(ctx)) +if (sym->isExported) markAddrsig(icfSafe, sym); // Visit the address-significance table in each object file and mark each diff --git a/lld/test/ELF/icf-safe.s b/lld/test/ELF/icf-safe.s index 96776feccbc67..5381532609938 100644 --- a/lld/test/ELF/icf-safe.s +++ b/lld/test/ELF/icf-safe.s @@ -1,16 +1,19 @@ # REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64 %S/Inputs/shared.s -o %ta.o +# RUN: ld.lld -shared -soname=ta %ta.o -o %ta.so # RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t1.o # RUN: llvm-objcopy %t1.o %t1copy.o # RUN: llvm-objcopy --localize-symbol=h1 %t1.o %t1changed.o # RUN: ld.lld -r %t1.o -o %t1reloc.o # RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %S/Inputs/icf-safe.s -o %t2.o -# RUN: ld.lld %t1.o %t2.o -o %t2 --icf=safe --print-icf-sections | FileCheck %s +# RUN: ld.lld %t1.o %t2.o -o %t2 --icf=safe --print-icf-sections --export-dynamic | FileCheck 
%s # RUN: ld.lld %t1copy.o %t2.o -o %t2 --icf=safe --print-icf-sections | FileCheck %s # RUN: ld.lld %t1.o %t2.o -o %t3 --icf=safe --print-icf-sections -shared | FileCheck --check-prefix=EXPORT %s -# RUN: ld.lld %t1.o %t2.o -o %t3 --icf=safe --print-icf-sections --export-dynamic | FileCheck --check-prefix=EXPORT %s +## Exported symbols are suppressed for ICF when dynamic linking is enabled. +# RUN: ld.lld %t1.o %t2.o %ta.so -o %t3 --icf=safe --print-icf-sections --export-dynamic | FileCheck --check-prefix=EXPORT %s # RUN: ld.lld %t1.o %t2.o -o %t2 --icf=all --print-icf-sections | FileCheck --check-prefix=ALL %s -# RUN: ld.lld %t1.o %t2.o -o %t2 --icf=all --print-icf-sections --export-dynamic | FileCheck --check-prefix=ALL-EXPORT %s +# RUN: ld.lld %t1.o %t2.o %ta.so -o %t2 --icf=all --print-icf-sections --export-dynamic | FileCheck --check-prefix=ALL-EXPORT %s # RUN: ld.lld %t1changed.o -o %t4 --icf=safe 2>&1 | FileCheck --check-prefix=SH_LINK_0 %s # RUN: ld.lld %t1reloc.o -o %t4 --icf=safe 2>&1 | FileCheck --check-prefix=SH_LINK_0 %s ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [InstCombine] Do not keep samesign when speculatively executing icmps (#127007) (PR #127391)
https://github.com/llvmbot updated https://github.com/llvm/llvm-project/pull/127391 >From 9e02cc4080f2268845c7e51a3f1a3b150daad40c Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Sun, 16 Feb 2025 20:18:29 +0800 Subject: [PATCH] [InstCombine] Do not keep samesign when speculatively executing icmps (#127007) Closes https://github.com/llvm/llvm-project/issues/126974. (cherry picked from commit 29f3a352068ce562bcb65e18a676c82a9991583c) --- .../InstCombine/InstCombineCompares.cpp | 5 llvm/test/Transforms/InstCombine/umax-icmp.ll | 24 +++ 2 files changed, 29 insertions(+) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index b64ac20ab0533..810ce7d382ae1 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -5609,6 +5609,11 @@ Instruction *InstCombinerImpl::foldICmpWithMinMax(Instruction &I, return false; return std::nullopt; }; + // Remove samesign here since it is illegal to keep it when we speculatively + // execute comparisons. For example, `icmp samesign ult umax(X, -46), -32` + // cannot be decomposed into `(icmp samesign ult X, -46) or (icmp samesign ult + // -46, -32)`. `X` is allowed to be non-negative here. 
+ Pred = static_cast<CmpInst::Predicate>(Pred); auto CmpXZ = IsCondKnownTrue(simplifyICmpInst(Pred, X, Z, Q)); auto CmpYZ = IsCondKnownTrue(simplifyICmpInst(Pred, Y, Z, Q)); if (!CmpXZ.has_value() && !CmpYZ.has_value()) diff --git a/llvm/test/Transforms/InstCombine/umax-icmp.ll b/llvm/test/Transforms/InstCombine/umax-icmp.ll index b4eea30bfc6af..0c42d26750e4b 100644 --- a/llvm/test/Transforms/InstCombine/umax-icmp.ll +++ b/llvm/test/Transforms/InstCombine/umax-icmp.ll @@ -804,4 +804,28 @@ end: ret void } +define i1 @pr126974(i8 %x) { +; CHECK-LABEL: @pr126974( +; CHECK-NEXT: entry: +; CHECK-NEXT:[[COND:%.*]] = icmp sgt i8 [[X:%.*]], -2 +; CHECK-NEXT:br i1 [[COND]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT:[[CMP:%.*]] = icmp ne i8 [[X]], -1 +; CHECK-NEXT:ret i1 [[CMP]] +; CHECK: if.else: +; CHECK-NEXT:ret i1 false +; +entry: + %cond = icmp sgt i8 %x, -2 + br i1 %cond, label %if.then, label %if.else + +if.then: + %umax = call i8 @llvm.umax.i8(i8 %x, i8 -46) + %cmp = icmp samesign ult i8 %umax, -32 + ret i1 %cmp + +if.else: + ret i1 false +} + declare i32 @llvm.umax.i32(i32, i32) ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 9e02cc4 - [InstCombine] Do not keep samesign when speculatively executing icmps (#127007)
Author: Yingwei Zheng Date: 2025-02-17T17:02:40-08:00 New Revision: 9e02cc4080f2268845c7e51a3f1a3b150daad40c URL: https://github.com/llvm/llvm-project/commit/9e02cc4080f2268845c7e51a3f1a3b150daad40c DIFF: https://github.com/llvm/llvm-project/commit/9e02cc4080f2268845c7e51a3f1a3b150daad40c.diff LOG: [InstCombine] Do not keep samesign when speculatively executing icmps (#127007) Closes https://github.com/llvm/llvm-project/issues/126974. (cherry picked from commit 29f3a352068ce562bcb65e18a676c82a9991583c) Added: Modified: llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp llvm/test/Transforms/InstCombine/umax-icmp.ll Removed: diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index b64ac20ab0533..810ce7d382ae1 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -5609,6 +5609,11 @@ Instruction *InstCombinerImpl::foldICmpWithMinMax(Instruction &I, return false; return std::nullopt; }; + // Remove samesign here since it is illegal to keep it when we speculatively + // execute comparisons. For example, `icmp samesign ult umax(X, -46), -32` + // cannot be decomposed into `(icmp samesign ult X, -46) or (icmp samesign ult + // -46, -32)`. `X` is allowed to be non-negative here. 
+ Pred = static_cast<CmpInst::Predicate>(Pred); auto CmpXZ = IsCondKnownTrue(simplifyICmpInst(Pred, X, Z, Q)); auto CmpYZ = IsCondKnownTrue(simplifyICmpInst(Pred, Y, Z, Q)); if (!CmpXZ.has_value() && !CmpYZ.has_value()) diff --git a/llvm/test/Transforms/InstCombine/umax-icmp.ll b/llvm/test/Transforms/InstCombine/umax-icmp.ll index b4eea30bfc6af..0c42d26750e4b 100644 --- a/llvm/test/Transforms/InstCombine/umax-icmp.ll +++ b/llvm/test/Transforms/InstCombine/umax-icmp.ll @@ -804,4 +804,28 @@ end: ret void } +define i1 @pr126974(i8 %x) { +; CHECK-LABEL: @pr126974( +; CHECK-NEXT: entry: +; CHECK-NEXT:[[COND:%.*]] = icmp sgt i8 [[X:%.*]], -2 +; CHECK-NEXT:br i1 [[COND]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT:[[CMP:%.*]] = icmp ne i8 [[X]], -1 +; CHECK-NEXT:ret i1 [[CMP]] +; CHECK: if.else: +; CHECK-NEXT:ret i1 false +; +entry: + %cond = icmp sgt i8 %x, -2 + br i1 %cond, label %if.then, label %if.else + +if.then: + %umax = call i8 @llvm.umax.i8(i8 %x, i8 -46) + %cmp = icmp samesign ult i8 %umax, -32 + ret i1 %cmp + +if.else: + ret i1 false +} + declare i32 @llvm.umax.i32(i32, i32) ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [InstCombine] Do not keep samesign when speculatively executing icmps (#127007) (PR #127391)
github-actions[bot] wrote: @dtcxzyw (or anyone else). If you would like to add a note about this fix in the release notes (completely optional). Please reply to this comment with a one or two sentence description of the fix. When you are done, please add the release:note label to this PR. https://github.com/llvm/llvm-project/pull/127391 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [InstCombine] Do not keep samesign when speculatively executing icmps (#127007) (PR #127391)
https://github.com/tstellar closed https://github.com/llvm/llvm-project/pull/127391 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [ReleaseNotes][RemoveDIs] Add release note for deprecated insertion methods (PR #127493)
github-actions[bot] wrote: @jmorse (or anyone else). If you would like to add a note about this fix in the release notes (completely optional). Please reply to this comment with a one or two sentence description of the fix. When you are done, please add the release:note label to this PR. https://github.com/llvm/llvm-project/pull/127493 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits