https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/101698
Delete the attribute and annotate any atomicrmw instructions in the function with new metadata. From f19c1c2205115215567b7860a1a9fda2489eb114 Mon Sep 17 00:00:00 2001 From: Matt Arsenault <matthew.arsena...@amd.com> Date: Thu, 18 Apr 2024 15:14:24 +0200 Subject: [PATCH] IR/AMDGPU: Autoupgrade amdgpu-unsafe-fp-atomics attribute Delete the attribute and annotate any atomicrmw instructions in the function with new metadata. --- llvm/lib/IR/AutoUpgrade.cpp | 34 ++++++++ .../amdgpu-unsafe-fp-atomics-upgrade.ll | 80 +++++++++++++++++++ 2 files changed, 114 insertions(+) create mode 100644 llvm/test/Bitcode/amdgpu-unsafe-fp-atomics-upgrade.ll diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 0e95c6df670dc..eda5c0f66349f 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -5260,6 +5260,22 @@ struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> { Call.addFnAttr(Attribute::NoBuiltin); } }; + +/// Replace "amdgpu-unsafe-fp-atomics" metadata with atomicrmw metadata +struct AMDGPUUnsafeFPAtomicsUpgradeVisitor + : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> { + AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default; + + void visitAtomicRMWInst(AtomicRMWInst &RMW) { + if (!RMW.isFloatingPointOperation()) + return; + + MDNode *Empty = MDNode::get(RMW.getContext(), {}); + RMW.setMetadata("amdgpu.no.fine.grained.host.memory", Empty); + RMW.setMetadata("amdgpu.no.remote.memory.access", Empty); + RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty); + } +}; } // namespace void llvm::UpgradeFunctionAttributes(Function &F) { @@ -5282,6 +5298,24 @@ void llvm::UpgradeFunctionAttributes(Function &F) { F.setSection(A.getValueAsString()); F.removeFnAttr("implicit-section-name"); } + + if (!F.empty()) { + // For some reason this is called twice, and the first time is before any + // instructions are loaded into the body.
+ + if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics"); + A.isValid()) { + + if (A.getValueAsBool()) { + AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor; + Visitor.visit(F); + } + + // We will leave behind dead attribute uses on external declarations, but + // clang never added these to declarations anyway. + F.removeFnAttr("amdgpu-unsafe-fp-atomics"); + } + } } static bool isOldLoopArgument(Metadata *MD) { diff --git a/llvm/test/Bitcode/amdgpu-unsafe-fp-atomics-upgrade.ll b/llvm/test/Bitcode/amdgpu-unsafe-fp-atomics-upgrade.ll new file mode 100644 index 0000000000000..ceac496d1e8dd --- /dev/null +++ b/llvm/test/Bitcode/amdgpu-unsafe-fp-atomics-upgrade.ll @@ -0,0 +1,80 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4 +; RUN: llvm-as < %s | llvm-dis | FileCheck %s + +; amdgpu-unsafe-fp-atomics attribute should be removed and replaced +; with metadata attached to any atomicrmw with floating-point +; operations. + +; Maybe the attribute should be dropped from declarations, but it +; didn't do anything on one and clang never added it. 
+declare void @unsafe_fp_atomics_true_decl() "amdgpu-unsafe-fp-atomics"="true" +declare void @unsafe_fp_atomics_false_decl() "amdgpu-unsafe-fp-atomics"="false" + +; Delete the attribute and replace with the most aggressive metadata possible +define void @unsafe_fp_atomics_true(ptr addrspace(1) %ptr, float %val, i32 %ival, <2 x half> %vval) "amdgpu-unsafe-fp-atomics"="true" { +; CHECK-LABEL: define void @unsafe_fp_atomics_true( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]], i32 [[IVAL:%.*]], <2 x half> [[VVAL:%.*]]) { +; CHECK-NEXT: [[RMW_FADD:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4, !amdgpu.no.fine.grained.host.memory [[META0:![0-9]+]], !amdgpu.no.remote.memory.access [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; CHECK-NEXT: [[RMW_FSUB:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4, !amdgpu.no.fine.grained.host.memory [[META0]], !amdgpu.no.remote.memory.access [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; CHECK-NEXT: [[RMW_FMIN:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4, !amdgpu.no.fine.grained.host.memory [[META0]], !amdgpu.no.remote.memory.access [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; CHECK-NEXT: [[RMW_FMAX:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4, !amdgpu.no.fine.grained.host.memory [[META0]], !amdgpu.no.remote.memory.access [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; CHECK-NEXT: [[RMW_XOR:%.*]] = atomicrmw xor ptr addrspace(1) [[PTR]], i32 [[IVAL]] syncscope("one-as") seq_cst, align 4 +; CHECK-NEXT: [[RMW_FADD_VECTOR:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VVAL]] syncscope("one-as") seq_cst, align 4, !amdgpu.no.fine.grained.host.memory [[META0]], !amdgpu.no.remote.memory.access [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; CHECK-NEXT: 
[[RMW_FSUB_VECTOR:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR]], <2 x half> [[VVAL]] syncscope("one-as") seq_cst, align 4, !amdgpu.no.fine.grained.host.memory [[META0]], !amdgpu.no.remote.memory.access [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; CHECK-NEXT: [[RMW_FMIN_VECTOR:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], <2 x half> [[VVAL]] syncscope("one-as") seq_cst, align 4, !amdgpu.no.fine.grained.host.memory [[META0]], !amdgpu.no.remote.memory.access [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; CHECK-NEXT: [[RMW_FMAX_VECTOR:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], <2 x half> [[VVAL]] syncscope("one-as") seq_cst, align 4, !amdgpu.no.fine.grained.host.memory [[META0]], !amdgpu.no.remote.memory.access [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; CHECK-NEXT: [[RMW_XCHG:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4 +; CHECK-NEXT: ret void +; + %rmw.fadd = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst + %rmw.fsub = atomicrmw fsub ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst + %rmw.fmin = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst + %rmw.fmax = atomicrmw fmax ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst + %rmw.xor = atomicrmw xor ptr addrspace(1) %ptr, i32 %ival syncscope("one-as") seq_cst + %rmw.fadd.vector = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %vval syncscope("one-as") seq_cst + %rmw.fsub.vector = atomicrmw fsub ptr addrspace(1) %ptr, <2 x half> %vval syncscope("one-as") seq_cst + %rmw.fmin.vector = atomicrmw fmin ptr addrspace(1) %ptr, <2 x half> %vval syncscope("one-as") seq_cst + %rmw.fmax.vector = atomicrmw fmax ptr addrspace(1) %ptr, <2 x half> %vval syncscope("one-as") seq_cst + + ; xchg doesn't need any metadata + %rmw.xchg = atomicrmw xchg ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst + ret void +} + +; Should just delete the effectless 
attribute if it exists +define void @unsafe_fp_atomics_false(ptr addrspace(1) %ptr, float %val, i32 %ival, <2 x half> %vval) "amdgpu-unsafe-fp-atomics"="false" { +; CHECK-LABEL: define void @unsafe_fp_atomics_false( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]], i32 [[IVAL:%.*]], <2 x half> [[VVAL:%.*]]) { +; CHECK-NEXT: [[RMW_FADD:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4 +; CHECK-NEXT: [[RMW_FSUB:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4 +; CHECK-NEXT: [[RMW_FMIN:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4 +; CHECK-NEXT: [[RMW_FMAX:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4 +; CHECK-NEXT: [[RMW_XOR:%.*]] = atomicrmw xor ptr addrspace(1) [[PTR]], i32 [[IVAL]] syncscope("one-as") seq_cst, align 4 +; CHECK-NEXT: [[RMW_FADD_VECTOR:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VVAL]] syncscope("one-as") seq_cst, align 4 +; CHECK-NEXT: [[RMW_FSUB_VECTOR:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR]], <2 x half> [[VVAL]] syncscope("one-as") seq_cst, align 4 +; CHECK-NEXT: [[RMW_FMIN_VECTOR:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], <2 x half> [[VVAL]] syncscope("one-as") seq_cst, align 4 +; CHECK-NEXT: [[RMW_FMAX_VECTOR:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], <2 x half> [[VVAL]] syncscope("one-as") seq_cst, align 4 +; CHECK-NEXT: [[RMW_XCHG:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], float [[VAL]] syncscope("one-as") seq_cst, align 4 +; CHECK-NEXT: ret void +; + %rmw.fadd = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst + %rmw.fsub = atomicrmw fsub ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst + %rmw.fmin = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst + %rmw.fmax = atomicrmw fmax ptr addrspace(1) %ptr, float %val 
syncscope("one-as") seq_cst + %rmw.xor = atomicrmw xor ptr addrspace(1) %ptr, i32 %ival syncscope("one-as") seq_cst + %rmw.fadd.vector = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %vval syncscope("one-as") seq_cst + %rmw.fsub.vector = atomicrmw fsub ptr addrspace(1) %ptr, <2 x half> %vval syncscope("one-as") seq_cst + %rmw.fmin.vector = atomicrmw fmin ptr addrspace(1) %ptr, <2 x half> %vval syncscope("one-as") seq_cst + %rmw.fmax.vector = atomicrmw fmax ptr addrspace(1) %ptr, <2 x half> %vval syncscope("one-as") seq_cst + + ; xchg doesn't need any metadata + %rmw.xchg = atomicrmw xchg ptr addrspace(1) %ptr, float %val syncscope("one-as") seq_cst + ret void +} + +;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { "amdgpu-unsafe-fp-atomics"="true" } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { "amdgpu-unsafe-fp-atomics"="false" } +;. +; CHECK: [[META0]] = !{} +;. _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits