[PATCH] D108150: [Remarks] [AMDGPU] Emit optimization remarks for atomics generating hardware instructions

Anshil Gandhi via Phabricator via cfe-commits Wed, 18 Aug 2021 03:52:15 -0700

gandhi21299 updated this revision to Diff 367042.
gandhi21299 added a comment.


- corrected logic for ORE in SIISelLowering.cpp


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108150/new/

https://reviews.llvm.org/D108150

Files:
  clang/test/CodeGenOpenCL/atomics-cas-remarks-gfx90a.cl
  clang/test/CodeGenOpenCL/atomics-remarks-gfx90a.cl
  clang/test/CodeGenOpenCL/atomics-unsafe-hw-remarks-gfx90a.cl
  llvm/lib/CodeGen/AtomicExpandPass.cpp
  llvm/lib/Target/AMDGPU/SIISelLowering.cpp
  llvm/test/CodeGen/AMDGPU/atomics-cas-remarks-gfx90a.ll
  llvm/test/CodeGen/AMDGPU/atomics-hw-remarks-gfx90a.ll
  llvm/test/CodeGen/AMDGPU/atomics-remarks-gfx90a.ll

Index: llvm/test/CodeGen/AMDGPU/atomics-hw-remarks-gfx90a.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/atomics-hw-remarks-gfx90a.ll
@@ -0,0 +1,95 @@
+; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs --pass-remarks=si-lower \
+; RUN:      %s -o - 2>&1 | FileCheck %s --check-prefix=GFX90A-HW
+
+; GFX90A-HW: Hardware instruction generated for atomic fadd operation at memory scope system due to an unsafe request.
+; GFX90A-HW: Hardware instruction generated for atomic fadd operation at memory scope agent due to an unsafe request.
+; GFX90A-HW: Hardware instruction generated for atomic fadd operation at memory scope workgroup due to an unsafe request.
+; GFX90A-HW: Hardware instruction generated for atomic fadd operation at memory scope wavefront due to an unsafe request.
+; GFX90A-HW: Hardware instruction generated for atomic fadd operation at memory scope singlethread due to an unsafe request.
+; GFX90A-HW: Hardware instruction generated for atomic fadd operation at memory scope agent-one-as due to an unsafe request.
+; GFX90A-HW: Hardware instruction generated for atomic fadd operation at memory scope workgroup-one-as due to an unsafe request.
+; GFX90A-HW: Hardware instruction generated for atomic fadd operation at memory scope wavefront-one-as due to an unsafe request.
+; GFX90A-HW: Hardware instruction generated for atomic fadd operation at memory scope singlethread-one-as due to an unsafe request.
+
+; GFX90A-HW-LABEL: atomic_add_unsafe_hw:
+; GFX90A-HW:    ds_add_f64 v2, v[0:1]
+; GFX90A-HW:    s_endpgm
+define amdgpu_kernel void @atomic_add_unsafe_hw(double addrspace(3)* %ptr) #0 {
+main_body:
+  %ret = atomicrmw fadd double addrspace(3)* %ptr, double 4.0 seq_cst
+  ret void
+}
+
+; GFX90A-HW-LABEL: atomic_add_unsafe_hw_agent:
+; GFX90A-HW:    global_atomic_add_f32 v0, v1, s[2:3]
+; GFX90A-HW:    s_endpgm
+define amdgpu_kernel void @atomic_add_unsafe_hw_agent(float addrspace(1)* %ptr, float %val) #0 {
+main_body:
+  %ret = atomicrmw fadd float addrspace(1)* %ptr, float %val syncscope("agent") monotonic, align 4
+  ret void
+}
+
+; GFX90A-HW-LABEL: atomic_add_unsafe_hw_wg:
+; GFX90A-HW:    global_atomic_add_f32 v0, v1, s[2:3]
+; GFX90A-HW:    s_endpgm
+define amdgpu_kernel void @atomic_add_unsafe_hw_wg(float addrspace(1)* %ptr, float %val) #0 {
+main_body:
+  %ret = atomicrmw fadd float addrspace(1)* %ptr, float %val syncscope("workgroup") monotonic, align 4
+  ret void
+}
+
+; GFX90A-HW-LABEL: atomic_add_unsafe_hw_wavefront:
+; GFX90A-HW:    global_atomic_add_f32 v0, v1, s[2:3]
+; GFX90A-HW:    s_endpgm
+define amdgpu_kernel void @atomic_add_unsafe_hw_wavefront(float addrspace(1)* %ptr, float %val) #0 {
+main_body:
+  %ret = atomicrmw fadd float addrspace(1)* %ptr, float %val syncscope("wavefront") monotonic, align 4
+  ret void
+}
+
+; GFX90A-HW-LABEL: atomic_add_unsafe_hw_single_thread:
+; GFX90A-HW:    global_atomic_add_f32 v0, v1, s[2:3]
+; GFX90A-HW:    s_endpgm
+define amdgpu_kernel void @atomic_add_unsafe_hw_single_thread(float addrspace(1)* %ptr, float %val) #0 {
+main_body:
+  %ret = atomicrmw fadd float addrspace(1)* %ptr, float %val syncscope("singlethread") monotonic, align 4
+  ret void
+}
+
+; GFX90A-HW-LABEL: atomic_add_unsafe_hw_aoa:
+; GFX90A-HW:    global_atomic_add_f32 v0, v1, s[2:3]
+; GFX90A-HW:    s_endpgm
+define amdgpu_kernel void @atomic_add_unsafe_hw_aoa(float addrspace(1)* %ptr, float %val) #0 {
+main_body:
+  %ret = atomicrmw fadd float addrspace(1)* %ptr, float %val syncscope("agent-one-as") monotonic, align 4
+  ret void
+}
+
+; GFX90A-HW-LABEL: atomic_add_unsafe_hw_wgoa:
+; GFX90A-HW:    global_atomic_add_f32 v0, v1, s[2:3]
+; GFX90A-HW:    s_endpgm
+define amdgpu_kernel void @atomic_add_unsafe_hw_wgoa(float addrspace(1)* %ptr, float %val) #0 {
+main_body:
+  %ret = atomicrmw fadd float addrspace(1)* %ptr, float %val syncscope("workgroup-one-as") monotonic, align 4
+  ret void
+}
+
+; GFX90A-HW-LABEL: atomic_add_unsafe_hw_wfoa:
+; GFX90A-HW:    global_atomic_add_f32 v0, v1, s[2:3]
+; GFX90A-HW:    s_endpgm
+define amdgpu_kernel void @atomic_add_unsafe_hw_wfoa(float addrspace(1)* %ptr, float %val) #0 {
+main_body:
+  %ret = atomicrmw fadd float addrspace(1)* %ptr, float %val syncscope("wavefront-one-as") monotonic, align 4
+  ret void
+}
+
+; GFX90A-HW-LABEL: atomic_add_unsafe_hw_stoa:
+; GFX90A-HW:    global_atomic_add_f32 v0, v1, s[2:3]
+; GFX90A-HW:    s_endpgm
+define amdgpu_kernel void @atomic_add_unsafe_hw_stoa(float addrspace(1)* %ptr, float %val) #0 {
+main_body:
+  %ret = atomicrmw fadd float addrspace(1)* %ptr, float %val syncscope("singlethread-one-as") monotonic, align 4
+  ret void
+}
+
+attributes #0 = { "denormal-fp-math"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" }
Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -19,6 +19,7 @@
 #include "SIRegisterInfo.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/LegacyDivergenceAnalysis.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/BinaryFormat/ELF.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
@@ -12120,6 +12121,25 @@
 
 TargetLowering::AtomicExpansionKind
 SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
+
+  auto ReportUnsafeHWInst = [&](TargetLowering::AtomicExpansionKind Kind) {
+    OptimizationRemarkEmitter ORE(RMW->getFunction());
+    LLVMContext &Ctx = RMW->getFunction()->getContext();
+    SmallVector<StringRef> SSNs;
+    Ctx.getSyncScopeNames(SSNs);
+    auto MemScope = SSNs[RMW->getSyncScopeID()].empty()
+                        ? "system"
+                        : SSNs[RMW->getSyncScopeID()];
+    ORE.emit([&]() {
+      return OptimizationRemark(DEBUG_TYPE, "Passed", RMW)
+             << "Hardware instruction generated for atomic "
+             << RMW->getOperationName(RMW->getOperation())
+             << " operation at memory scope " << MemScope
+             << " due to an unsafe request.";
+    });
+    return Kind;
+  };
+
   switch (RMW->getOperation()) {
   case AtomicRMWInst::FAdd: {
     Type *Ty = RMW->getType();
@@ -12154,13 +12174,13 @@
             SSID == RMW->getContext().getOrInsertSyncScopeID("one-as"))
           return AtomicExpansionKind::CmpXChg;
 
-        return AtomicExpansionKind::None;
+        return ReportUnsafeHWInst(AtomicExpansionKind::None);
       }
 
       if (AS == AMDGPUAS::FLAT_ADDRESS)
         return AtomicExpansionKind::CmpXChg;
 
-      return RMW->use_empty() ? AtomicExpansionKind::None
+      return RMW->use_empty() ? ReportUnsafeHWInst(AtomicExpansionKind::None)
                               : AtomicExpansionKind::CmpXChg;
     }
 
@@ -12171,12 +12191,15 @@
       if (!Ty->isDoubleTy())
         return AtomicExpansionKind::None;
 
-      return (fpModeMatchesGlobalFPAtomicMode(RMW) ||
-              RMW->getFunction()
-                      ->getFnAttribute("amdgpu-unsafe-fp-atomics")
-                      .getValueAsString() == "true")
-                 ? AtomicExpansionKind::None
-                 : AtomicExpansionKind::CmpXChg;
+      if (fpModeMatchesGlobalFPAtomicMode(RMW) ||
+          RMW->getFunction()
+              ->getFnAttribute("amdgpu-unsafe-fp-atomics")
+              .getValueAsBool()) {
+        if (!fpModeMatchesGlobalFPAtomicMode(RMW))
+          return ReportUnsafeHWInst(AtomicExpansionKind::None);
+        return AtomicExpansionKind::None;
+      }
+      return AtomicExpansionKind::CmpXChg;
     }
 
     return AtomicExpansionKind::CmpXChg;
Index: llvm/lib/CodeGen/AtomicExpandPass.cpp
===================================================================
--- llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -610,7 +610,7 @@
                           : SSNs[AI->getSyncScopeID()];
       OptimizationRemarkEmitter ORE(AI->getFunction());
       ORE.emit([&]() {
-        return OptimizationRemark(DEBUG_TYPE, "Passed", AI->getFunction())
+        return OptimizationRemark(DEBUG_TYPE, "Passed", AI)
                << "A compare and swap loop was generated for an atomic "
                << AI->getOperationName(AI->getOperation()) << " operation at "
                << MemScope << " memory scope";
Index: clang/test/CodeGenOpenCL/atomics-unsafe-hw-remarks-gfx90a.cl
===================================================================
--- /dev/null
+++ clang/test/CodeGenOpenCL/atomics-unsafe-hw-remarks-gfx90a.cl
@@ -0,0 +1,44 @@
+// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple=amdgcn-amd-amdhsa -target-cpu gfx90a \
+// RUN:     -Rpass=si-lower -munsafe-fp-atomics %s -S -emit-llvm -o - 2>&1 | \
+// RUN:     FileCheck %s --check-prefix=GFX90A-HW
+
+// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple=amdgcn-amd-amdhsa -target-cpu gfx90a \
+// RUN:     -Rpass=si-lower -munsafe-fp-atomics %s -S -o - 2>&1 | \
+// RUN:     FileCheck %s --check-prefix=GFX90A-HW-REMARK
+
+
+// REQUIRES: amdgpu-registered-target
+
+typedef enum memory_order {
+  memory_order_relaxed = __ATOMIC_RELAXED,
+  memory_order_acquire = __ATOMIC_ACQUIRE,
+  memory_order_release = __ATOMIC_RELEASE,
+  memory_order_acq_rel = __ATOMIC_ACQ_REL,
+  memory_order_seq_cst = __ATOMIC_SEQ_CST
+} memory_order;
+
+typedef enum memory_scope {
+  memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,
+  memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,
+  memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,
+  memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,
+#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups)
+  memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP
+#endif
+} memory_scope;
+
+// GFX90A-HW-REMARK: Hardware instruction generated for atomic fadd operation at memory scope workgroup-one-as due to an unsafe request. [-Rpass=si-lower]
+// GFX90A-HW-REMARK: Hardware instruction generated for atomic fadd operation at memory scope agent-one-as due to an unsafe request. [-Rpass=si-lower]
+// GFX90A-HW-REMARK: Hardware instruction generated for atomic fadd operation at memory scope wavefront-one-as due to an unsafe request. [-Rpass=si-lower]
+// GFX90A-HW-REMARK: global_atomic_add_f32 v0, v[0:1], v2, off glc
+// GFX90A-HW-REMARK: global_atomic_add_f32 v0, v[0:1], v2, off glc
+// GFX90A-HW-REMARK: global_atomic_add_f32 v0, v[0:1], v2, off glc
+// GFX90A-HW-LABEL: @atomic_unsafe_hw
+// GFX90A-HW:   atomicrmw fadd float addrspace(1)* %{{.*}}, float %{{.*}} syncscope("workgroup-one-as") monotonic, align 4
+// GFX90A-HW:   atomicrmw fadd float addrspace(1)* %{{.*}}, float %{{.*}} syncscope("agent-one-as") monotonic, align 4
+// GFX90A-HW:   atomicrmw fadd float addrspace(1)* %{{.*}}, float %{{.*}} syncscope("wavefront-one-as") monotonic, align 4
+void atomic_unsafe_hw(__global atomic_float *d, float a) {
+  float ret1 = __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_work_group);
+  float ret2 = __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_device);
+  float ret3 = __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_sub_group);
+}

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D108150: [Remarks] [AMDGPU] Emit optimization remarks for atomics generating hardware instructions

Reply via email to