[PATCH] D135374: [OpenMP][AMDGPU] Add 'uniform-work-group' attribute to OpenMP kernels

Joseph Huber via Phabricator via cfe-commits Thu, 06 Oct 2022 16:22:22 -0700

This revision was automatically updated to reflect the committed changes.
Closed by commit rG4aa87a131f93: [OpenMP][AMDGPU] Add 
&#039;uniform-work-group&#039; attribute to OpenMP kernels (authored by 
jhuber6).


Changed prior to commit:
  https://reviews.llvm.org/D135374?vs=465769&id=465923#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D135374/new/

https://reviews.llvm.org/D135374

Files:
  clang/lib/CodeGen/TargetInfo.cpp
  clang/test/OpenMP/amdgcn-attributes.cpp


Index: clang/test/OpenMP/amdgcn-attributes.cpp
===================================================================
--- clang/test/OpenMP/amdgcn-attributes.cpp
+++ clang/test/OpenMP/amdgcn-attributes.cpp
@@ -32,10 +32,10 @@
   return x + 1;
 }
 
-  // DEFAULT: attributes #0 = { convergent noinline norecurse nounwind optnone 
"frame-pointer"="none" "kernel" "min-legal-vector-width"="0" 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" }
-  // CPU: attributes #0 = { convergent noinline norecurse nounwind optnone 
"frame-pointer"="none" "kernel" "min-legal-vector-width"="0" 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-cpu"="gfx900" 
"target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
 }
-  // NOIEEE: attributes #0 = { convergent noinline norecurse nounwind optnone 
"amdgpu-ieee"="false" "frame-pointer"="none" "kernel" 
"min-legal-vector-width"="0" "no-nans-fp-math"="true" "no-trapping-math"="true" 
"stack-protector-buffer-size"="8" }
-  // UNSAFEATOMIC: attributes #0 = { convergent noinline norecurse nounwind 
optnone "amdgpu-unsafe-fp-atomics"="true" "frame-pointer"="none" "kernel" 
"min-legal-vector-width"="0" "no-trapping-math"="true" 
"stack-protector-buffer-size"="8" }
+// DEFAULT: attributes #0 = { convergent noinline norecurse nounwind optnone 
"frame-pointer"="none" "kernel" "min-legal-vector-width"="0" 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"uniform-work-group-size"="true" }
+// CPU: attributes #0 = { convergent noinline norecurse nounwind optnone 
"frame-pointer"="none" "kernel" "min-legal-vector-width"="0" 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-cpu"="gfx900" 
"target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
 "uniform-work-group-size"="true" }
+// NOIEEE: attributes #0 = { convergent noinline norecurse nounwind optnone 
"amdgpu-ieee"="false" "frame-pointer"="none" "kernel" 
"min-legal-vector-width"="0" "no-nans-fp-math"="true" "no-trapping-math"="true" 
"stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
+// UNSAFEATOMIC: attributes #0 = { convergent noinline norecurse nounwind 
optnone "amdgpu-unsafe-fp-atomics"="true" "frame-pointer"="none" "kernel" 
"min-legal-vector-width"="0" "no-trapping-math"="true" 
"stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
 
 // DEFAULT: attributes #1 = { convergent mustprogress noinline nounwind 
optnone "frame-pointer"="none" "min-legal-vector-width"="0" 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" }
 // CPU: attributes #1 = { convergent mustprogress noinline nounwind optnone 
"frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" 
"stack-protector-buffer-size"="8" "target-cpu"="gfx900" 
"target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
 }
Index: clang/lib/CodeGen/TargetInfo.cpp
===================================================================
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -9423,8 +9423,12 @@
 
   const bool IsHIPKernel =
       M.getLangOpts().HIP && FD && FD->hasAttr<CUDAGlobalAttr>();
+  const bool IsOpenMPkernel =
+      M.getLangOpts().OpenMPIsDevice &&
+      (F->getCallingConv() == llvm::CallingConv::AMDGPU_KERNEL);
 
-  if (IsHIPKernel)
+  // TODO: This should be moved to language specific attributes instead.
+  if (IsHIPKernel || IsOpenMPkernel)
     F->addFnAttr("uniform-work-group-size", "true");
 
   if (M.getContext().getTargetInfo().allowAMDGPUUnsafeFPAtomics())

Index: clang/test/OpenMP/amdgcn-attributes.cpp
===================================================================
--- clang/test/OpenMP/amdgcn-attributes.cpp
+++ clang/test/OpenMP/amdgcn-attributes.cpp
@@ -32,10 +32,10 @@
   return x + 1;
 }
 
-  // DEFAULT: attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
-  // CPU: attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" }
-  // NOIEEE: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-ieee"="false" "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-nans-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
-  // UNSAFEATOMIC: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-unsafe-fp-atomics"="true" "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// DEFAULT: attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
+// CPU: attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" "uniform-work-group-size"="true" }
+// NOIEEE: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-ieee"="false" "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-nans-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
+// UNSAFEATOMIC: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-unsafe-fp-atomics"="true" "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
 
 // DEFAULT: attributes #1 = { convergent mustprogress noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
 // CPU: attributes #1 = { convergent mustprogress noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" }
Index: clang/lib/CodeGen/TargetInfo.cpp
===================================================================
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -9423,8 +9423,12 @@
 
   const bool IsHIPKernel =
       M.getLangOpts().HIP && FD && FD->hasAttr<CUDAGlobalAttr>();
+  const bool IsOpenMPkernel =
+      M.getLangOpts().OpenMPIsDevice &&
+      (F->getCallingConv() == llvm::CallingConv::AMDGPU_KERNEL);
 
-  if (IsHIPKernel)
+  // TODO: This should be moved to language specific attributes instead.
+  if (IsHIPKernel || IsOpenMPkernel)
     F->addFnAttr("uniform-work-group-size", "true");
 
   if (M.getContext().getTargetInfo().allowAMDGPUUnsafeFPAtomics())

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D135374: [OpenMP][AMDGPU] Add 'uniform-work-group' attribute to OpenMP kernels

Reply via email to