AlexVlx created this revision.
AlexVlx added reviewers: yaxunl, rjmccall, eli.friedman, arsenm, tra, jlebar.
AlexVlx added a project: clang.
Herald added a subscriber: ormris.
Herald added a project: All.
AlexVlx requested review of this revision.
Herald added subscribers: cfe-commits, wdng.

This patch adds the CodeGen changes needed by the standard algorithm offload 
feature being proposed here: 
https://discourse.llvm.org/t/rfc-adding-c-parallel-algorithm-offload-support-to-clang-llvm/72159/1.
 The verbose documentation is included in the head of the patch series. This 
change concludes the set of additions needed in Clang, and essentially relaxes 
restrictions on what gets emitted on the device path, when compiling in 
`stdpar` mode (after the previous patch relaxed restrictions on what is 
semantically correct):

1. Unless a function is explicitly marked `__host__`, it will get emitted, 
whereas before only `__device__` and `__global__` functions would be emitted;
  - At the moment we special case `thread_local` handling and still do not emit 
them, as they will require more scaffolding that will be proposed at some point 
in the future.
2. Unsupported builtins are ignored as opposed to being marked as an error, as 
the decision on their validity is deferred to the `stdpar` specific code 
selection pass we are adding, which will be the topic of the final patch in 
this series;
3. We add the `stdpar` specific passes to the `opt` pipeline, independent of 
optimisation level:
  - When compiling for the accelerator / offload device, we add a code 
selection pass;
  - When compiling for the host, iff the user requested it via the 
`--stdpar-interpose-alloc` flag, we add a pass which replaces canonical 
allocation / deallocation functions with accelerator aware equivalents.

A test to validate that unannotated functions get correctly emitted is added as 
well. Please note that `__device__`, `__global__` and `__host__` are used to 
match existing nomenclature, they would not be present in user code.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D155850

Files:
  clang/lib/CodeGen/BackendUtil.cpp
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/test/CodeGenStdPar/unannotated-functions-get-emitted.cpp


Index: clang/test/CodeGenStdPar/unannotated-functions-get-emitted.cpp
===================================================================
--- /dev/null
+++ clang/test/CodeGenStdPar/unannotated-functions-get-emitted.cpp
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 -x hip -emit-llvm -fcuda-is-device \
+// RUN:   -o - %s | FileCheck --check-prefix=NO-STDPAR-DEV %s
+
+// RUN: %clang_cc1 --stdpar -emit-llvm -fcuda-is-device \
+// RUN:   -o - %s | FileCheck --check-prefix=STDPAR-DEV %s
+
+#define __device__ __attribute__((device))
+
+// NO-STDPAR-DEV-NOT: define {{.*}} void @_Z3fooPff({{.*}})
+// STDPAR-DEV: define {{.*}} void @_Z3fooPff({{.*}})
+void foo(float *a, float b) {
+  *a = b;
+}
+
+// NO-STDPAR-DEV: define {{.*}} void @_Z3barPff({{.*}})
+// STDPAR-DEV: define {{.*}} void @_Z3barPff({{.*}})
+__device__ void bar(float *a, float b) {
+  *a = b;
+}
\ No newline at end of file
Index: clang/lib/CodeGen/CodeGenModule.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -3545,7 +3545,12 @@
           !Global->hasAttr<CUDAConstantAttr>() &&
           !Global->hasAttr<CUDASharedAttr>() &&
           !Global->getType()->isCUDADeviceBuiltinSurfaceType() &&
-          !Global->getType()->isCUDADeviceBuiltinTextureType())
+          !Global->getType()->isCUDADeviceBuiltinTextureType() &&
+          !(LangOpts.HIPStdPar &&
+            isa<FunctionDecl>(Global) &&
+            !cast<FunctionDecl>(Global)->getBuiltinID() &&
+            !Global->hasAttr<CUDAHostAttr>() &&
+            !cast<FunctionDecl>(Global)->isVariadic()))
         return;
     } else {
       // We need to emit host-side 'shadows' for all global
@@ -5310,7 +5315,9 @@
 
   setNonAliasAttributes(D, GV);
 
-  if (D->getTLSKind() && !GV->isThreadLocal()) {
+  if (D->getTLSKind() &&
+      !GV->isThreadLocal() &&
+      !(getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice)) {
     if (D->getTLSKind() == VarDecl::TLS_Dynamic)
       CXXThreadLocals.push_back(D);
     setTLSMode(GV, *D);
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -5538,7 +5538,8 @@
     llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
   }
 
-  ErrorUnsupported(E, "builtin function");
+  if (!getLangOpts().HIPStdPar)
+    ErrorUnsupported(E, "builtin function");
 
   // Unknown builtin, for now just dump it out and return undef.
   return GetUndefRValue(E->getType());
Index: clang/lib/CodeGen/BackendUtil.cpp
===================================================================
--- clang/lib/CodeGen/BackendUtil.cpp
+++ clang/lib/CodeGen/BackendUtil.cpp
@@ -77,6 +77,7 @@
 #include "llvm/Transforms/Scalar/EarlyCSE.h"
 #include "llvm/Transforms/Scalar/GVN.h"
 #include "llvm/Transforms/Scalar/JumpThreading.h"
+#include "llvm/Transforms/StdPar/StdPar.h"
 #include "llvm/Transforms/Utils/Debugify.h"
 #include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -1093,6 +1094,13 @@
       TheModule->addModuleFlag(Module::Error, "UnifiedLTO", uint32_t(1));
   }
 
+  if (LangOpts.HIPStdPar) {
+    if (LangOpts.CUDAIsDevice)
+      MPM.addPass(StdParAcceleratorCodeSelectionPass());
+    else if (LangOpts.HIPStdParInterposeAlloc)
+      MPM.addPass(StdParAllocationInterpositionPass());
+  }
+
   // Now that we have all of the passes ready, run them.
   {
     PrettyStackTraceString CrashInfo("Optimizer");


Index: clang/test/CodeGenStdPar/unannotated-functions-get-emitted.cpp
===================================================================
--- /dev/null
+++ clang/test/CodeGenStdPar/unannotated-functions-get-emitted.cpp
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 -x hip -emit-llvm -fcuda-is-device \
+// RUN:   -o - %s | FileCheck --check-prefix=NO-STDPAR-DEV %s
+
+// RUN: %clang_cc1 --stdpar -emit-llvm -fcuda-is-device \
+// RUN:   -o - %s | FileCheck --check-prefix=STDPAR-DEV %s
+
+#define __device__ __attribute__((device))
+
+// NO-STDPAR-DEV-NOT: define {{.*}} void @_Z3fooPff({{.*}})
+// STDPAR-DEV: define {{.*}} void @_Z3fooPff({{.*}})
+void foo(float *a, float b) {
+  *a = b;
+}
+
+// NO-STDPAR-DEV: define {{.*}} void @_Z3barPff({{.*}})
+// STDPAR-DEV: define {{.*}} void @_Z3barPff({{.*}})
+__device__ void bar(float *a, float b) {
+  *a = b;
+}
\ No newline at end of file
Index: clang/lib/CodeGen/CodeGenModule.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -3545,7 +3545,12 @@
           !Global->hasAttr<CUDAConstantAttr>() &&
           !Global->hasAttr<CUDASharedAttr>() &&
           !Global->getType()->isCUDADeviceBuiltinSurfaceType() &&
-          !Global->getType()->isCUDADeviceBuiltinTextureType())
+          !Global->getType()->isCUDADeviceBuiltinTextureType() &&
+          !(LangOpts.HIPStdPar &&
+            isa<FunctionDecl>(Global) &&
+            !cast<FunctionDecl>(Global)->getBuiltinID() &&
+            !Global->hasAttr<CUDAHostAttr>() &&
+            !cast<FunctionDecl>(Global)->isVariadic()))
         return;
     } else {
       // We need to emit host-side 'shadows' for all global
@@ -5310,7 +5315,9 @@
 
   setNonAliasAttributes(D, GV);
 
-  if (D->getTLSKind() && !GV->isThreadLocal()) {
+  if (D->getTLSKind() &&
+      !GV->isThreadLocal() &&
+      !(getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice)) {
     if (D->getTLSKind() == VarDecl::TLS_Dynamic)
       CXXThreadLocals.push_back(D);
     setTLSMode(GV, *D);
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -5538,7 +5538,8 @@
     llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
   }
 
-  ErrorUnsupported(E, "builtin function");
+  if (!getLangOpts().HIPStdPar)
+    ErrorUnsupported(E, "builtin function");
 
   // Unknown builtin, for now just dump it out and return undef.
   return GetUndefRValue(E->getType());
Index: clang/lib/CodeGen/BackendUtil.cpp
===================================================================
--- clang/lib/CodeGen/BackendUtil.cpp
+++ clang/lib/CodeGen/BackendUtil.cpp
@@ -77,6 +77,7 @@
 #include "llvm/Transforms/Scalar/EarlyCSE.h"
 #include "llvm/Transforms/Scalar/GVN.h"
 #include "llvm/Transforms/Scalar/JumpThreading.h"
+#include "llvm/Transforms/StdPar/StdPar.h"
 #include "llvm/Transforms/Utils/Debugify.h"
 #include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -1093,6 +1094,13 @@
       TheModule->addModuleFlag(Module::Error, "UnifiedLTO", uint32_t(1));
   }
 
+  if (LangOpts.HIPStdPar) {
+    if (LangOpts.CUDAIsDevice)
+      MPM.addPass(StdParAcceleratorCodeSelectionPass());
+    else if (LangOpts.HIPStdParInterposeAlloc)
+      MPM.addPass(StdParAllocationInterpositionPass());
+  }
+
   // Now that we have all of the passes ready, run them.
   {
     PrettyStackTraceString CrashInfo("Optimizer");
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to