saiislam updated this revision to Diff 264192.
saiislam added a comment.
Moved isGPU() from llvm's Triple.h to clang's TargetInfo. Renamed it to
isOpenMPGPU()
to represent target's compatibility with OpenMP offloading and reduce its scope.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D79754/new/
https://reviews.llvm.org/D79754
Files:
clang/include/clang/Basic/TargetInfo.h
clang/lib/AST/Decl.cpp
clang/lib/Frontend/CompilerInvocation.cpp
clang/test/Driver/openmp-offload-gpu.c
clang/test/OpenMP/target_parallel_no_exceptions.cpp
llvm/include/llvm/ADT/Triple.h
Index: llvm/include/llvm/ADT/Triple.h
===================================================================
--- llvm/include/llvm/ADT/Triple.h
+++ llvm/include/llvm/ADT/Triple.h
@@ -692,6 +692,9 @@
return getArch() == Triple::nvptx || getArch() == Triple::nvptx64;
}
+ /// Tests whether the target is AMDGCN
+ bool isAMDGCN() const { return getArch() == Triple::amdgcn; }
+
bool isAMDGPU() const {
return getArch() == Triple::r600 || getArch() == Triple::amdgcn;
}
Index: clang/test/OpenMP/target_parallel_no_exceptions.cpp
===================================================================
--- clang/test/OpenMP/target_parallel_no_exceptions.cpp
+++ clang/test/OpenMP/target_parallel_no_exceptions.cpp
@@ -1,6 +1,7 @@
/// Make sure no exception messages are inclided in the llvm output.
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHK-EXCEPTION
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHK-EXCEPTION
void test_increment() {
#pragma omp target
Index: clang/test/Driver/openmp-offload-gpu.c
===================================================================
--- clang/test/Driver/openmp-offload-gpu.c
+++ clang/test/Driver/openmp-offload-gpu.c
@@ -6,6 +6,7 @@
// REQUIRES: x86-registered-target
// REQUIRES: powerpc-registered-target
// REQUIRES: nvptx-registered-target
+// REQUIRES: amdgpu-registered-target
/// ###########################################################################
@@ -249,30 +250,49 @@
// HAS_DEBUG-SAME: "--return-at-end"
// HAS_DEBUG: nvlink
// HAS_DEBUG-SAME: "-g"
+// CUDA_MODE: clang{{.*}}"-cc1"{{.*}}"-triple" "{{nvptx64-nvidia-cuda|amdgcn-amd-amdhsa}}"
+// CUDA_MODE-SAME: "-fopenmp-cuda-mode"
+// NO_CUDA_MODE-NOT: "-{{fno-|f}}openmp-cuda-mode"
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-mode 2>&1 \
// RUN: | FileCheck -check-prefix=CUDA_MODE %s
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fno-openmp-cuda-mode -fopenmp-cuda-mode 2>&1 \
// RUN: | FileCheck -check-prefix=CUDA_MODE %s
-// CUDA_MODE: clang{{.*}}"-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda"
-// CUDA_MODE-SAME: "-fopenmp-cuda-mode"
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fno-openmp-cuda-mode 2>&1 \
// RUN: | FileCheck -check-prefix=NO_CUDA_MODE %s
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-mode -fno-openmp-cuda-mode 2>&1 \
// RUN: | FileCheck -check-prefix=NO_CUDA_MODE %s
-// NO_CUDA_MODE-NOT: "-{{fno-|f}}openmp-cuda-mode"
+
+// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fopenmp-cuda-mode 2>&1 \
+// RUN: | FileCheck -check-prefix=CUDA_MODE %s
+// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fno-openmp-cuda-mode -fopenmp-cuda-mode 2>&1 \
+// RUN: | FileCheck -check-prefix=CUDA_MODE %s
+// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fno-openmp-cuda-mode 2>&1 \
+// RUN: | FileCheck -check-prefix=NO_CUDA_MODE %s
+// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fopenmp-cuda-mode -fno-openmp-cuda-mode 2>&1 \
+// RUN: | FileCheck -check-prefix=NO_CUDA_MODE %s
+
+// FULL_RUNTIME: clang{{.*}}"-cc1"{{.*}}"-triple" "{{nvptx64-nvidia-cuda|amdgcn-amd-amdhsa}}"
+// FULL_RUNTIME-SAME: "-fopenmp-cuda-force-full-runtime"
+// NO_FULL_RUNTIME-NOT: "-{{fno-|f}}openmp-cuda-force-full-runtime"
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-force-full-runtime 2>&1 \
// RUN: | FileCheck -check-prefix=FULL_RUNTIME %s
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fno-openmp-cuda-force-full-runtime -fopenmp-cuda-force-full-runtime 2>&1 \
// RUN: | FileCheck -check-prefix=FULL_RUNTIME %s
-// FULL_RUNTIME: clang{{.*}}"-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda"
-// FULL_RUNTIME-SAME: "-fopenmp-cuda-force-full-runtime"
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fno-openmp-cuda-force-full-runtime 2>&1 \
// RUN: | FileCheck -check-prefix=NO_FULL_RUNTIME %s
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-force-full-runtime -fno-openmp-cuda-force-full-runtime 2>&1 \
// RUN: | FileCheck -check-prefix=NO_FULL_RUNTIME %s
-// NO_FULL_RUNTIME-NOT: "-{{fno-|f}}openmp-cuda-force-full-runtime"
+
+// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fopenmp-cuda-force-full-runtime 2>&1 \
+// RUN: | FileCheck -check-prefix=FULL_RUNTIME %s
+// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fno-openmp-cuda-force-full-runtime -fopenmp-cuda-force-full-runtime 2>&1 \
+// RUN: | FileCheck -check-prefix=FULL_RUNTIME %s
+// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fno-openmp-cuda-force-full-runtime 2>&1 \
+// RUN: | FileCheck -check-prefix=NO_FULL_RUNTIME %s
+// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fopenmp-cuda-force-full-runtime -fno-openmp-cuda-force-full-runtime 2>&1 \
+// RUN: | FileCheck -check-prefix=NO_FULL_RUNTIME %s
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-teams-reduction-recs-num=2048 2>&1 \
// RUN: | FileCheck -check-prefix=CUDA_RED_RECS %s
Index: clang/lib/Frontend/CompilerInvocation.cpp
===================================================================
--- clang/lib/Frontend/CompilerInvocation.cpp
+++ clang/lib/Frontend/CompilerInvocation.cpp
@@ -22,6 +22,7 @@
#include "clang/Basic/ObjCRuntime.h"
#include "clang/Basic/Sanitizers.h"
#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/TargetOptions.h"
#include "clang/Basic/Version.h"
#include "clang/Basic/Visibility.h"
@@ -3096,9 +3097,11 @@
}
}
+ bool IsOpenMPGPU = clang::TargetInfo::isOpenMPGPU(T);
+
// Set the flag to prevent the implementation from emitting device exception
// handling code for those requiring so.
- if ((Opts.OpenMPIsDevice && T.isNVPTX()) || Opts.OpenCLCPlusPlus) {
+ if ((Opts.OpenMPIsDevice && IsOpenMPGPU) || Opts.OpenCLCPlusPlus) {
Opts.Exceptions = 0;
Opts.CXXExceptions = 0;
}
@@ -3132,6 +3135,7 @@
TT.getArch() == llvm::Triple::ppc64le ||
TT.getArch() == llvm::Triple::nvptx ||
TT.getArch() == llvm::Triple::nvptx64 ||
+ TT.getArch() == llvm::Triple::amdgcn ||
TT.getArch() == llvm::Triple::x86 ||
TT.getArch() == llvm::Triple::x86_64))
Diags.Report(diag::err_drv_invalid_omp_target) << A->getValue(i);
@@ -3149,13 +3153,13 @@
<< Opts.OMPHostIRFile;
}
- // Set CUDA mode for OpenMP target NVPTX if specified in options
- Opts.OpenMPCUDAMode = Opts.OpenMPIsDevice && T.isNVPTX() &&
+ // Set CUDA mode for OpenMP target NVPTX/AMDGCN if specified in options
+ Opts.OpenMPCUDAMode = Opts.OpenMPIsDevice && IsOpenMPGPU &&
Args.hasArg(options::OPT_fopenmp_cuda_mode);
- // Set CUDA mode for OpenMP target NVPTX if specified in options
+ // Set CUDA mode for OpenMP target NVPTX/AMDGCN if specified in options
Opts.OpenMPCUDAForceFullRuntime =
- Opts.OpenMPIsDevice && T.isNVPTX() &&
+ Opts.OpenMPIsDevice && IsOpenMPGPU &&
Args.hasArg(options::OPT_fopenmp_cuda_force_full_runtime);
// Record whether the __DEPRECATED define was requested.
Index: clang/lib/AST/Decl.cpp
===================================================================
--- clang/lib/AST/Decl.cpp
+++ clang/lib/AST/Decl.cpp
@@ -3214,10 +3214,11 @@
Context.BuiltinInfo.isPredefinedLibFunction(BuiltinID))
return 0;
- // CUDA does not have device-side standard library. printf and malloc are the
- // only special cases that are supported by device-side runtime.
- if (Context.getLangOpts().CUDA && hasAttr<CUDADeviceAttr>() &&
- !hasAttr<CUDAHostAttr>() &&
+ // CUDA/HIP does not have device-side standard library. printf and malloc are
+ // the only special cases that are supported by device-side runtime.
+ if (((Context.getLangOpts().CUDA && hasAttr<CUDADeviceAttr>() &&
+ !hasAttr<CUDAHostAttr>()) ||
+ Context.getTargetInfo().getTriple().isAMDGCN()) &&
!(BuiltinID == Builtin::BIprintf || BuiltinID == Builtin::BImalloc))
return 0;
Index: clang/include/clang/Basic/TargetInfo.h
===================================================================
--- clang/include/clang/Basic/TargetInfo.h
+++ clang/include/clang/Basic/TargetInfo.h
@@ -1256,6 +1256,12 @@
getTriple().getArch() == llvm::Triple::aarch64);
}
+ /// Tests whether the target is an OpenMP-compatible GPU architecture
+ /// Currently only supports NVPTX and AMDGCN
+ static bool isOpenMPGPU(llvm::Triple &T) {
+ return T.isNVPTX() || T.isAMDGCN();
+ }
+
/// Return true if {|} are normal characters in the asm string.
///
/// If this returns false (the default), then {abc|xyz} is syntax
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits