jdoerfert created this revision.
jdoerfert added reviewers: tra, hfinkel.
Herald added subscribers: guansong, bollu, yaxunl.
Herald added a project: clang.

Due to recent changes we cannot use OpenMP in CUDA files anymore
(PR45533) as the math handling of CUDA is different when _OPENMP is
defined. We actually want this different behavior only if we are
offloading with OpenMP to NVIDIA, thus generating NVPTX. With this patch
we do not interfere with the CUDA math handling except if we are in
NVPTX offloading mode, as indicated by the presence of __OPENMP_NVPTX__.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D78155

Files:
  clang/lib/Headers/__clang_cuda_cmath.h
  clang/lib/Headers/__clang_cuda_device_functions.h
  clang/lib/Headers/__clang_cuda_libdevice_declares.h
  clang/lib/Headers/__clang_cuda_math.h
  clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
  clang/lib/Headers/openmp_wrappers/cmath
  clang/lib/Headers/openmp_wrappers/math.h

Index: clang/lib/Headers/openmp_wrappers/math.h
===================================================================
--- clang/lib/Headers/openmp_wrappers/math.h
+++ clang/lib/Headers/openmp_wrappers/math.h
@@ -41,7 +41,9 @@
     device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
 
 #define __CUDA__
+#define __OPENMP_NVPTX__
 #include <__clang_cuda_math.h>
+#undef __OPENMP_NVPTX__
 #undef __CUDA__
 
 #pragma omp end declare variant
Index: clang/lib/Headers/openmp_wrappers/cmath
===================================================================
--- clang/lib/Headers/openmp_wrappers/cmath
+++ clang/lib/Headers/openmp_wrappers/cmath
@@ -28,7 +28,9 @@
     device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
 
 #define __CUDA__
+#define __OPENMP_NVPTX__
 #include <__clang_cuda_cmath.h>
+#undef __OPENMP_NVPTX__
 #undef __CUDA__
 
 // Overloads not provided by the CUDA wrappers but by the CUDA system headers.
Index: clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
===================================================================
--- clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
+++ clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
@@ -22,11 +22,15 @@
 #endif
 
 #define __CUDA__
+#define __OPENMP_NVPTX__
+
 /// Include declarations for libdevice functions.
 #include <__clang_cuda_libdevice_declares.h>
 
 /// Provide definitions for these functions.
 #include <__clang_cuda_device_functions.h>
+
+#undef __OPENMP_NVPTX__
 #undef __CUDA__
 
 #ifdef __cplusplus
Index: clang/lib/Headers/__clang_cuda_math.h
===================================================================
--- clang/lib/Headers/__clang_cuda_math.h
+++ clang/lib/Headers/__clang_cuda_math.h
@@ -12,7 +12,7 @@
 #error "This file is for CUDA compilation only."
 #endif
 
-#ifndef _OPENMP
+#ifndef __OPENMP_NVPTX__
 #if CUDA_VERSION < 9000
 #error This file is intended to be used with CUDA-9+ only.
 #endif
@@ -22,7 +22,7 @@
 // we implement in this file. We need static in order to avoid emitting unused
 // functions and __forceinline__ helps inlining these wrappers at -O1.
 #pragma push_macro("__DEVICE__")
-#ifdef _OPENMP
+#ifdef __OPENMP_NVPTX__
 #if defined(__cplusplus)
 #define __DEVICE__ static constexpr __attribute__((always_inline, nothrow))
 #else
@@ -36,7 +36,7 @@
 // because the OpenMP overlay requires constexpr functions here but prior to
 // c++14 void return functions could not be constexpr.
 #pragma push_macro("__DEVICE_VOID__")
-#ifdef _OPENMP && defined(__cplusplus) && __cplusplus < 201402L
+#ifdef __OPENMP_NVPTX__ && defined(__cplusplus) && __cplusplus < 201402L
 #define __DEVICE_VOID__ static __attribute__((always_inline, nothrow))
 #else
 #define __DEVICE_VOID__ __DEVICE__
Index: clang/lib/Headers/__clang_cuda_libdevice_declares.h
===================================================================
--- clang/lib/Headers/__clang_cuda_libdevice_declares.h
+++ clang/lib/Headers/__clang_cuda_libdevice_declares.h
@@ -14,7 +14,7 @@
 extern "C" {
 #endif
 
-#if defined(_OPENMP)
+#if defined(__OPENMP_NVPTX__)
 #define __DEVICE__
 #elif defined(__CUDA__)
 #define __DEVICE__ __device__
Index: clang/lib/Headers/__clang_cuda_device_functions.h
===================================================================
--- clang/lib/Headers/__clang_cuda_device_functions.h
+++ clang/lib/Headers/__clang_cuda_device_functions.h
@@ -10,7 +10,7 @@
 #ifndef __CLANG_CUDA_DEVICE_FUNCTIONS_H__
 #define __CLANG_CUDA_DEVICE_FUNCTIONS_H__
 
-#ifndef _OPENMP
+#ifndef __OPENMP_NVPTX__
 #if CUDA_VERSION < 9000
 #error This file is intended to be used with CUDA-9+ only.
 #endif
@@ -20,7 +20,7 @@
 // we implement in this file. We need static in order to avoid emitting unused
 // functions and __forceinline__ helps inlining these wrappers at -O1.
 #pragma push_macro("__DEVICE__")
-#ifdef _OPENMP
+#ifdef __OPENMP_NVPTX__
 #define __DEVICE__ static __attribute__((always_inline, nothrow))
 #else
 #define __DEVICE__ static __device__ __forceinline__
@@ -1466,14 +1466,14 @@
 
 // For OpenMP we require the user to include <time.h> as we need to know what
 // clock_t is on the system.
-#ifndef _OPENMP
+#ifndef __OPENMP_NVPTX__
 __DEVICE__ /* clock_t= */ int clock() { return __nvvm_read_ptx_sreg_clock(); }
 #endif
 __DEVICE__ long long clock64() { return __nvvm_read_ptx_sreg_clock64(); }
 
 // These functions shouldn't be declared when including this header
 // for math function resolution purposes.
-#ifndef _OPENMP
+#ifndef __OPENMP_NVPTX__
 __DEVICE__ void *memcpy(void *__a, const void *__b, size_t __c) {
   return __builtin_memcpy(__a, __b, __c);
 }
Index: clang/lib/Headers/__clang_cuda_cmath.h
===================================================================
--- clang/lib/Headers/__clang_cuda_cmath.h
+++ clang/lib/Headers/__clang_cuda_cmath.h
@@ -12,7 +12,7 @@
 #error "This file is for CUDA compilation only."
 #endif
 
-#ifndef _OPENMP
+#ifndef __OPENMP_NVPTX__
 #include <limits>
 #endif
 
@@ -32,7 +32,7 @@
 // implementation.  Declaring in the global namespace and pulling into namespace
 // std covers all of the known knowns.
 
-#ifdef _OPENMP
+#ifdef __OPENMP_NVPTX__
 #define __DEVICE__ static constexpr __attribute__((always_inline, nothrow))
 #else
 #define __DEVICE__ static __device__ __inline__ __attribute__((always_inline))
@@ -69,7 +69,7 @@
 // Windows. For OpenMP we omit these as some old system headers have
 // non-conforming `isinf(float)` and `isnan(float)` implementations that return
 // an `int`. The system versions of these functions should be fine anyway.
-#if !defined(_MSC_VER) && !defined(_OPENMP)
+#if !defined(_MSC_VER) && !defined(__OPENMP_NVPTX__)
 __DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }
 __DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }
 __DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }
@@ -146,7 +146,7 @@
 // libdevice doesn't provide an implementation, and we don't want to be in the
 // business of implementing tricky libm functions in this header.
 
-#ifndef _OPENMP
+#ifndef __OPENMP_NVPTX__
 
 // Now we've defined everything we promised we'd define in
 // __clang_cuda_math_forward_declares.h.  We need to do two additional things to
@@ -463,7 +463,7 @@
 } // namespace std
 #endif
 
-#endif // _OPENMP
+#endif // __OPENMP_NVPTX__
 
 #undef __DEVICE__
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to