[PATCH] D59361: [CUDA][Windows] Partial fix for bug 38811 (Step 1 of 3)

2019-03-14 Thread Evgeny Mankov via Phabricator via cfe-commits
emankov created this revision.
emankov added a reviewer: tra.
Herald added subscribers: cfe-commits, jdoerfert.
Herald added a project: clang.

Partial fix for the clang bug 38811, "Clang fails to compile with CUDA-9.x on
Windows".

Adding a `defined(_WIN64)` check alongside the existing `#if defined(__LP64__)`
eliminates the following clang (64-bit) compilation error on Windows.

  
C:/GIT/LLVM/trunk/llvm-64-release-vs2017/dist/lib/clang/9.0.0\include\__clang_cuda_device_functions.h(1609,45): error: no matching function for call to 'roundf'
    __DEVICE__ long lroundf(float __a) { return roundf(__a); }
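
For context on why the extra check is needed: the sketch below is a small host-side probe (my illustration, not part of the patch) showing that on 64-bit Windows clang predefines `_WIN64` but not `__LP64__`, because that target uses the LLP64 data model (32-bit `long`, 64-bit `long long`).

  // probe_data_model.cpp -- illustration only; compile with any 64-bit clang.
  #include <cstdio>

  int main() {
  #if defined(__LP64__)
    std::puts("__LP64__ is predefined: LP64 target, long is 64-bit");
  #endif
  #if defined(_WIN64)
    std::puts("_WIN64 is predefined: LLP64 target, long is 32-bit");
  #endif
    return 0;
  }

Guarding the 64-bit code path with `defined(__LP64__) || defined(_WIN64)` therefore covers both data models, which is what the diff below does.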

[How to repro]

  clang++.exe -x cuda "c:\ProgramData\NVIDIA Corporation\CUDA Samples\v9.0\0_Simple\simplePrintf\simplePrintf.cu" -I"c:\ProgramData\NVIDIA Corporation\CUDA Samples\v8.0\common\inc" --cuda-gpu-arch=sm_50 --cuda-path="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.0" -L"c:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.0\lib\x64" -lcudart.lib -v




Repository:
  rC Clang

https://reviews.llvm.org/D59361

Files:
  clang/lib/Headers/__clang_cuda_device_functions.h


Index: clang/lib/Headers/__clang_cuda_device_functions.h
===
--- clang/lib/Headers/__clang_cuda_device_functions.h
+++ clang/lib/Headers/__clang_cuda_device_functions.h
@@ -1563,7 +1563,7 @@
 __DEVICE__ float j1f(float __a) { return __nv_j1f(__a); }
 __DEVICE__ double jn(int __n, double __a) { return __nv_jn(__n, __a); }
 __DEVICE__ float jnf(int __n, float __a) { return __nv_jnf(__n, __a); }
-#if defined(__LP64__)
+#if defined(__LP64__) || defined(_WIN64)
 __DEVICE__ long labs(long __a) { return llabs(__a); };
 #else
 __DEVICE__ long labs(long __a) { return __nv_abs(__a); };
@@ -1597,7 +1597,7 @@
 __DEVICE__ float logf(float __a) {
   return __FAST_OR_SLOW(__nv_fast_logf, __nv_logf)(__a);
 }
-#if defined(__LP64__)
+#if defined(__LP64__) || defined(_WIN64)
 __DEVICE__ long lrint(double __a) { return llrint(__a); }
 __DEVICE__ long lrintf(float __a) { return __float2ll_rn(__a); }
 __DEVICE__ long lround(double __a) { return llround(__a); }


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59361: [CUDA][Windows] Partial fix for bug 38811 (Step 1 of 3)

2019-03-15 Thread Evgeny Mankov via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rC356255: [CUDA][Windows] Partial fix for bug #38811 (Step 1 
of 3) (authored by emankov, committed by ).

Changed prior to commit:
  https://reviews.llvm.org/D59361?vs=190607&id=190809#toc

Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59361/new/

https://reviews.llvm.org/D59361

Files:
  lib/Headers/__clang_cuda_device_functions.h


Index: lib/Headers/__clang_cuda_device_functions.h
===
--- lib/Headers/__clang_cuda_device_functions.h
+++ lib/Headers/__clang_cuda_device_functions.h
@@ -1563,7 +1563,7 @@
 __DEVICE__ float j1f(float __a) { return __nv_j1f(__a); }
 __DEVICE__ double jn(int __n, double __a) { return __nv_jn(__n, __a); }
 __DEVICE__ float jnf(int __n, float __a) { return __nv_jnf(__n, __a); }
-#if defined(__LP64__)
+#if defined(__LP64__) || defined(_WIN64)
 __DEVICE__ long labs(long __a) { return llabs(__a); };
 #else
 __DEVICE__ long labs(long __a) { return __nv_abs(__a); };
@@ -1597,7 +1597,7 @@
 __DEVICE__ float logf(float __a) {
   return __FAST_OR_SLOW(__nv_fast_logf, __nv_logf)(__a);
 }
-#if defined(__LP64__)
+#if defined(__LP64__) || defined(_WIN64)
 __DEVICE__ long lrint(double __a) { return llrint(__a); }
 __DEVICE__ long lrintf(float __a) { return __float2ll_rn(__a); }
 __DEVICE__ long lround(double __a) { return llround(__a); }


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59423: [CUDA][Windows] Partial fix for bug 38811 (Step 2 of 3)

2019-03-15 Thread Evgeny Mankov via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rC356291: [CUDA][Windows] Partial fix for bug 38811 (Step 2 of 
3) (authored by emankov, committed by ).
Herald added a subscriber: cfe-commits.

Changed prior to commit:
  https://reviews.llvm.org/D59423?vs=190859&id=190867#toc

Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59423/new/

https://reviews.llvm.org/D59423

Files:
  lib/Headers/__clang_cuda_device_functions.h
  lib/Headers/__clang_cuda_libdevice_declares.h


Index: lib/Headers/__clang_cuda_libdevice_declares.h
===
--- lib/Headers/__clang_cuda_libdevice_declares.h
+++ lib/Headers/__clang_cuda_libdevice_declares.h
@@ -141,7 +141,7 @@
 __device__ float __nv_fast_log2f(float __a);
 __device__ float __nv_fast_logf(float __a);
 __device__ float __nv_fast_powf(float __a, float __b);
-__device__ void __nv_fast_sincosf(float __a, float *__sptr, float *__cptr);
+__device__ void __nv_fast_sincosf(float __a, float *__s, float *__c);
 __device__ float __nv_fast_sinf(float __a);
 __device__ float __nv_fast_tanf(float __a);
 __device__ double __nv_fdim(double __a, double __b);
Index: lib/Headers/__clang_cuda_device_functions.h
===
--- lib/Headers/__clang_cuda_device_functions.h
+++ lib/Headers/__clang_cuda_device_functions.h
@@ -520,8 +520,8 @@
 __DEVICE__ float __saturatef(float __a) { return __nv_saturatef(__a); }
 __DEVICE__ int __signbitd(double __a) { return __nv_signbitd(__a); }
 __DEVICE__ int __signbitf(float __a) { return __nv_signbitf(__a); }
-__DEVICE__ void __sincosf(float __a, float *__sptr, float *__cptr) {
-  return __nv_fast_sincosf(__a, __sptr, __cptr);
+__DEVICE__ void __sincosf(float __a, float *__s, float *__c) {
+  return __nv_fast_sincosf(__a, __s, __c);
 }
 __DEVICE__ float __sinf(float __a) { return __nv_fast_sinf(__a); }
 __DEVICE__ int __syncthreads_and(int __a) { return __nvvm_bar0_and(__a); }
@@ -1713,17 +1713,17 @@
   return scalbnf(__a, (int)__b);
 }
 __DEVICE__ double sin(double __a) { return __nv_sin(__a); }
-__DEVICE__ void sincos(double __a, double *__sptr, double *__cptr) {
-  return __nv_sincos(__a, __sptr, __cptr);
+__DEVICE__ void sincos(double __a, double *__s, double *__c) {
+  return __nv_sincos(__a, __s, __c);
 }
-__DEVICE__ void sincosf(float __a, float *__sptr, float *__cptr) {
-  return __FAST_OR_SLOW(__nv_fast_sincosf, __nv_sincosf)(__a, __sptr, __cptr);
+__DEVICE__ void sincosf(float __a, float *__s, float *__c) {
+  return __FAST_OR_SLOW(__nv_fast_sincosf, __nv_sincosf)(__a, __s, __c);
 }
-__DEVICE__ void sincospi(double __a, double *__sptr, double *__cptr) {
-  return __nv_sincospi(__a, __sptr, __cptr);
+__DEVICE__ void sincospi(double __a, double *__s, double *__c) {
+  return __nv_sincospi(__a, __s, __c);
 }
-__DEVICE__ void sincospif(float __a, float *__sptr, float *__cptr) {
-  return __nv_sincospif(__a, __sptr, __cptr);
+__DEVICE__ void sincospif(float __a, float *__s, float *__c) {
+  return __nv_sincospif(__a, __s, __c);
 }
 __DEVICE__ float sinf(float __a) {
   return __FAST_OR_SLOW(__nv_fast_sinf, __nv_sinf)(__a);



[PATCH] D60220: [CUDA][Windows] Final fix for bug 38811 (Step 3 of 3)

2019-04-03 Thread Evgeny Mankov via Phabricator via cfe-commits
emankov created this revision.
emankov added a reviewer: tra.
emankov added a project: clang.
Herald added a subscriber: cfe-commits.

Last fix for the clang bug 38811, "Clang fails to compile with CUDA-9.x on
Windows".

**[IMPORTANT]**
With this last fix, clang once again compiles CUDA on Windows, after nearly a
year and two major clang releases (7 and 8) in which it could not. Since the
last LLVM release in which clang compiled CUDA on Windows successfully was
6.0.1, this fix and the two previous ones have to be included in the upcoming
7.1.0 and 8.0.1 releases.

[How to repro]

  clang++.exe -x cuda "c:\ProgramData\NVIDIA Corporation\CUDA Samples\v9.0\0_Simple\simplePrintf\simplePrintf.cu" -I"c:\ProgramData\NVIDIA Corporation\CUDA Samples\v9.0\common\inc" --cuda-gpu-arch=sm_50 --cuda-path="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.0" -L"c:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.0\lib\x64" -lcudart.lib -v

[Output]

  In file included from C:\GIT\LLVM\trunk-for-submits\llvm-64-release-vs2017-15.9.5\dist\lib\clang\9.0.0\include\__clang_cuda_runtime_wrapper.h:327:
  C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.0/include\crt/math_functions.hpp:390:11: error: no matching function for call to '__isinfl'
      return (__isinfl(a) != 0);
              ^~~~
  C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.0/include\crt/math_functions.hpp:2662:14: note: candidate function not viable: call to __host__ function from __device__ function
    __func__(int __isinfl(long double a))
                 ^
  In file included from <built-in>:1:
  In file included from C:\GIT\LLVM\trunk-for-submits\llvm-64-release-vs2017-15.9.5\dist\lib\clang\9.0.0\include\__clang_cuda_runtime_wrapper.h:327:
  C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.0/include\crt/math_functions.hpp:438:11: error: no matching function for call to '__isnanl'
      return (__isnanl(a) != 0);
              ^~~~
  C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.0/include\crt/math_functions.hpp:2672:14: note: candidate function not viable: call to __host__ function from __device__ function
    __func__(int __isnanl(long double a))
                 ^
  In file included from <built-in>:1:
  In file included from C:\GIT\LLVM\trunk-for-submits\llvm-64-release-vs2017-15.9.5\dist\lib\clang\9.0.0\include\__clang_cuda_runtime_wrapper.h:327:
  C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.0/include\crt/math_functions.hpp:486:11: error: no matching function for call to '__finitel'
      return (__finitel(a) != 0);
              ^
  C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.0/include\crt/math_functions.hpp:2652:14: note: candidate function not viable: call to __host__ function from __device__ function
    __func__(int __finitel(long double a))
                 ^
  3 errors generated when compiling for sm_50.

[Solution]
Add missing device functions' declarations and definitions.


Repository:
  rC Clang

https://reviews.llvm.org/D60220

Files:
  clang/lib/Headers/__clang_cuda_cmath.h
  clang/lib/Headers/__clang_cuda_device_functions.h
  clang/lib/Headers/__clang_cuda_math_forward_declares.h


Index: clang/lib/Headers/__clang_cuda_math_forward_declares.h
===
--- clang/lib/Headers/__clang_cuda_math_forward_declares.h
+++ clang/lib/Headers/__clang_cuda_math_forward_declares.h
@@ -98,12 +98,14 @@
 __DEVICE__ float hypot(float, float);
 __DEVICE__ int ilogb(double);
 __DEVICE__ int ilogb(float);
+__DEVICE__ bool isfinite(long double);
 __DEVICE__ bool isfinite(double);
 __DEVICE__ bool isfinite(float);
 __DEVICE__ bool isgreater(double, double);
 __DEVICE__ bool isgreaterequal(double, double);
 __DEVICE__ bool isgreaterequal(float, float);
 __DEVICE__ bool isgreater(float, float);
+__DEVICE__ bool isinf(long double);
 __DEVICE__ bool isinf(double);
 __DEVICE__ bool isinf(float);
 __DEVICE__ bool isless(double, double);
@@ -112,6 +114,7 @@
 __DEVICE__ bool isless(float, float);
 __DEVICE__ bool islessgreater(double, double);
 __DEVICE__ bool islessgreater(float, float);
+__DEVICE__ bool isnan(long double);
 __DEVICE__ bool isnan(double);
 __DEVICE__ bool isnan(float);
 __DEVICE__ bool isnormal(double);
Index: clang/lib/Headers/__clang_cuda_device_functions.h
===
--- clang/lib/Headers/__clang_cuda_device_functions.h
+++ clang/lib/Headers/__clang_cuda_device_functions.h
@@ -237,6 +237,7 @@
 __DEVICE__ int __ffsll(long long __a) { return __nv_ffsll(__a); }
 __DEVICE__ int __finite(double __a) { return __nv_isfinited(__a); }
 __DEVICE__ int __finitef(float __a) { return __nv_finitef(__a); }
+__DEVICE__ int __finitel(long double __a) { return __finite((double)__a); }
 __DEVICE__ int __float2int_rd(float __a) { return __nv_float2int_rd(__a); }
 __DEVICE__ int __float2int_rn(float __a) { return __nv_float2int_rn(__a); }
 __DEVICE__ int __float2int_r

[PATCH] D60220: [CUDA][Windows] Final fix for bug 38811 (Step 3 of 3)

2019-04-05 Thread Evgeny Mankov via Phabricator via cfe-commits
emankov updated this revision to Diff 193874.
emankov added a comment.

Provide only declarations for the missing long double device functions, to
prevent any actual use of `long double` on the device side (CUDA does not
support `long double` in device code).
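
A small sketch of how the declaration-only approach behaves in user code (the file and kernel names below are my own, for illustration only):

  // check_long_double.cu -- assumes the updated clang CUDA wrapper headers.
  #include <cmath>

  __global__ void kernel(const double *in, int *out) {
    // double overload: declared and defined for the device, resolves fine.
    out[0] = isnan(in[0]);
    // The long double overload is only *declared*, so host headers that
    // mention it still parse, but an actual device-side call such as
    //   out[1] = isnan((long double)in[0]);
    // could never be lowered, since no device definition exists.
  }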

[Testing]
{Windows 10, Ubuntu 16.04.5}/{Visual C++ 2017 15.9.9, g++ 5.4.0}/CUDA {8.0, 9.0, 9.1, 9.2, 10.0, 10.1}


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60220/new/

https://reviews.llvm.org/D60220

Files:
  clang/lib/Headers/__clang_cuda_cmath.h
  clang/lib/Headers/__clang_cuda_device_functions.h
  clang/lib/Headers/__clang_cuda_math_forward_declares.h


Index: clang/lib/Headers/__clang_cuda_math_forward_declares.h
===
--- clang/lib/Headers/__clang_cuda_math_forward_declares.h
+++ clang/lib/Headers/__clang_cuda_math_forward_declares.h
@@ -98,12 +98,14 @@
 __DEVICE__ float hypot(float, float);
 __DEVICE__ int ilogb(double);
 __DEVICE__ int ilogb(float);
+__DEVICE__ bool isfinite(long double);
 __DEVICE__ bool isfinite(double);
 __DEVICE__ bool isfinite(float);
 __DEVICE__ bool isgreater(double, double);
 __DEVICE__ bool isgreaterequal(double, double);
 __DEVICE__ bool isgreaterequal(float, float);
 __DEVICE__ bool isgreater(float, float);
+__DEVICE__ bool isinf(long double);
 __DEVICE__ bool isinf(double);
 __DEVICE__ bool isinf(float);
 __DEVICE__ bool isless(double, double);
@@ -112,6 +114,7 @@
 __DEVICE__ bool isless(float, float);
 __DEVICE__ bool islessgreater(double, double);
 __DEVICE__ bool islessgreater(float, float);
+__DEVICE__ bool isnan(long double);
 __DEVICE__ bool isnan(double);
 __DEVICE__ bool isnan(float);
 __DEVICE__ bool isnormal(double);
Index: clang/lib/Headers/__clang_cuda_device_functions.h
===
--- clang/lib/Headers/__clang_cuda_device_functions.h
+++ clang/lib/Headers/__clang_cuda_device_functions.h
@@ -237,6 +237,7 @@
 __DEVICE__ int __ffsll(long long __a) { return __nv_ffsll(__a); }
 __DEVICE__ int __finite(double __a) { return __nv_isfinited(__a); }
 __DEVICE__ int __finitef(float __a) { return __nv_finitef(__a); }
+__DEVICE__ int __finitel(long double __a);
 __DEVICE__ int __float2int_rd(float __a) { return __nv_float2int_rd(__a); }
 __DEVICE__ int __float2int_rn(float __a) { return __nv_float2int_rn(__a); }
 __DEVICE__ int __float2int_ru(float __a) { return __nv_float2int_ru(__a); }
@@ -445,8 +446,10 @@
 __DEVICE__ int __isfinited(double __a) { return __nv_isfinited(__a); }
 __DEVICE__ int __isinf(double __a) { return __nv_isinfd(__a); }
 __DEVICE__ int __isinff(float __a) { return __nv_isinff(__a); }
+__DEVICE__ int __isinfl(long double __a);
 __DEVICE__ int __isnan(double __a) { return __nv_isnand(__a); }
 __DEVICE__ int __isnanf(float __a) { return __nv_isnanf(__a); }
+__DEVICE__ int __isnanl(long double __a);
 __DEVICE__ double __ll2double_rd(long long __a) {
   return __nv_ll2double_rd(__a);
 }
Index: clang/lib/Headers/__clang_cuda_cmath.h
===
--- clang/lib/Headers/__clang_cuda_cmath.h
+++ clang/lib/Headers/__clang_cuda_cmath.h
@@ -78,13 +78,16 @@
 #ifndef _MSC_VER
 __DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }
 __DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }
+__DEVICE__ bool isinf(long double __x) { return ::__isinfl(__x); }
 __DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }
 // For inscrutable reasons, __finite(), the double-precision version of
 // __finitef, does not exist when compiling for MacOS.  __isfinited is available
 // everywhere and is just as good.
 __DEVICE__ bool isfinite(double __x) { return ::__isfinited(__x); }
+__DEVICE__ bool isfinite(long double __x) { return ::__finitel(__x); }
 __DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); }
 __DEVICE__ bool isnan(double __x) { return ::__isnan(__x); }
+__DEVICE__ bool isnan(long double __x) { return ::__isnanl(__x); }
 #endif
 
 __DEVICE__ bool isgreater(float __x, float __y) {



[PATCH] D60220: [CUDA][Windows] Final fix for bug 38811 (Step 3 of 3)

2019-04-05 Thread Evgeny Mankov via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rC357779: [CUDA][Windows] Last fix for the clang Bug 38811 
"Clang fails to compile with… (authored by emankov, committed by ).

Changed prior to commit:
  https://reviews.llvm.org/D60220?vs=193874&id=193904#toc

Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60220/new/

https://reviews.llvm.org/D60220

Files:
  lib/Headers/__clang_cuda_cmath.h
  lib/Headers/__clang_cuda_device_functions.h
  lib/Headers/__clang_cuda_math_forward_declares.h


Index: lib/Headers/__clang_cuda_cmath.h
===
--- lib/Headers/__clang_cuda_cmath.h
+++ lib/Headers/__clang_cuda_cmath.h
@@ -78,13 +78,16 @@
 #ifndef _MSC_VER
 __DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }
 __DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }
+__DEVICE__ bool isinf(long double __x) { return ::__isinfl(__x); }
 __DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }
 // For inscrutable reasons, __finite(), the double-precision version of
 // __finitef, does not exist when compiling for MacOS.  __isfinited is available
 // everywhere and is just as good.
 __DEVICE__ bool isfinite(double __x) { return ::__isfinited(__x); }
+__DEVICE__ bool isfinite(long double __x) { return ::__finitel(__x); }
 __DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); }
 __DEVICE__ bool isnan(double __x) { return ::__isnan(__x); }
+__DEVICE__ bool isnan(long double __x) { return ::__isnanl(__x); }
 #endif
 
 __DEVICE__ bool isgreater(float __x, float __y) {
Index: lib/Headers/__clang_cuda_device_functions.h
===
--- lib/Headers/__clang_cuda_device_functions.h
+++ lib/Headers/__clang_cuda_device_functions.h
@@ -237,6 +237,7 @@
 __DEVICE__ int __ffsll(long long __a) { return __nv_ffsll(__a); }
 __DEVICE__ int __finite(double __a) { return __nv_isfinited(__a); }
 __DEVICE__ int __finitef(float __a) { return __nv_finitef(__a); }
+__DEVICE__ int __finitel(long double __a);
 __DEVICE__ int __float2int_rd(float __a) { return __nv_float2int_rd(__a); }
 __DEVICE__ int __float2int_rn(float __a) { return __nv_float2int_rn(__a); }
 __DEVICE__ int __float2int_ru(float __a) { return __nv_float2int_ru(__a); }
@@ -445,8 +446,10 @@
 __DEVICE__ int __isfinited(double __a) { return __nv_isfinited(__a); }
 __DEVICE__ int __isinf(double __a) { return __nv_isinfd(__a); }
 __DEVICE__ int __isinff(float __a) { return __nv_isinff(__a); }
+__DEVICE__ int __isinfl(long double __a);
 __DEVICE__ int __isnan(double __a) { return __nv_isnand(__a); }
 __DEVICE__ int __isnanf(float __a) { return __nv_isnanf(__a); }
+__DEVICE__ int __isnanl(long double __a);
 __DEVICE__ double __ll2double_rd(long long __a) {
   return __nv_ll2double_rd(__a);
 }
Index: lib/Headers/__clang_cuda_math_forward_declares.h
===
--- lib/Headers/__clang_cuda_math_forward_declares.h
+++ lib/Headers/__clang_cuda_math_forward_declares.h
@@ -98,12 +98,14 @@
 __DEVICE__ float hypot(float, float);
 __DEVICE__ int ilogb(double);
 __DEVICE__ int ilogb(float);
+__DEVICE__ bool isfinite(long double);
 __DEVICE__ bool isfinite(double);
 __DEVICE__ bool isfinite(float);
 __DEVICE__ bool isgreater(double, double);
 __DEVICE__ bool isgreaterequal(double, double);
 __DEVICE__ bool isgreaterequal(float, float);
 __DEVICE__ bool isgreater(float, float);
+__DEVICE__ bool isinf(long double);
 __DEVICE__ bool isinf(double);
 __DEVICE__ bool isinf(float);
 __DEVICE__ bool isless(double, double);
@@ -112,6 +114,7 @@
 __DEVICE__ bool isless(float, float);
 __DEVICE__ bool islessgreater(double, double);
 __DEVICE__ bool islessgreater(float, float);
+__DEVICE__ bool isnan(long double);
 __DEVICE__ bool isnan(double);
 __DEVICE__ bool isnan(float);
 __DEVICE__ bool isnormal(double);



[PATCH] D60220: [CUDA][Windows] Final fix for bug 38811 (Step 3 of 3)

2019-04-05 Thread Evgeny Mankov via Phabricator via cfe-commits
emankov added a comment.

Oooh, sorry, but I've just pushed the fix, with the following commit message:
"Add missing long double device functions' declarations. Provide only
declarations to prevent any use of long double on the device side, because
CUDA does not support long double on the device side."


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60220/new/

https://reviews.llvm.org/D60220



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D60220: [CUDA][Windows] Final fix for bug 38811 (Step 3 of 3)

2019-04-05 Thread Evgeny Mankov via Phabricator via cfe-commits
emankov added a comment.

In D60220#1456449 , @tra wrote:

> It's not a big deal at the moment -- there are no `long double` users in CUDA 
> on linux yet. You can clean up in another commit.
>  BTW, you may want to make commit description somewhat more concise than 
> rC357779 . Including all details of 
> reproduction, etc is way too much info which is better suited for the bug 
> report or review.


Ok, thanks, will do it.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60220/new/

https://reviews.llvm.org/D60220



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D60818: [CUDA][Windows] restrict long double functions declarations to Windows

2019-04-17 Thread Evgeny Mankov via Phabricator via cfe-commits
emankov created this revision.
emankov added a reviewer: tra.
emankov added a project: clang.
Herald added a subscriber: cfe-commits.

As agreed in D60220, make the `long double` declarations unobservable on
non-Windows platforms.
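
The guard idiom being applied is the one below (same shape as the diff that follows, shown standalone for clarity):

  #ifdef _MSC_VER
  // Declared only when targeting the MSVC host environment (Windows); on other
  // platforms the long double overloads are not visible at all.
  __DEVICE__ bool isnan(long double);
  #endif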


Repository:
  rC Clang

https://reviews.llvm.org/D60818

Files:
  clang/lib/Headers/__clang_cuda_cmath.h
  clang/lib/Headers/__clang_cuda_device_functions.h
  clang/lib/Headers/__clang_cuda_math_forward_declares.h


Index: clang/lib/Headers/__clang_cuda_math_forward_declares.h
===
--- clang/lib/Headers/__clang_cuda_math_forward_declares.h
+++ clang/lib/Headers/__clang_cuda_math_forward_declares.h
@@ -84,14 +84,18 @@
 __DEVICE__ float hypot(float, float);
 __DEVICE__ int ilogb(double);
 __DEVICE__ int ilogb(float);
+#ifdef _MSC_VER
 __DEVICE__ bool isfinite(long double);
+#endif
 __DEVICE__ bool isfinite(double);
 __DEVICE__ bool isfinite(float);
 __DEVICE__ bool isgreater(double, double);
 __DEVICE__ bool isgreaterequal(double, double);
 __DEVICE__ bool isgreaterequal(float, float);
 __DEVICE__ bool isgreater(float, float);
+#ifdef _MSC_VER
 __DEVICE__ bool isinf(long double);
+#endif
 __DEVICE__ bool isinf(double);
 __DEVICE__ bool isinf(float);
 __DEVICE__ bool isless(double, double);
@@ -100,7 +104,9 @@
 __DEVICE__ bool isless(float, float);
 __DEVICE__ bool islessgreater(double, double);
 __DEVICE__ bool islessgreater(float, float);
+#ifdef _MSC_VER
 __DEVICE__ bool isnan(long double);
+#endif
 __DEVICE__ bool isnan(double);
 __DEVICE__ bool isnan(float);
 __DEVICE__ bool isnormal(double);
Index: clang/lib/Headers/__clang_cuda_device_functions.h
===
--- clang/lib/Headers/__clang_cuda_device_functions.h
+++ clang/lib/Headers/__clang_cuda_device_functions.h
@@ -223,7 +223,9 @@
 __DEVICE__ int __ffsll(long long __a) { return __nv_ffsll(__a); }
 __DEVICE__ int __finite(double __a) { return __nv_isfinited(__a); }
 __DEVICE__ int __finitef(float __a) { return __nv_finitef(__a); }
+#ifdef _MSC_VER
 __DEVICE__ int __finitel(long double __a);
+#endif
 __DEVICE__ int __float2int_rd(float __a) { return __nv_float2int_rd(__a); }
 __DEVICE__ int __float2int_rn(float __a) { return __nv_float2int_rn(__a); }
 __DEVICE__ int __float2int_ru(float __a) { return __nv_float2int_ru(__a); }
@@ -432,10 +434,14 @@
 __DEVICE__ int __isfinited(double __a) { return __nv_isfinited(__a); }
 __DEVICE__ int __isinf(double __a) { return __nv_isinfd(__a); }
 __DEVICE__ int __isinff(float __a) { return __nv_isinff(__a); }
+#ifdef _MSC_VER
 __DEVICE__ int __isinfl(long double __a);
+#endif
 __DEVICE__ int __isnan(double __a) { return __nv_isnand(__a); }
 __DEVICE__ int __isnanf(float __a) { return __nv_isnanf(__a); }
+#ifdef _MSC_VER
 __DEVICE__ int __isnanl(long double __a);
+#endif
 __DEVICE__ double __ll2double_rd(long long __a) {
   return __nv_ll2double_rd(__a);
 }
Index: clang/lib/Headers/__clang_cuda_cmath.h
===
--- clang/lib/Headers/__clang_cuda_cmath.h
+++ clang/lib/Headers/__clang_cuda_cmath.h
@@ -64,16 +64,13 @@
 #ifndef _MSC_VER
 __DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }
 __DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }
-__DEVICE__ bool isinf(long double __x) { return ::__isinfl(__x); }
 __DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }
 // For inscrutable reasons, __finite(), the double-precision version of
 // __finitef, does not exist when compiling for MacOS.  __isfinited is available
 // everywhere and is just as good.
 __DEVICE__ bool isfinite(double __x) { return ::__isfinited(__x); }
-__DEVICE__ bool isfinite(long double __x) { return ::__finitel(__x); }
 __DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); }
 __DEVICE__ bool isnan(double __x) { return ::__isnan(__x); }
-__DEVICE__ bool isnan(long double __x) { return ::__isnanl(__x); }
 #endif
 
 __DEVICE__ bool isgreater(float __x, float __y) {



[PATCH] D60818: [CUDA][Windows] restrict long double device functions declarations to Windows

2019-04-18 Thread Evgeny Mankov via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rC358654: [CUDA][Windows] Restrict long double device 
functions declarations to Windows (authored by emankov, committed by ).

Changed prior to commit:
  https://reviews.llvm.org/D60818?vs=195546&id=195704#toc

Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60818/new/

https://reviews.llvm.org/D60818

Files:
  lib/Headers/__clang_cuda_cmath.h
  lib/Headers/__clang_cuda_device_functions.h
  lib/Headers/__clang_cuda_math_forward_declares.h


Index: lib/Headers/__clang_cuda_math_forward_declares.h
===
--- lib/Headers/__clang_cuda_math_forward_declares.h
+++ lib/Headers/__clang_cuda_math_forward_declares.h
@@ -84,14 +84,18 @@
 __DEVICE__ float hypot(float, float);
 __DEVICE__ int ilogb(double);
 __DEVICE__ int ilogb(float);
+#ifdef _MSC_VER
 __DEVICE__ bool isfinite(long double);
+#endif
 __DEVICE__ bool isfinite(double);
 __DEVICE__ bool isfinite(float);
 __DEVICE__ bool isgreater(double, double);
 __DEVICE__ bool isgreaterequal(double, double);
 __DEVICE__ bool isgreaterequal(float, float);
 __DEVICE__ bool isgreater(float, float);
+#ifdef _MSC_VER
 __DEVICE__ bool isinf(long double);
+#endif
 __DEVICE__ bool isinf(double);
 __DEVICE__ bool isinf(float);
 __DEVICE__ bool isless(double, double);
@@ -100,7 +104,9 @@
 __DEVICE__ bool isless(float, float);
 __DEVICE__ bool islessgreater(double, double);
 __DEVICE__ bool islessgreater(float, float);
+#ifdef _MSC_VER
 __DEVICE__ bool isnan(long double);
+#endif
 __DEVICE__ bool isnan(double);
 __DEVICE__ bool isnan(float);
 __DEVICE__ bool isnormal(double);
Index: lib/Headers/__clang_cuda_device_functions.h
===
--- lib/Headers/__clang_cuda_device_functions.h
+++ lib/Headers/__clang_cuda_device_functions.h
@@ -223,7 +223,9 @@
 __DEVICE__ int __ffsll(long long __a) { return __nv_ffsll(__a); }
 __DEVICE__ int __finite(double __a) { return __nv_isfinited(__a); }
 __DEVICE__ int __finitef(float __a) { return __nv_finitef(__a); }
+#ifdef _MSC_VER
 __DEVICE__ int __finitel(long double __a);
+#endif
 __DEVICE__ int __float2int_rd(float __a) { return __nv_float2int_rd(__a); }
 __DEVICE__ int __float2int_rn(float __a) { return __nv_float2int_rn(__a); }
 __DEVICE__ int __float2int_ru(float __a) { return __nv_float2int_ru(__a); }
@@ -432,10 +434,14 @@
 __DEVICE__ int __isfinited(double __a) { return __nv_isfinited(__a); }
 __DEVICE__ int __isinf(double __a) { return __nv_isinfd(__a); }
 __DEVICE__ int __isinff(float __a) { return __nv_isinff(__a); }
+#ifdef _MSC_VER
 __DEVICE__ int __isinfl(long double __a);
+#endif
 __DEVICE__ int __isnan(double __a) { return __nv_isnand(__a); }
 __DEVICE__ int __isnanf(float __a) { return __nv_isnanf(__a); }
+#ifdef _MSC_VER
 __DEVICE__ int __isnanl(long double __a);
+#endif
 __DEVICE__ double __ll2double_rd(long long __a) {
   return __nv_ll2double_rd(__a);
 }
Index: lib/Headers/__clang_cuda_cmath.h
===
--- lib/Headers/__clang_cuda_cmath.h
+++ lib/Headers/__clang_cuda_cmath.h
@@ -64,16 +64,13 @@
 #ifndef _MSC_VER
 __DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }
 __DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }
-__DEVICE__ bool isinf(long double __x) { return ::__isinfl(__x); }
 __DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }
 // For inscrutable reasons, __finite(), the double-precision version of
 // __finitef, does not exist when compiling for MacOS.  __isfinited is available
 // everywhere and is just as good.
 __DEVICE__ bool isfinite(double __x) { return ::__isfinited(__x); }
-__DEVICE__ bool isfinite(long double __x) { return ::__finitel(__x); }
 __DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); }
 __DEVICE__ bool isnan(double __x) { return ::__isnan(__x); }
-__DEVICE__ bool isnan(long double __x) { return ::__isnanl(__x); }
 #endif
 
 __DEVICE__ bool isgreater(float __x, float __y) {



[PATCH] D37386: [AMDGPU] Implement infrastructure to set options in AMDGPUToolChain

2017-09-01 Thread Evgeny Mankov via Phabricator via cfe-commits
emankov added inline comments.



Comment at: lib/Driver/ToolChains/AMDGPU.cpp:60-62
+  for (auto *A : Args) {
+DAL->append(A);
+  }

redundant braces



Comment at: lib/Driver/ToolChains/AMDGPU.cpp:74-75
+// as they defined that way in Options.td
+if (!Args.hasArg(options::OPT_O) && !Args.hasArg(options::OPT_O0) &&
+!Args.hasArg(options::OPT_O4) && !Args.hasArg(options::OPT_Ofast))
+  DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_O),

A single hasArg call might be used for all of these; see the sketch below.
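
For example (sketch only, assuming the multi-argument overload of `hasArg` is available in this tree; `OptODefault` is a placeholder for whatever value the patch passes):

  if (!Args.hasArg(options::OPT_O, options::OPT_O0, options::OPT_O4,
                   options::OPT_Ofast))
    DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_O), OptODefault);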



Comment at: lib/Driver/ToolChains/AMDGPU.h:50
   Tool *buildLinker() const override;
+  const std::string getOptionDefault(options::ID OptID) const {
+auto opt = OptionsDefault.find(OptID);

A const reference might be returned here.



Comment at: lib/Driver/ToolChains/AMDGPU.h:51-52
+  const std::string getOptionDefault(options::ID OptID) const {
+auto opt = OptionsDefault.find(OptID);
+return opt->second;
+  }

A check for the case where the opt is not found is needed; see the sketch below.
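
For example, a sketch that also folds in the const-reference suggestion above (the empty-string fallback is just one possible policy):

  const std::string &getOptionDefault(options::ID OptID) const {
    static const std::string Empty;
    auto It = OptionsDefault.find(OptID);
    return It == OptionsDefault.end() ? Empty : It->second;
  }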



Comment at: lib/Driver/ToolChains/AMDGPU.h:60
   bool IsIntegratedAssemblerDefault() const override { return true; }
+  llvm::opt::DerivedArgList *
+  TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch,

Return arg on the same line.


https://reviews.llvm.org/D37386



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D37386: [AMDGPU] Implement infrastructure to set options in AMDGPUToolChain

2017-09-05 Thread Evgeny Mankov via Phabricator via cfe-commits
emankov accepted this revision.
emankov added a comment.

LGTM


https://reviews.llvm.org/D37386



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D60220: [CUDA][Windows] Final fix for bug 38811 (Step 3 of 3)

2019-10-28 Thread Evgeny Mankov via Phabricator via cfe-commits
emankov added a comment.

In D60220#1723350 , @6yearold wrote:

> I'm seeing quite similar errors on FreeBSD with Clang 8 and 9:
>  Any idea how to fix this?


It looks like CUDA doesn't support a `double` argument for the device function
__isnan on FreeBSD.

1. I'd look at LLVM trunk (10.0.0svn).
2. If the issue is not eliminated in trunk, I'd make a change for FreeBSD
similar to https://reviews.llvm.org/rL358654 to provide a declaration for that
function, allowing math_functions.hpp to compile while preventing any use of
it on the device side.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60220/new/

https://reviews.llvm.org/D60220



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D60220: [CUDA][Windows] Final fix for bug 38811 (Step 3 of 3)

2019-10-28 Thread Evgeny Mankov via Phabricator via cfe-commits
emankov added a comment.

>> It looks like CUDA doesn't support `double` argument for device function 
>> __isnan on FreeBSD.
> 
> It's actually the opposite -- FreeBSD does not provide *host*-side 
> `__isnan(double)` -- the error complains that it's the host code that tried 
> to use `__isnan` and failed when overload resolution produced a device 
> variant.

Sure, you're right, it's quite the opposite: `call to __device__ function from
__host__ function`. I agree: a wrapper for the existing `__inline_isnan()` is
the solution.
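
A hedged sketch of such a wrapper (host-side only; `__inline_isnan` is the FreeBSD helper mentioned above, and the exact placement and attributes would need to follow the wrapper headers):

  // Provide the __isnan symbol FreeBSD's headers lack by forwarding to the
  // inline helper they do provide.
  static inline int __isnan(double __x) { return __inline_isnan(__x); }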


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60220/new/

https://reviews.llvm.org/D60220



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D60220: [CUDA][Windows] Final fix for bug 38811 (Step 3 of 3)

2019-10-29 Thread Evgeny Mankov via Phabricator via cfe-commits
emankov added a comment.

In D60220#1725633 , @dim wrote:

>   $ cat check-isnan.cpp
>   #include 
>  
>   int check_isnan(double d)
>   {
> return ::__isnan(d);
>   }
>   $ clang -c check-isnan.cpp
>   Why can't the regular `isnan` be used instead?  Or is this a CUDA-specific 
> requirement?  (Apologies, but I know next to nothing about CUDA :) )




1. `#include "cuda_runtime.h"`
2. As long as `__isnan` is a device function, it should be called from a
`__device__` or `__global__` function.
3. `clang -c check-isnan.cpp -x cuda`

(A sketch combining these steps follows below.)
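
  // check-isnan.cu -- sketch combining the three steps above (the kernel name
  // and sm_50 arch are my choices for illustration; requires a CUDA install).
  #include "cuda_runtime.h"

  __global__ void check_isnan_kernel(const double *d, int *result) {
    *result = ::__isnan(*d);   // __isnan is a __device__ function
  }

  // build, e.g.:  clang -c check-isnan.cu --cuda-gpu-arch=sm_50
  //          or:  clang -c check-isnan.cpp -x cuda --cuda-gpu-arch=sm_50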


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60220/new/

https://reviews.llvm.org/D60220



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D122897: [clang][CUDA][Windows] Fix compilation error on Windows with `uint32_t __nvvm_get_smem_pointer`

2022-04-20 Thread Evgeny Mankov via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGc23147106f7e: [clang][CUDA][Windows] Fix compilation error 
on Windows with `uint32_t… (authored by emankov).
Herald added a subscriber: cfe-commits.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D122897/new/

https://reviews.llvm.org/D122897

Files:
  clang/lib/Headers/__clang_cuda_intrinsics.h


Index: clang/lib/Headers/__clang_cuda_intrinsics.h
===
--- clang/lib/Headers/__clang_cuda_intrinsics.h
+++ clang/lib/Headers/__clang_cuda_intrinsics.h
@@ -509,7 +509,7 @@
 __device__ inline void *__nv_cvta_local_to_generic_impl(size_t __ptr) {
   return (void *)(void __attribute__((address_space(5))) *)__ptr;
 }
-__device__ inline uint32_t __nvvm_get_smem_pointer(void *__ptr) {
+__device__ inline cuuint32_t __nvvm_get_smem_pointer(void *__ptr) {
   return __nv_cvta_generic_to_shared_impl(__ptr);
 }
 } // extern "C"


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D89752: [CUDA] Improve clang's ability to detect recent CUDA versions.

2020-10-20 Thread Evgeny Mankov via Phabricator via cfe-commits
emankov added inline comments.



Comment at: clang/lib/Driver/ToolChains/Cuda.cpp:158
 if (!VersionFile) {
-  // CUDA 7.0 doesn't have a version.txt, so guess that's our version if
-  // version.txt isn't present.
-  Version = CudaVersion::CUDA_70;
+  // CUDA 7.0 and CUDA 11.1+ do not have version.txt file.
+  // Use libdevice file to distinguish 7.0 from the new versions.

CUDA 11.0+, actually



Comment at: clang/lib/Driver/ToolChains/Cuda.cpp:161
+  if (FS.exists(LibDevicePath + "/libdevice.10.bc")) {
+Version = CudaVersion::LATEST;
+DetectedVersionIsNotSupported = Version > 
CudaVersion::LATEST_SUPPORTED;

Do we have any other mechanism besides version.txt for determining an exact 
CUDA version? Setting the latest version in case of absence of version.txt 
doesn't suit all the needs: sometimes the exact version is taken into account, 
for instance in [[ https://github.com/ROCm-Developer-Tools/HIPIFY#clang | 
hipify-clang ]].



Comment at: clang/test/Driver/cuda-version-check.cu:13
 // RUN:FileCheck %s --check-prefix=UNKNOWN_VERSION
+// CUDA-11.1 does not carry version.txt file. Make sure we still detect it as a
+// new version and handle it the same as we handle other new CUDA versions.

CUDA-11.0 Update 1 doesn't carry version.txt as well.



Comment at: clang/test/Driver/cuda-version-check.cu:72
 
-// UNKNOWN_VERSION: Unknown CUDA version 999.999. Assuming the latest 
supported version
+// UNKNOWN_VERSION: Unknown CUDA version{{.*}}. Assuming the latest supported 
version
 // UNKNOWN_VERSION_CXX-NOT: Unknown CUDA version

missing space before {{


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D89752/new/

https://reviews.llvm.org/D89752

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D89752: [CUDA] Improve clang's ability to detect recent CUDA versions.

2020-10-21 Thread Evgeny Mankov via Phabricator via cfe-commits
emankov accepted this revision.
emankov added a comment.
This revision is now accepted and ready to land.

Thank you!
I've successfully applied your change in `Cuda.cpp` to the following clang 
versions: `10.0.0`, `10.0.1`, `11.0.0`, and `12.0.0git`; and have created the 
working patches (#206 
).




Comment at: clang/test/Driver/cuda-version-check.cu:13
 // RUN:FileCheck %s --check-prefix=UNKNOWN_VERSION
+// CUDA-11.1 does not carry version.txt file. Make sure we still detect it as a
+// new version and handle it the same as we handle other new CUDA versions.

tra wrote:
> emankov wrote:
> > CUDA-11.0 Update 1 doesn't carry version.txt as well.
> > CUDA-11.0 Update 1 doesn't carry version.txt as well.
> 
> This assertion appears to be incorrect. I've just installed 11.0 update1 
> using Ubuntu [[ 
> https://developer.download.nvidia.com/compute/cuda/11.0.3/local_installers/cuda_11.0.3_450.51.06_linux.run
>  | .run ]] installer I've got from 
> https://developer.nvidia.com/cuda-11.0-update1-download-archive  and the 
> installed version does have version.txt saying 
> 
> ```
> CUDA Version 11.0.228
> ```
> 
> Did you use some other installer variant?
> 
```
"c:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.0\bin\nvcc" --version
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2020 NVIDIA Corporation
Built on Wed_Jul_22_19:09:35_Pacific_Daylight_Time_2020
Cuda compilation tools, release 11.0, V11.0.221
Build cuda_11.0_bu.relgpu_drvr445TC445_37.28845127_0
```
CUDA Version 11.0.221 (installer [[ http://developer.download.nvidia.com/compute/cuda/11.0.3/local_installers/cuda_11.0.3_451.82_win10.exe | cuda_11.0.3_451.82_win10.exe ]]) doesn't have version.txt either.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D89752/new/

https://reviews.llvm.org/D89752

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D89752: [CUDA] Improve clang's ability to detect recent CUDA versions.

2020-10-21 Thread Evgeny Mankov via Phabricator via cfe-commits
emankov added inline comments.



Comment at: clang/lib/Driver/ToolChains/Cuda.cpp:161
+  if (FS.exists(LibDevicePath + "/libdevice.10.bc")) {
+Version = CudaVersion::LATEST;
+DetectedVersionIsNotSupported = Version > 
CudaVersion::LATEST_SUPPORTED;

tra wrote:
> emankov wrote:
> > Do we have any other mechanism besides version.txt for determining an exact 
> > CUDA version? Setting the latest version in case of absence of version.txt 
> > doesn't suit all the needs: sometimes the exact version is taken into 
> > account, for instance in [[ 
> > https://github.com/ROCm-Developer-Tools/HIPIFY#clang | hipify-clang ]].
> Not easily. 
> 
> We could try running one of SDK binaries with `--version`. This would be 
> fragile as the tool version does not necessarily match the SDK's  and NVIDIA 
> has already started versioning elements per-component. E.g. some shared 
> libraries in 11.1 are already versioned as 10.2, 11.0, 11.1 and 11.2. There 
> are also situations when we had to cherry-pick a tool from a different 
> release in order to work around a critical bug. We don't want to change 
> compiler's idea of CUDA version based on that.
> 
> We could parse CUDA headers and try finding CUDA_VERSION macro. That's 
> feasible. It may be somewhat fragile if we just search for a text string 
> '#define CUDA_VERSION ' -- nvidia may change it. On the other hand it's 
> not that much worse than relying on version.txt.  It also does not carry 
> complete version, only major.minor, so we will not be able to tell `11.0 
> `apart from `11.0 update 1`. Probably not a big deal. I don't think we've 
> needed it so far.
> 
> We could do something like this:
> * if version.txt is there -- use it
> * otherwise if cuda.h is found, extract version from CUDA_VERSION macro
> * if that failed, use libdevice bitcode format to detect CUDA-7.0
> * fall back to "last supported version" otherwise
> 
I'd appreciate adding the step that extracts the version from the CUDA_VERSION
macro.
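
A rough sketch of that fallback order (illustrative only; the function and parameter names are mine, not the actual Cuda.cpp code):

  #include <optional>

  enum class CudaVersion { CUDA_70, LATEST_SUPPORTED /* trimmed for the sketch */ };

  CudaVersion detectCudaVersion(std::optional<CudaVersion> FromVersionTxt,
                                std::optional<CudaVersion> FromCudaHMacro,
                                bool LooksLikeCuda70Libdevice) {
    if (FromVersionTxt)
      return *FromVersionTxt;              // 1. version.txt, when present
    if (FromCudaHMacro)
      return *FromCudaHMacro;              // 2. CUDA_VERSION parsed from cuda.h
    if (LooksLikeCuda70Libdevice)
      return CudaVersion::CUDA_70;         // 3. old libdevice layout => CUDA 7.0
    return CudaVersion::LATEST_SUPPORTED;  // 4. fall back
  }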


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D89752/new/

https://reviews.llvm.org/D89752

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D89752: [CUDA] Improve clang's ability to detect recent CUDA versions.

2020-10-21 Thread Evgeny Mankov via Phabricator via cfe-commits
emankov added inline comments.



Comment at: clang/lib/Driver/ToolChains/Cuda.cpp:161
+  if (FS.exists(LibDevicePath + "/libdevice.10.bc")) {
+Version = CudaVersion::LATEST;
+DetectedVersionIsNotSupported = Version > 
CudaVersion::LATEST_SUPPORTED;

tra wrote:
> emankov wrote:
> > tra wrote:
> > > emankov wrote:
> > > > Do we have any other mechanism besides version.txt for determining an 
> > > > exact CUDA version? Setting the latest version in case of absence of 
> > > > version.txt doesn't suit all the needs: sometimes the exact version is 
> > > > taken into account, for instance in [[ 
> > > > https://github.com/ROCm-Developer-Tools/HIPIFY#clang | hipify-clang ]].
> > > Not easily. 
> > > 
> > > We could try running one of SDK binaries with `--version`. This would be 
> > > fragile as the tool version does not necessarily match the SDK's  and 
> > > NVIDIA has already started versioning elements per-component. E.g. some 
> > > shared libraries in 11.1 are already versioned as 10.2, 11.0, 11.1 and 
> > > 11.2. There are also situations when we had to cherry-pick a tool from a 
> > > different release in order to work around a critical bug. We don't want 
> > > to change compiler's idea of CUDA version based on that.
> > > 
> > > We could parse CUDA headers and try finding CUDA_VERSION macro. That's 
> > > feasible. It may be somewhat fragile if we just search for a text string 
> > > '#define CUDA_VERSION ' -- nvidia may change it. On the other hand 
> > > it's not that much worse than relying on version.txt.  It also does not 
> > > carry complete version, only major.minor, so we will not be able to tell 
> > > `11.0 `apart from `11.0 update 1`. Probably not a big deal. I don't think 
> > > we've needed it so far.
> > > 
> > > We could do something like this:
> > > * if version.txt is there -- use it
> > > * otherwise if cuda.h is found, extract version from CUDA_VERSION macro
> > > * if that failed, use libdevice bitcode format to detect CUDA-7.0
> > > * fall back to "last supported version" otherwise
> > > 
> > I'd appreciate the appearance of the step with a version extraction from 
> > CUDA_VERSION macro.
> D89832 does exactly that. PTAL.
Ok, I'll try it.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D89752/new/

https://reviews.llvm.org/D89752

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D89832: [CUDA] Extract CUDA version from cuda.h if version.txt is not found

2020-10-22 Thread Evgeny Mankov via Phabricator via cfe-commits
emankov requested changes to this revision.
emankov added inline comments.
This revision now requires changes to proceed.



Comment at: clang/lib/Driver/ToolChains/Cuda.cpp:76-77
+return CudaVersion::CUDA_102;
+  if (raw_version < 11010)
+return CudaVersion::CUDA_110;
+  return CudaVersion::LATEST;

Please, add `CudaVersion::CUDA_111` declaration in `Cuda.h` and a corresponding 
`if` here. 
Btw, `switch` is possible here. 
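
What is being asked for would look roughly like this (illustration only; it assumes a `CudaVersion::CUDA_111` enumerator has been added to `Cuda.h`, which the follow-up below argues is unnecessary once range checks are used):

  if (raw_version < 11010)
    return CudaVersion::CUDA_110;
  if (raw_version < 11020)
    return CudaVersion::CUDA_111;
  return CudaVersion::LATEST;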


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D89832/new/

https://reviews.llvm.org/D89832

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D89832: [CUDA] Extract CUDA version from cuda.h if version.txt is not found

2020-10-22 Thread Evgeny Mankov via Phabricator via cfe-commits
emankov accepted this revision.
emankov added inline comments.
This revision is now accepted and ready to land.



Comment at: clang/lib/Driver/ToolChains/Cuda.cpp:76-77
+return CudaVersion::CUDA_102;
+  if (raw_version < 11010)
+return CudaVersion::CUDA_110;
+  return CudaVersion::LATEST;

tra wrote:
> emankov wrote:
> > Please, add `CudaVersion::CUDA_111` declaration in `Cuda.h` and a 
> > corresponding `if` here. 
> > Btw, `switch` is possible here. 
> It does not serve any purpose here. 102/110 were added when clang was only 
> accepting specific versions. Now that it will accept any newer version,  
> Arguably it's 102/101 that should be gone until we implement the new 
> functionality. All of that would out of scope for this patch.
> 
> As for the switch, it would only work to match exact versions encoded in the 
> CUDA headers, including updates, patches, special private builds etc. I do 
> not have access to all of those versions, so I can not enumerate all of them. 
> Range checking is more robust.
Ok, I can "live" with the latest being 110 for a while, until the new
functionality appears in clang. Thanks!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D89832/new/

https://reviews.llvm.org/D89832

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D89832: [CUDA] Extract CUDA version from cuda.h if version.txt is not found

2020-10-23 Thread Evgeny Mankov via Phabricator via cfe-commits
emankov added a comment.

I confirm that D89752 eliminates 47332 on Windows. Tested against the
following CUDA versions: 7.0, 7.5, 9.2, 10.0, 10.1, 10.2, 11.0 Update 1, 11.1.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D89832/new/

https://reviews.llvm.org/D89832

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D89832: [CUDA] Extract CUDA version from cuda.h if version.txt is not found

2020-10-26 Thread Evgeny Mankov via Phabricator via cfe-commits
emankov added a comment.

D89832 eliminates 47332 on Windows as well. Tested against the same CUDA
versions as for D89752.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D89832/new/

https://reviews.llvm.org/D89832

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D107492: [clang] Replace asm with __asm__ in cuda header

2021-08-04 Thread Evgeny Mankov via Phabricator via cfe-commits
emankov requested changes to this revision.
emankov added inline comments.
This revision now requires changes to proceed.



Comment at: clang/lib/Headers/__clang_cuda_device_functions.h:1043
 }
-#else // CUDA_VERSION >= 9020
+#else  // CUDA_VERSION >= 9020
 // CUDA no longer provides inline assembly (or bitcode) implementation of these

Unneeded formatting.



Comment at: clang/lib/Headers/__clang_cuda_device_functions.h:1057
+  __asm__("vabsdiff2.s32.s32.s32 %0,%1,%2,%3;"
+  : "=r"(r)
+  : "r"(__a), "r"(0), "r"(0));

Tabs are not allowed; please use spaces instead.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D107492/new/

https://reviews.llvm.org/D107492

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D107492: [clang] Replace asm with __asm__ in cuda header

2021-08-04 Thread Evgeny Mankov via Phabricator via cfe-commits
emankov added inline comments.



Comment at: clang/lib/Headers/__clang_cuda_device_functions.h:1043
 }
-#else // CUDA_VERSION >= 9020
+#else  // CUDA_VERSION >= 9020
 // CUDA no longer provides inline assembly (or bitcode) implementation of these

JonChesterfield wrote:
> JonChesterfield wrote:
> > emankov wrote:
> > > Unneeded formatting.
> > Correct formatting though. This is what clang-format did to the whole file. 
> > I'll revert the space in favour of git-clang-format if you prefer
> git-clang-format also wanted to insert the space here and I then had to use 
> -nolint. However hopefully that removes an async round trip in the review 
> chain. Some of the previous lines were 81 characters. I suppose we could 
> apply a whitespace-only clang-format patch first but it doesn't seem worth 
> the time.
The point is that this formatting is not related to the change.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D107492/new/

https://reviews.llvm.org/D107492

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits