
Author: Artem Belevich
Date: 2023-05-19T11:48:08-07:00
New Revision: 4450285bd74079bf87ba7b824a8dec8dcfb586ef

URL: https://github.com/llvm/llvm-project/commit/4450285bd74079bf87ba7b824a8dec8dcfb586ef
DIFF: https://github.com/llvm/llvm-project/commit/4450285bd74079bf87ba7b824a8dec8dcfb586ef.diff

LOG: [CUDA] provide wrapper functions for new NVCC builtins.

For sm_80 NVCC introduced a handful of builtins with the names that deviate from
the historic __nvvm_/__nv naming convention. Clang/LLVM does provide equivalent
builtins, but using different names. This patch maps NVCC-style builtins to their
clang counterparts.

Differential Revision: https://reviews.llvm.org/D150894

Added: 
    

Modified: 
    clang/lib/Headers/__clang_cuda_intrinsics.h

Removed: 
    


################################################################################
diff  --git a/clang/lib/Headers/__clang_cuda_intrinsics.h b/clang/lib/Headers/__clang_cuda_intrinsics.h
index b87413e12a272..d085bf6536efc 100644
--- a/clang/lib/Headers/__clang_cuda_intrinsics.h
+++ b/clang/lib/Headers/__clang_cuda_intrinsics.h
@@ -512,6 +512,78 @@ __device__ inline void *__nv_cvta_local_to_generic_impl(size_t __ptr) {
 __device__ inline cuuint32_t __nvvm_get_smem_pointer(void *__ptr) {
   return __nv_cvta_generic_to_shared_impl(__ptr);
 }
+
+#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 800
+__device__ inline unsigned __reduce_add_sync_unsigned_impl(unsigned __mask,
+                                                           unsigned __value) {
+  return __nvvm_redux_sync_add(__mask, __value);
+}
+__device__ inline int __reduce_add_sync_signed_impl(unsigned __mask,
+                                                    int __value) {
+  return __nvvm_redux_sync_add(__mask, __value);
+}
+__device__ inline unsigned __reduce_min_sync_unsigned_impl(unsigned __mask,
+                                                           unsigned __value) {
+  return __nvvm_redux_sync_umin(__mask, __value);
+}
+__device__ inline unsigned __reduce_max_sync_unsigned_impl(unsigned __mask,
+                                                           unsigned __value) {
+  return __nvvm_redux_sync_umax(__mask, __value);
+}
+__device__ inline int __reduce_min_sync_signed_impl(unsigned __mask,
+                                                    int __value) {
+  return __nvvm_redux_sync_min(__mask, __value);
+}
+__device__ inline int __reduce_max_sync_signed_impl(unsigned __mask,
+                                                    int __value) {
+  return __nvvm_redux_sync_max(__mask, __value);
+}
+__device__ inline unsigned __reduce_or_sync_unsigned_impl(unsigned __mask,
+                                                          unsigned __value) {
+  return __nvvm_redux_sync_or(__mask, __value);
+}
+__device__ inline unsigned __reduce_and_sync_unsigned_impl(unsigned __mask,
+                                                           unsigned __value) {
+  return __nvvm_redux_sync_and(__mask, __value);
+}
+__device__ inline unsigned __reduce_xor_sync_unsigned_impl(unsigned __mask,
+                                                           unsigned __value) {
+  return __nvvm_redux_sync_xor(__mask, __value);
+}
+
+__device__ inline void
+__nv_memcpy_async_shared_global_4_impl(void *__dst, const void *__src,
+                                       unsigned __src_size) {
+  __nvvm_cp_async_ca_shared_global_4(
+      (void __attribute__((address_space(3))) *)__dst,
+      (const void __attribute__((address_space(1))) *)__src, __src_size);
+}
+__device__ inline void
+__nv_memcpy_async_shared_global_8_impl(void *__dst, const void *__src,
+                                       unsigned __src_size) {
+  __nvvm_cp_async_ca_shared_global_8(
+      (void __attribute__((address_space(3))) *)__dst,
+      (const void __attribute__((address_space(1))) *)__src, __src_size);
+}
+__device__ inline void
+__nv_memcpy_async_shared_global_16_impl(void *__dst, const void *__src,
+                                        unsigned __src_size) {
+  __nvvm_cp_async_ca_shared_global_16(
+      (void __attribute__((address_space(3))) *)__dst,
+      (const void __attribute__((address_space(1))) *)__src, __src_size);
+}
+
+__device__ inline void *
+__nv_associate_access_property_impl(const void *__ptr,
+                                    unsigned long long __prop) {
+  // TODO: it appears to provide compiler with some sort of a hint. We do not
+  // know what exactly it is supposed to do. However, CUDA headers suggest that
+  // just passing through __ptr should not affect correctness. They do so on
+  // pre-sm80 GPUs where this builtin is not available.
+  return (void*)__ptr;
+}
+#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 800
+
 } // extern "C"
 #endif // CUDA_VERSION >= 11000
 


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits