Author: Jon Chesterfield Date: 2021-01-15T02:16:33Z New Revision: 214387c2c694c92fec713f7ad224f10c1aebc1cf
URL: https://github.com/llvm/llvm-project/commit/214387c2c694c92fec713f7ad224f10c1aebc1cf DIFF: https://github.com/llvm/llvm-project/commit/214387c2c694c92fec713f7ad224f10c1aebc1cf.diff LOG: [libomptarget][nvptx] Reduce calls to cuda header [libomptarget][nvptx] Reduce calls to cuda header Remove use of clock_t in favour of a builtin. Drop a preprocessor branch. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D94731 Added: Modified: openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu Removed: ################################################################################ diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu index 7e81aba4152d..b68d3265a758 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu @@ -56,7 +56,6 @@ DEVICE double __kmpc_impl_get_wtime() { } // In Cuda 9.0, __ballot(1) from Cuda 8.0 is replaced with __activemask(). - DEVICE __kmpc_impl_lanemask_t __kmpc_impl_activemask() { #if CUDA_VERSION >= 9000 return __activemask(); @@ -66,7 +65,6 @@ DEVICE __kmpc_impl_lanemask_t __kmpc_impl_activemask() { } // In Cuda 9.0, the *_sync() version takes an extra argument 'mask'. - DEVICE int32_t __kmpc_impl_shfl_sync(__kmpc_impl_lanemask_t Mask, int32_t Var, int32_t SrcLane) { #if CUDA_VERSION >= 9000 @@ -86,14 +84,7 @@ DEVICE int32_t __kmpc_impl_shfl_down_sync(__kmpc_impl_lanemask_t Mask, #endif // CUDA_VERSION } -DEVICE void __kmpc_impl_syncthreads() { - // Use original __syncthreads if compiled by nvcc or clang >= 9.0. -#if !defined(__clang__) || __clang_major__ >= 9 - __syncthreads(); -#else - asm volatile("bar.sync %0;" : : "r"(0) : "memory"); -#endif // __clang__ -} +DEVICE void __kmpc_impl_syncthreads() { __syncthreads(); } DEVICE void __kmpc_impl_syncwarp(__kmpc_impl_lanemask_t Mask) { #if CUDA_VERSION >= 9000 @@ -145,11 +136,11 @@ DEVICE void __kmpc_impl_destroy_lock(omp_lock_t *lock) { DEVICE void __kmpc_impl_set_lock(omp_lock_t *lock) { // TODO: not sure spinning is a good idea here.. while (__kmpc_atomic_cas(lock, UNSET, SET) != UNSET) { - clock_t start = clock(); - clock_t now; + int32_t start = __nvvm_read_ptx_sreg_clock(); + int32_t now; for (;;) { - now = clock(); - clock_t cycles = now > start ? now - start : now + (0xffffffff - start); + now = __nvvm_read_ptx_sreg_clock(); + int32_t cycles = now > start ? now - start : now + (0xffffffff - start); if (cycles >= __OMP_SPIN * GetBlockIdInKernel()) { break; } _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits