yaxunl created this revision. yaxunl added reviewers: tra, b-sumner. yaxunl requested review of this revision.
ROCm 4.5 device library introduced `__ockl_dm_alloc` and `__oclk_dm_dealloc` for supporting device side malloc/free. This patch redefines device malloc/free to use these functions. It also fixes a bug in the wrapper header which incorrectly defines free with return type void* instead of void. https://reviews.llvm.org/D116967 Files: clang/lib/Headers/__clang_hip_runtime_wrapper.h clang/test/Headers/hip-header.hip
Index: clang/test/Headers/hip-header.hip =================================================================== --- clang/test/Headers/hip-header.hip +++ clang/test/Headers/hip-header.hip @@ -4,7 +4,7 @@ // RUN: -internal-isystem %S/Inputs/include \ // RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \ // RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \ -// RUN: -D__HIPCC_RTC__ | FileCheck %s +// RUN: -D__HIPCC_RTC__ | FileCheck -check-prefixes=CHECK,NOMALLOC %s // RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \ // RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \ // RUN: -internal-isystem %S/Inputs/include \ @@ -25,6 +25,13 @@ // RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \ // RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \ // RUN: -D__HIPCC_RTC__ -std=c++14 | FileCheck -check-prefixes=CHECK,CXX14 %s +// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \ +// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \ +// RUN: -internal-isystem %S/Inputs/include \ +// RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \ +// RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \ +// RUN: -DHIP_VERSION_MAJOR=4 -DHIP_VERSION_MINOR=5 \ +// RUN: -D__HIPCC_RTC__ | FileCheck -check-prefixes=CHECK,MALLOC %s // expected-no-diagnostics @@ -120,14 +127,18 @@ #include <cstdlib> // CHECK-LABEL: define{{.*}}@_Z11test_malloc // CHECK: call {{.*}}i8* @malloc(i64 -// CHECK: define weak {{.*}}i8* @malloc(i64 +// CHECK-LABEL: define weak {{.*}}i8* @malloc(i64 +// MALLOC: call i64 @__ockl_dm_alloc +// NOMALLOC: call void @llvm.trap __device__ void test_malloc(void *a) { a = malloc(42); } // CHECK-LABEL: define{{.*}}@_Z9test_free -// CHECK: call {{.*}}i8* @free(i8* -// CHECK: define weak {{.*}}i8* @free(i8* +// CHECK: call {{.*}}void @free(i8* +// CHECK-LABEL: define weak {{.*}}void @free(i8* +// MALLOC: call void @__ockl_dm_dealloc +// NOMALLOC: call void @llvm.trap __device__ void test_free(void *a) { free(a); } Index: clang/lib/Headers/__clang_hip_runtime_wrapper.h =================================================================== --- clang/lib/Headers/__clang_hip_runtime_wrapper.h +++ clang/lib/Headers/__clang_hip_runtime_wrapper.h @@ -50,6 +50,9 @@ #include <cmath> #include <cstdlib> #include <stdlib.h> +#if __has_include("hip/hip_version.h") +#include "hip/hip_version.h" +#endif // __has_include("hip/hip_version.h") #else typedef __SIZE_TYPE__ size_t; // Define macros which are needed to declare HIP device API's without standard @@ -74,25 +77,35 @@ extern "C" { #endif //__cplusplus +#if HIP_VERSION_MAJOR > 4 || (HIP_VERSION_MAJOR == 4 && HIP_VERSION_MINOR >= 5) +extern "C" __device__ unsigned long long __ockl_dm_alloc(unsigned long long __size); +extern "C" __device__ void __ockl_dm_dealloc(unsigned long long __addr); +__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) { + return (void *) __ockl_dm_alloc(__size); +} +__attribute__((weak)) inline __device__ void free(void *__ptr) { + __ockl_dm_dealloc((unsigned long long)__ptr); +} +#else // HIP version check #if __HIP_ENABLE_DEVICE_MALLOC__ __device__ void *__hip_malloc(__hip_size_t __size); __device__ void *__hip_free(void *__ptr); __attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) { return __hip_malloc(__size); } -__attribute__((weak)) inline __device__ void *free(void *__ptr) { - return __hip_free(__ptr); +__attribute__((weak)) inline __device__ void free(void *__ptr) { + __hip_free(__ptr); } #else __attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) { __builtin_trap(); return (void *)0; } -__attribute__((weak)) inline __device__ void *free(void *__ptr) { +__attribute__((weak)) inline __device__ void free(void *__ptr) { __builtin_trap(); - return (void *)0; } #endif +#endif // HIP version check #ifdef __cplusplus } // extern "C"
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits