Re: [PR] [Enhancement] Refactor cubin launcher [tvm-ffi]

via GitHub Thu, 18 Dec 2025 00:27:26 -0800


oraluben commented on code in PR #300:
URL: https://github.com/apache/tvm-ffi/pull/300#discussion_r2630066118



##########
include/tvm/ffi/extra/cuda/unify_api.h:
##########
@@ -0,0 +1,183 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef TVM_FFI_EXTRA_CUDA_UNIFY_API_H_
+#define TVM_FFI_EXTRA_CUDA_UNIFY_API_H_
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <tvm/ffi/error.h>
+#include <tvm/ffi/extra/cuda/base.h>
+
+#include <filesystem>
+#include <string>
+
+#ifndef TVM_FFI_CUBIN_LAUNCHER_USE_DRIVER_API
+#if CUDART_VERSION >= 12080
+// Use Runtime API by default if possible
+#define TVM_FFI_CUBIN_LAUNCHER_USE_DRIVER_API 0
+#else
+#define TVM_FFI_CUBIN_LAUNCHER_USE_DRIVER_API 1
+#endif  // CUDART_VERSION >= 12080
+#else
+#if (!(TVM_FFI_CUBIN_LAUNCHER_USE_DRIVER_API)) && (CUDART_VERSION < 12080)
+#define _STRINGIFY(x) #x
+#define STR(x) _STRINGIFY(x)
+static_assert(false, "Runtime API only supported for CUDA >= 12.8, got CUDA 
Runtime version: " STR(
+                         CUDART_VERSION));
+#endif
+#endif
+
+namespace tvm::ffi {
+
+#if TVM_FFI_CUBIN_LAUNCHER_USE_DRIVER_API
+
+using StreamHandle = CUstream;
+using ResultHandle = CUresult;
+
+using LibraryHandle = CUlibrary;
+using KernelHandle = CUkernel;
+using LaunchConfigHandle = CUlaunchConfig;
+using LaunchAttrHandle = CUlaunchAttribute;
+
+using DeviceAttrHandle = CUdevice_attribute;
+using DeviceHandle = CUdevice;
+
+#define FFI_CUDA_SUCCESS CUDA_SUCCESS
+
+#define load_function cuLibraryGetKernel
+#define get_device_count cuDeviceGetCount
+#define get_device_attr cuDeviceGetAttribute
+#define unload_library cuLibraryUnload
+
+#else
+
+using StreamHandle = cudaStream_t;
+using ResultHandle = cudaError_t;
+
+using LibraryHandle = cudaLibrary_t;
+using KernelHandle = cudaKernel_t;
+using LaunchConfigHandle = cudaLaunchConfig_t;
+using LaunchAttrHandle = cudaLaunchAttribute;
+
+using DeviceAttrHandle = cudaDeviceAttr;
+using DeviceHandle = int;
+
+#define FFI_CUDA_SUCCESS cudaSuccess
+
+#define load_function cudaLibraryGetKernel
+#define get_device_count cudaGetDeviceCount
+#define get_device_attr cudaDeviceGetAttribute
+#define unload_library cudaLibraryUnload
+
+#endif
+
+#define TVM_FFI_CHECK_RUNTIME_CUDA_ERROR(stmt)                                 
     \
+  do {                                                                         
     \
+    cudaError_t __err = (stmt);                                                
     \
+    if (__err != cudaSuccess) {                                                
     \
+      const char* __err_name = cudaGetErrorName(__err);                        
     \
+      const char* __err_str = cudaGetErrorString(__err);                       
     \
+      TVM_FFI_THROW(RuntimeError) << "CUDA Runtime Error: " << __err_name << " 
("   \
+                                  << static_cast<int>(__err) << "): " << 
__err_str; \
+    }                                                                          
     \
+  } while (0)
+
+#define TVM_FFI_CHECK_DRIVER_CUDA_ERROR(stmt)                                  
\
+  do {                                                                         
\
+    CUresult __err = (stmt);                                                   
\
+    if (__err != CUDA_SUCCESS) {                                               
\
+      const char *name, *info;                                                 
\
+      cuGetErrorName(__err, &name);                                            
\
+      cuGetErrorString(__err, &info);                                          
\
+      TVM_FFI_THROW(RuntimeError) << "CUDA Driver Error: " << name << " ("     
\
+                                  << static_cast<int>(__err) << "): " << info; 
\
+    }                                                                          
\
+  } while (0)
+
+static ResultHandle load_image(LibraryHandle* library, const void* image) {  // Loads `image` into `*library` through whichever API was selected at compile time and returns that call's status code unchecked.
+#if TVM_FFI_CUBIN_LAUNCHER_USE_DRIVER_API
+  return cuLibraryLoadData(library, image, nullptr, nullptr, 0, nullptr, 
nullptr, 0);  // Driver API path: every argument after `image` is passed as nullptr/0.
+#else
+  return cudaLibraryLoadData(library, image, nullptr, nullptr, 0, nullptr, 
nullptr, 0);  // Runtime API path: same argument pattern as the driver call.
+#endif
+}
+
+static DeviceHandle idx_to_device(int idx) {  // Converts a device index into the DeviceHandle type of the API selected at compile time.
+#if TVM_FFI_CUBIN_LAUNCHER_USE_DRIVER_API
+  CUdevice o;
+  TVM_FFI_CHECK_DRIVER_CUDA_ERROR(cuDeviceGet(&o, idx));  // The check macro throws RuntimeError when cuDeviceGet does not return CUDA_SUCCESS.
+  return o;
+#else
+  return idx;  // On the Runtime API path DeviceHandle is an alias for int, so the index is returned as-is.
+#endif
+}
+
+static ResultHandle launch_kernel(KernelHandle kernel, void** args, 
tvm::ffi::dim3 grid,
+                                  tvm::ffi::dim3 block, StreamHandle stream,
+                                  uint32_t dyn_smem_bytes = 0) {
+#if TVM_FFI_CUBIN_LAUNCHER_USE_DRIVER_API
+  return cuLaunchKernel(reinterpret_cast<CUfunction>(kernel), grid.x, grid.y, 
grid.z, block.x,

Review Comment:
   Based on my understanding, there are two categories of CUDA Driver API that 
can load and launch a CUDA kernel independently: 
   1. [Module 
API](https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MODULE.html)
   2. [Library 
API](https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__LIBRARY.html)
   
   The driver Library API is almost exactly what cudart does in its library API ([6.35. 
Interactions with the CUDA Driver 
API](https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__DRIVER.html)):
 
   > **Interactions between CUfunction and cudaFunction_t**
   > 
   > The types 
[CUfunction](https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__TYPES.html#group__CUDA__TYPES_1gba6128b948022f495706d93bc2cea9c8)
 and 
[cudaFunction_t](https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html#group__CUDART__TYPES_1g6ac3a22cc596d09ac07cdafb8a4638cf)
 represent the same data type and may be used interchangeably by casting the 
two types between each other.
   > 
   > In order to use a 
[cudaFunction_t](https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html#group__CUDART__TYPES_1g6ac3a22cc596d09ac07cdafb8a4638cf)
 in a CUDA Driver API function which takes a 
[CUfunction](https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__TYPES.html#group__CUDA__TYPES_1gba6128b948022f495706d93bc2cea9c8),
 it is necessary to explicitly cast the 
[cudaFunction_t](https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html#group__CUDART__TYPES_1g6ac3a22cc596d09ac07cdafb8a4638cf)
 to a 
[CUfunction](https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__TYPES.html#group__CUDA__TYPES_1gba6128b948022f495706d93bc2cea9c8).
   > 
   > **Interactions between CUkernel and cudaKernel_t**
   > 
   > The types 
[CUkernel](https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__TYPES.html#group__CUDA__TYPES_1g612028921e5736db673e4307589989ed)
 and 
[cudaKernel_t](https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html#group__CUDART__TYPES_1g0b33f204b307b3154aa4f005a3c8a46a)
 represent the same data type and may be used interchangeably by casting the 
two types between each other.
   > 
   > In order to use a 
[cudaKernel_t](https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html#group__CUDART__TYPES_1g0b33f204b307b3154aa4f005a3c8a46a)
 in a CUDA Driver API function which takes a 
[CUkernel](https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__TYPES.html#group__CUDA__TYPES_1g612028921e5736db673e4307589989ed),
 it is necessary to explicitly cast the 
[cudaKernel_t](https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html#group__CUDART__TYPES_1g0b33f204b307b3154aa4f005a3c8a46a)
 to a 
[CUkernel](https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__TYPES.html#group__CUDA__TYPES_1g612028921e5736db673e4307589989ed).
   
   Since we were already using the [library 
API](https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__LIBRARY.html) 
(starting from CUDA 12.8) here, I think it's safe to use its driver 
counterpart (library/kernel) in the driver-API version as well.
   
   
   -----
   
   > the reinterpret_cast here might not be proper since they are different 
things.
   
   In the [doc of 
`cuLaunchKernel`](https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__EXEC.html#group__CUDA__EXEC_1gb8f3dc3031b40da29d5f9a7139e52e15):
   > Note that the API can also be used to launch context-less kernel 
[CUkernel](https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__TYPES.html#group__CUDA__TYPES_1g612028921e5736db673e4307589989ed)
 by querying the handle using 
[cuLibraryGetKernel()](https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__LIBRARY.html#group__CUDA__LIBRARY_1g15336d865f5abd63e3dc6004d5bc037a)
 and then passing it to the API by casting to 
[CUfunction](https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__TYPES.html#group__CUDA__TYPES_1gba6128b948022f495706d93bc2cea9c8).
 Here, the context to launch the kernel on will either be taken from the 
specified stream hStream or the current context in case of NULL stream.
   
   I think this is the intended usage: since the signature of `cuLaunchKernel` 
only takes `CUfunction`, users should explicitly convert `CUkernel` to 
`CUfunction` to call `cuLaunchKernel`.
   
   So I'm inclined to keep this part unchanged unless there is another issue, e.g. 
a performance difference that comes from context-awareness.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: [PR] [Enhancement] Refactor cubin launcher [tvm-ffi]

Reply via email to