Ubospica opened a new issue, #264:
URL: https://github.com/apache/tvm-ffi/issues/264
Currently, when a tvm-ffi function returns a `tvm::ffi::Tensor`, the script segfaults when the tensor is deallocated. I am still investigating the cause, but one possible reason is that the tensor is deallocated multiple times (a double free).
Reproducible script:
```python
import tvm_ffi
import tvm_ffi.cpp
import numpy as np
kernel_source = r"""
#include <tvm/ffi/container/tensor.h>
#include <tvm/ffi/container/shape.h>
#include <tvm/ffi/dtype.h>
#include <tvm/ffi/error.h>
#include <cstdlib>
#include <cstring>
// Simple CPU allocator
struct CPUNDAlloc {
  void AllocData(DLTensor* tensor) {
    tensor->data = malloc(tvm::ffi::GetDataSize(*tensor));
  }
  void FreeData(DLTensor* tensor) {
    if (tensor->data) {
      free(tensor->data);
    }
  }
};

// Value-return style: returns a new Tensor instead of writing to an output parameter
tvm::ffi::Tensor add_one_value_return(tvm::ffi::TensorView x) {
  TVM_FFI_ICHECK(x.ndim() == 1) << "x must be a 1D tensor";
  DLDataType f32_dtype{kDLFloat, 32, 1};
  TVM_FFI_ICHECK(x.dtype() == f32_dtype) << "x must be float32";
  int n = x.size(0);

  // Allocate new tensor for output
  tvm::ffi::Shape output_shape = {n};
  DLDevice device = {kDLCPU, 0};
  tvm::ffi::Tensor output = tvm::ffi::Tensor::FromNDAlloc(
      CPUNDAlloc(), output_shape, f32_dtype, device);

  // Compute: output = x + 1
  const float* input_data = static_cast<const float*>(x.data_ptr());
  float* output_data = static_cast<float*>(output.data_ptr());
  for (int i = 0; i < n; ++i) {
    output_data[i] = input_data[i] + 1.0f;
  }
  return output;  // Return by value
}
"""

mod = tvm_ffi.cpp.load_inline(
    name="segv_test",
    cpp_sources=kernel_source,
    functions=["add_one_value_return"],
)
# Create input tensor
input_array = np.array([1.0, 2.0, 3.0, 4.0, 5.0], dtype=np.float32)
# Call the function directly from the module; it should return a new tensor
output_tensor = mod.add_one_value_return(input_array)
print(f"Output tensor type: {type(output_tensor)}")
print(f"Output tensor: {output_tensor}")
# Convert DLPack tensor to numpy array
output_array = np.from_dlpack(output_tensor)
print(f"Output array: {output_array}")
```
The print statements produce the correct results, but the program crashes at the end. Output:
```
Output tensor type: <class 'tvm_ffi.core.Tensor'>
Output tensor: Tensor(97321732471040)
Output array: [2. 3. 4. 5. 6.]
!!!!!!! Segfault encountered !!!!!!!
Signal: 11 (SIGSEGV)
Fault address: 0x758c408ed2c0
Signal code: 1
C++ Backtrace:
File "src/ffi/backtrace.cc", line 203, in TVMFFISegFaultHandler(int,
siginfo_t*, void*)
C backtrace (3 frames):
/home/yixind/tvm-ffi/build/lib/libtvm_ffi.so(+0x1bd20)[0x758a94e23d20]
/lib/x86_64-linux-gnu/libc.so.6(+0x45330)[0x758c49045330]
[0x758c408ed2c0]
Segmentation fault (core dumped)
```
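To check whether the crash depends on the teardown order at interpreter exit (consistent with the double-deallocation guess above), I am also trying a variant that releases the objects explicitly before the interpreter shuts down. This is just a narrowing sketch reusing `mod`, `np`, and `input_array` from the script above:

```python
import gc

# Narrowing sketch: drop the numpy view and the tensor explicitly instead of
# leaving them to interpreter finalization, to see whether the deallocation
# order matters. Reuses `mod`, `np`, and `input_array` from the repro script.
output_tensor = mod.add_one_value_return(input_array)
output_array = np.from_dlpack(output_tensor)

del output_array   # release the numpy view first
del output_tensor  # then release the tvm_ffi.core.Tensor
gc.collect()
print("explicit release finished")
```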
Backtrace:
```
#0 0x00007ffff6a872c0 in ?? ()
#1 0x00007ffe43a40edb in tvm::ffi::Object::DecRef () at include/tvm/ffi/object.h:417
#2 tvm::ffi::details::ObjectUnsafe::DecRefObjectHandle () at include/tvm/ffi/object.h:1168
#3 tvm::ffi::TensorObj::DLManagedTensorDeleter<DLManagedTensorVersioned> () at include/tvm/ffi/container/tensor.h:170
#4 0x0000555555820b19 in capsule_dealloc (o=0x7ffe44374fc0) at /usr/local/src/conda/python-3.12.10/Objects/capsule.c:256
#5 0x00007ffee7f4adad in array_dealloc () from /raid/user_data/yixind/miniforge3/lib/python3.12/site-packages/numpy/_core/_multiarray_umath.cpython-312-x86_64-linux-gnu.so
#6 0x00005555557478f8 in _Py_Dealloc (op=0x7ffe430811d0) at /usr/local/src/conda/python-3.12.10/Objects/object.c:2640
#7 Py_DECREF (op=0x7ffe430811d0) at /usr/local/src/conda/python-3.12.10/Include/object.h:705
#8 Py_XDECREF (op=0x7ffe430811d0) at /usr/local/src/conda/python-3.12.10/Include/object.h:798
#9 insertdict (interp=0x555555b7e570 <_PyRuntime+75728>, mp=0x7ffff75f55c0, key=0x7ffff7128670, hash=<optimized out>, value=0x555555b55e00 <_Py_NoneStruct>) at /usr/local/src/conda/python-3.12.10/Objects/dictobject.c:1323
#10 0x000055555583ef87 in _PyModule_ClearDict (d=0x7ffff75f55c0) at /usr/local/src/conda/python-3.12.10/Objects/moduleobject.c:656
#11 0x000055555583e1a2 in finalize_modules_clear_weaklist (verbose=0, weaklist=0x7ffe452a9740, interp=0x555555b7e570 <_PyRuntime+75728>) at /usr/local/src/conda/python-3.12.10/Python/pylifecycle.c:1643
#12 finalize_modules (tstate=tstate@entry=0x555555bdbe90 <_PyRuntime+458992>) at /usr/local/src/conda/python-3.12.10/Python/pylifecycle.c:1726
#13 0x00005555558243da in Py_FinalizeEx () at /usr/local/src/conda/python-3.12.10/Python/pylifecycle.c:2015
#14 0x0000555555833080 in Py_RunMain () at /usr/local/src/conda/python-3.12.10/Modules/main.c:716
#15 0x00005555557ee5d7 in Py_BytesMain (argc=<optimized out>, argv=<optimized out>) at /usr/local/src/conda/python-3.12.10/Modules/main.c:768
#16 0x00007ffff7c2a1ca in __libc_start_call_main (main=main@entry=0x5555557ee520 <main>, argc=argc@entry=3, argv=argv@entry=0x7fffffffd518) at ../sysdeps/nptl/libc_start_call_main.h:58
#17 0x00007ffff7c2a28b in __libc_start_main_impl (main=0x5555557ee520 <main>, argc=3, argv=0x7fffffffd518, init=<optimized out>, fini=<optimized out>, rtld_fini=<optimized out>, stack_end=0x7fffffffd508) at ../csu/libc-start.c:360
#18 0x00005555557ee47d in _start ()
```
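Frames #1-#5 show the crash happens when the `DLManagedTensorVersioned` deleter installed for the DLPack capsule calls `DecRef`, triggered from numpy's `array_dealloc` during module finalization (frames #10-#12). To check whether the DLPack export path is actually required for the crash, I am also trying the same kernel without `np.from_dlpack`; just a sketch reusing `kernel_source` from the repro above:

```python
import numpy as np
import tvm_ffi.cpp

# Narrowing sketch: return the tensor but never export it through DLPack, so
# only tvm-ffi's own deallocation runs at interpreter exit.
mod2 = tvm_ffi.cpp.load_inline(
    name="segv_test_no_dlpack",
    cpp_sources=kernel_source,
    functions=["add_one_value_return"],
)
t = mod2.add_one_value_return(np.array([1.0, 2.0, 3.0], dtype=np.float32))
print(type(t))
# no np.from_dlpack(t) here; `t` is released when the interpreter shuts down
```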