Hi,
I'm having problems with big arrays; this crashes horribly:
from pycuda import driver, compiler, gpuarray, tools
from pycuda.curandom import rand as curand
import numpy as np
import pycuda.autoinit
NV = 5632 # No of vectors
DIM = 2031 # No of dimensions per vector
BLOCK_SIZE = 16 # 512 max threads per mp
GRID_SIZE = 352
HEIGHT = 8
WIDTH = 8
gj = curand((NV, DIM))
nodes = curand((HEIGHT, WIDTH, DIM)) # Each row is a vector
dissim = curand((DIM,DIM))
nearest = gpuarray.zeros((NV), np.float32)
kernel_code = '''
__global__ void NearestPrototypeKernel(float *gj, float *nodes, float
*dissim, float *nearest)
{
// Element
int idx = blockIdx.x * blockDim.x + threadIdx.x;
float value = 0.;
for (int i = 0; i < 8; ++i) {
for (int j = 0; j < 8; ++j) {
for (int l = 0; l < 2031; ++l) {
float wp_l = gj[idx * 2031 + l];
if (wp_l > 0) {
for (int k = 0; k < 2031; ++k) {
value += 0.01;
}
}
}
}
}
nearest[idx] = value;
}
'''
mod = compiler.SourceModule(kernel_code)
npker = mod.get_function("NearestPrototypeKernel")
npker(
# inputs
gj, nodes, dissim,
# output
nearest,
# block of multiple threads
block = (BLOCK_SIZE, 1, 1),
# grid of blocks
grid = (GRID_SIZE, 1)
)
a = nearest.get()
---
with this:
/Library/Frameworks/Python.framework/Versions/6.0.0/lib/python2.6/site-packages/pycuda-2011.1-py2.6-macosx-10.5-i386.egg/pycuda/compiler.py:122:
UserWarning: The CUDA compiler suceeded, but said the following:
ptxas /tmp/tmpxft_000003e5_00000000-2_kernel.ptx, line 93; warning : Double is
not supported. Demoting to float
+stdout+stderr)
Traceback (most recent call last):
File "kernel_test.py", line 54, in <module>
a = nearest.get()
File
"/Library/Frameworks/Python.framework/Versions/6.0.0/lib/python2.6/site-packages/pycuda-2011.1-py2.6-macosx-10.5-i386.egg/pycuda/gpuarray.py",
line 177, in get
drv.memcpy_dtoh(ary, self.gpudata)
pycuda._driver.LaunchError: cuMemcpyDtoH failed: launch timeout
Error in atexit._run_exitfuncs:
Traceback (most recent call last):
File
"/Library/Frameworks/Python.framework/Versions/6.0.0/lib/python2.6/atexit.py",
line 24, in _run_exitfuncs
func(*targs, **kargs)
File
"/Library/Frameworks/Python.framework/Versions/6.0.0/lib/python2.6/site-packages/pycuda-2011.1-py2.6-macosx-10.5-i386.egg/pycuda/autoinit.py",
line 13, in _finish_up
context.pop()
LaunchError: cuCtxPopCurrent failed: launch timeout
Error in sys.exitfunc:
Traceback (most recent call last):
File
"/Library/Frameworks/Python.framework/Versions/6.0.0/lib/python2.6/atexit.py",
line 24, in _run_exitfuncs
func(*targs, **kargs)
File
"/Library/Frameworks/Python.framework/Versions/6.0.0/lib/python2.6/site-packages/pycuda-2011.1-py2.6-macosx-10.5-i386.egg/pycuda/autoinit.py",
line 13, in _finish_up
context.pop()
pycuda._driver.LaunchError: cuCtxPopCurrent failed: launch timeout
PyCUDA WARNING: a clean-up operation failed (dead context maybe?)
cuMemFree failed: invalid context
PyCUDA WARNING: a clean-up operation failed (dead context maybe?)
cuMemFree failed: invalid context
PyCUDA WARNING: a clean-up operation failed (dead context maybe?)
cuMemFree failed: invalid context
PyCUDA WARNING: a clean-up operation failed (dead context maybe?)
cuModuleUnload failed: invalid context
PyCUDA WARNING: a clean-up operation failed (dead context maybe?)
cuMemFree failed: invalid context
-------------------------------------------------------------------
PyCUDA ERROR: The context stack was not empty upon module cleanup.
-------------------------------------------------------------------
A context was still active when the context stack was being
cleaned up. At this point in our execution, CUDA may already
have been deinitialized, so there is no way we can finish
cleanly. The program will be aborted now.
Use Context.pop() to avoid this problem.
-------------------------------------------------------------------
Abort
----
Does anybody have any idea to get around this?
Cheers,
Matthew
_______________________________________________
PyCUDA mailing list
[email protected]
http://lists.tiker.net/listinfo/pycuda