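Your block is 64x64=4096 threads big. The error message is pretty accurate, I'd say. :) CUDA limits a single block to 512 or 1024 threads in total (depending on the device's compute capability), so block=(DIM, DIM, 1) is too large. Since the kernel indexes with blockIdx and gridDim, launch it with block=(1, 1, 1) and grid=(DIM, DIM) instead.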
Andreas 金陆 <[email protected]> writes: > Hi, all > the following code is translated from the book "CUDA by example". When it > runs, I get 'pycuda._driver.LogicError: cuFuncSetBlockShape failed: invalid > value' > I have searched via google, and some pages said the block is too big. But > I only use 64, which is very small! > Any help? thanks > > from scipy import * > from scipy import misc > import pycuda.autoinit > import pycuda.driver as drv > from pycuda.compiler import SourceModule > DIM = 64 > mod = SourceModule(""" > #define DIM 64 > struct cuComplex{ > float r, i; > __device__ cuComplex(float a, float b):r(a),i(b){} > __device__ float magnitude2() { return r*r+i*i;} > > __device__ cuComplex operator*(const cuComplex &a) > { return cuComplex(r*a.r-i*a.i, i*a.r+r*a.i); } > > __device__ cuComplex operator+(const cuComplex &a) > { return cuComplex(r + a.r, i + a.i); } > }; > __device__ int julia(int x, int y) > { > const float scale=1.5; > float jx=scale*(float)(DIM/2-x)/(DIM/2); > float jy=scale*(float)(DIM/2-y)/(DIM/2); > > cuComplex c(-0.8, 0.156), a(jx, jy); > > for(int i=0; i<200; i++) > { > a=a*a+c; > if (a.magnitude2()>1000) return 0; > } > return 1; > } > __global__ void kernel(unsigned char *ptr) > { > unsigned int x=blockIdx.x; > unsigned int y=blockIdx.y; > unsigned int offset = x+y*gridDim.x; > > int juliaValue=julia(x, y); > ptr[offset]=255*juliaValue; > } > """) > mb = mod.get_function("kernel") > dev_bitmap=zeros((DIM, DIM), dtype=dtype('c') ) > mb( drv.InOut(dev_bitmap), block=(DIM, DIM, 1), grid = (1, 1)) > > misc.imsave("mandelbrot.png", dev_bitmap.T) > > > _______________________________________________ > PyCUDA mailing list > [email protected] > http://lists.tiker.net/listinfo/pycuda
pgpCDj5zBiCBz.pgp
Description: PGP signature
_______________________________________________ PyCUDA mailing list [email protected] http://lists.tiker.net/listinfo/pycuda
