Hi, all
the following code is translated from the book "CUDA by example". When it runs,
I get 'pycuda._driver.LogicError: cuFuncSetBlockShape failed: invalid value'
I have searched via google, and some pages said the the block is too big. But I
only use 64, which is very small!
Any helps? thanks
from scipy import *
from scipy import misc
import pycuda.autoinit
import pycuda.driver as drv
from pycuda.compiler import SourceModule
DIM = 64
mod = SourceModule("""
#define DIM 64
struct cuComplex{
float r, i;
__device__ cuComplex(float a, float b):r(a),i(b){}
__device__ float magnitude2() { return r*r+i*i;}
__device__ cuComplex operator*(const cuComplex &a)
{ return cuComplex(r*a.r-i*a.i, i*a.r+r*a.i); }
__device__ cuComplex operator+(const cuComplex &a)
{ return cuComplex(r + a.r, i + a.i); }
};
__device__ int julia(int x, int y)
{
const float scale=1.5;
float jx=scale*(float)(DIM/2-x)/(DIM/2);
float jy=scale*(float)(DIM/2-y)/(DIM/2);
cuComplex c(-0.8, 0.156), a(jx, jy);
for(int i=0; i<200; i++)
{
a=a*a+c;
if (a.magnitude2()>1000) return 0;
}
return 1;
}
__global__ void kernel(unsigned char *ptr)
{
unsigned int x=blockIdx.x;
unsigned int y=blockIdx.y;
unsigned int offset = x+y*gridDim.x;
int juliaValue=julia(x, y);
ptr[offset]=255*juliaValue;
}
""")
mb = mod.get_function("kernel")
dev_bitmap=zeros((DIM, DIM), dtype=dtype('c') )
mb( drv.InOut(dev_bitmap), block=(DIM, DIM, 1), grid = (1, 1))
misc.imsave("mandelbrot.png", dev_bitmap.T)
_______________________________________________
PyCUDA mailing list
[email protected]
http://lists.tiker.net/listinfo/pycuda