I forgot to change the gen75 bind image function, so please ignore this version. I have already sent v4 of this patch.
On Thu, Jun 19, 2014 at 03:36:36PM +0800, Zhigang Gong wrote: > As sample LD message doesn't support array index, we have > to create a 2D array surface with the same buffer object. > Thus one 1D array image will have two surfaces binded to it > one is the index and the second is 128 + index. > > And then at kernel side, we will access the corresponding > 2D array surface when the LD message is required otherwise > will access the origin 1D array surface. > > Signed-off-by: Zhigang Gong <[email protected]> > --- > backend/src/backend/gen_insn_selection.cpp | 9 +- > backend/src/ir/instruction.cpp | 2 +- > backend/src/ocl_stdlib.tmpl.h | 161 > +++++++++++++++++++---------- > src/cl_api.c | 5 +- > src/cl_command_queue.c | 5 + > src/cl_device_id.c | 1 + > src/cl_device_id.h | 1 + > src/cl_gt_device.h | 1 + > src/cl_mem.c | 29 +++--- > src/intel/intel_gpgpu.c | 7 +- > 10 files changed, 149 insertions(+), 72 deletions(-) > > diff --git a/backend/src/backend/gen_insn_selection.cpp > b/backend/src/backend/gen_insn_selection.cpp > index ecb64cd..986aa3e 100644 > --- a/backend/src/backend/gen_insn_selection.cpp > +++ b/backend/src/backend/gen_insn_selection.cpp > @@ -3606,10 +3606,15 @@ namespace gbe > msgPayloads[valueID] = sel.selReg(insn.getSrc(valueID), > insn.getSrcType()); > msgLen = srcNum; > } > - uint32_t bti = insn.getImageIndex(); > + // We switch to a fixup bti for linear filter on a image1d array > sampling. > + uint32_t bti = insn.getImageIndex() + (insn.getSamplerOffset() == 2 ? 
> 128 : 0); > + if (bti > 253) { > + std::cerr << "Too large bti " << bti; > + return false; > + } > uint32_t sampler = insn.getSamplerIndex(); > > - sel.SAMPLE(dst, insn.getDstNum(), msgPayloads, msgLen, bti, sampler, > insn.getSamplerOffset()); > + sel.SAMPLE(dst, insn.getDstNum(), msgPayloads, msgLen, bti, sampler, > insn.getSamplerOffset() != 0); > return true; > } > DECL_CTOR(SampleInstruction, 1, 1); > diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp > index d081235..435869e 100644 > --- a/backend/src/ir/instruction.cpp > +++ b/backend/src/ir/instruction.cpp > @@ -527,7 +527,7 @@ namespace ir { > uint8_t srcIsFloat:1; > uint8_t dstIsFloat:1; > uint8_t samplerIdx:4; > - uint8_t samplerOffset:1; > + uint8_t samplerOffset:2; > uint8_t imageIdx; > static const uint32_t srcNum = 3; > static const uint32_t dstNum = 4; > diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h > index 605d96d..c43172d 100755 > --- a/backend/src/ocl_stdlib.tmpl.h > +++ b/backend/src/ocl_stdlib.tmpl.h > @@ -4566,24 +4566,18 @@ OVERLOADABLE float4 __gen_ocl_read_imagef(uint > surface_id, sampler_t sampler, in > > // 2D & 1D Array read > OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, > float u, float v, uint sampler_offset); > -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, > float u, int i, uint sampler_offset); > OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, > int u, int v, uint sampler_offset); > OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t > sampler, float u, float v, uint sampler_offset); > -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t > sampler, float u, int i, uint sampler_offset); > OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t > sampler, int u, int v, uint sampler_offset); > OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t > sampler, float u, float v, uint 
sampler_offset); > -OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t > sampler, float u, int i, uint sampler_offset); > OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t > sampler, int u, int v, uint sampler_offset); > > // 3D & 2D Array read > OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, > float u, float v, float w, uint sampler_offset); > -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, > float u, float v, int i, uint sampler_offset); > OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, > int u, int v, int w, uint sampler_offset); > OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t > sampler, float u, float v, float w, uint sampler_offset); > -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t > sampler, float u, float v, int i, uint sampler_offset); > OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t > sampler, int u, int v, int w, uint sampler_offset); > OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t > sampler, float u, float v, float w, uint sampler_offset); > -OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t > sampler, float u, float v, int i, uint sampler_offset); > OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t > sampler, int u, int v, int w, uint sampler_offset); > > // 1D write > @@ -4606,6 +4600,9 @@ int __gen_ocl_get_image_height(uint surface_id); > int __gen_ocl_get_image_channel_data_type(uint surface_id); > int __gen_ocl_get_image_channel_order(uint surface_id); > int __gen_ocl_get_image_depth(uint surface_id); > +/* The printf function. 
*/ > +int __gen_ocl_printf_stub(const char * format, ...); > +#define printf __gen_ocl_printf_stub > > // 2D 3D Image Common Macro > #ifdef GEN7_SAMPLER_CLAMP_BORDER_WORKAROUND > @@ -4616,21 +4613,49 @@ int __gen_ocl_get_image_depth(uint surface_id); > > #define GET_IMAGE(cl_image, surface_id) \ > uint surface_id = (uint)cl_image > +INLINE_OVERLOADABLE float __gen_compute_array_index(const float index, > image1d_array_t image) > +{ > + GET_IMAGE(image, surface_id); > + float array_size = __gen_ocl_get_image_depth(surface_id); > + return clamp(rint(index), 0.f, array_size - 1.f); > +} > + > +INLINE_OVERLOADABLE float __gen_compute_array_index(float index, > image2d_array_t image) > +{ > + GET_IMAGE(image, surface_id); > + float array_size = __gen_ocl_get_image_depth(surface_id); > + return clamp(rint(index), 0.f, array_size - 1.f); > +} > + > +INLINE_OVERLOADABLE int __gen_compute_array_index(int index, image1d_array_t > image) > +{ > + GET_IMAGE(image, surface_id); > + int array_size = __gen_ocl_get_image_depth(surface_id); > + return clamp(index, 0, array_size - 1); > +} > > -#define DECL_READ_IMAGE0(int_clamping_fix, \ > +INLINE_OVERLOADABLE int __gen_compute_array_index(int index, image2d_array_t > image) > +{ > + GET_IMAGE(image, surface_id); > + int array_size = __gen_ocl_get_image_depth(surface_id); > + return clamp(index, 0, array_size - 1); > +} > + > +#define DECL_READ_IMAGE0(int_clamping_fix, > \ > image_type, type, suffix, coord_type, n) > \ > INLINE_OVERLOADABLE type read_image ##suffix(image_type cl_image, > \ > const sampler_t sampler, > \ > coord_type coord) > \ > { > \ > GET_IMAGE(cl_image, surface_id); > \ > + GET_IMAGE_ARRAY_SIZE(cl_image, coord, int, ai); > \ > if (int_clamping_fix && > \ > ((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP) && > \ > ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST)) > \ > return __gen_ocl_read_image ##suffix( > \ > - EXPEND_READ_COORD(surface_id, sampler, coord), 1); > \ > + EXPEND_READ_COORD(surface_id, 
sampler, coord)); > \ > return __gen_ocl_read_image ##suffix( > \ > - EXPEND_READ_COORD(surface_id, sampler, (float)coord), > 0);\ > + EXPEND_READ_COORDF(surface_id, sampler, coord), 0); > \ > } > > #define DECL_READ_IMAGE1(float_coord_rounding_fix, int_clamping_fix, > \ > @@ -4640,6 +4665,7 @@ int __gen_ocl_get_image_depth(uint surface_id); > coord_type coord) > \ > { > \ > GET_IMAGE(cl_image, surface_id); > \ > + GET_IMAGE_ARRAY_SIZE(cl_image, coord, float, ai) > \ > coord_type tmpCoord = coord; > \ > if (float_coord_rounding_fix | int_clamping_fix) { > \ > if (((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP) > \ > @@ -4655,12 +4681,12 @@ int __gen_ocl_get_image_depth(uint surface_id); > } else > \ > intCoord = tmpCoord; > \ > return __gen_ocl_read_image ##suffix( > \ > - EXPEND_READ_COORD1(surface_id, sampler, intCoord), > 1);\ > + EXPEND_READ_COORDI(surface_id, sampler, intCoord));\ > } > \ > } > \ > } > \ > return __gen_ocl_read_image ##suffix( > \ > - EXPEND_READ_COORD(surface_id, sampler, tmpCoord), > 0);\ > + EXPEND_READ_COORDF(surface_id, sampler, tmpCoord), > 0);\ > } > > #define DECL_READ_IMAGE_NOSAMPLER(image_type, type, suffix, coord_type, n) > \ > @@ -4668,11 +4694,12 @@ int __gen_ocl_get_image_depth(uint surface_id); > coord_type coord) > \ > { > \ > GET_IMAGE(cl_image, surface_id); > \ > + GET_IMAGE_ARRAY_SIZE(cl_image, coord, int, ai) > \ > return __gen_ocl_read_image ##suffix( > \ > - EXPEND_READ_COORD(surface_id, > \ > + EXPEND_READ_COORDF(surface_id, > \ > CLK_NORMALIZED_COORDS_FALSE > \ > | CLK_ADDRESS_NONE > \ > - | CLK_FILTER_NEAREST, (float)coord), 0); > \ > + | CLK_FILTER_NEAREST, (float)coord), 0); > \ > } > > #define DECL_WRITE_IMAGE(image_type, type, suffix, coord_type) \ > @@ -4707,16 +4734,12 @@ int __gen_ocl_get_image_depth(uint surface_id); > DECL_WRITE_IMAGE(image_type, type, suffix, int) > \ > DECL_WRITE_IMAGE(image_type, type, suffix, float) > > -#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord > -#define 
EXPEND_READ_COORD1(id, sampler, coord) id, sampler, (int)(coord < 0 > ? -1 : coord) > +#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord, 1 > +#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord > +#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int)(coord < 0 > ? -1 : coord), 1 > #define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord = srcCoord * > __gen_ocl_get_image_width(id); > #define EXPEND_WRITE_COORD(id, coord, color) id, coord, color > - > -#define OUT_OF_BOX(coord, surface, normalized) \ > - (coord < 0 || \ > - ((normalized == 0) \ > - && (coord >= __gen_ocl_get_image_width(surface))) \ > - || ((normalized != 0) && (coord > 0x1p0))) > +#define GET_IMAGE_ARRAY_SIZE(a,b,c,d) > > #define FIXUP_FLOAT_COORD(tmpCoord) \ > { \ > @@ -4732,10 +4755,10 @@ DECL_IMAGE(0, image1d_t, float4, f) > DECL_IMAGE_INFO_COMMON(image1d_t) > > #undef EXPEND_READ_COORD > -#undef EXPEND_READ_COORD1 > +#undef EXPEND_READ_COORDF > +#undef EXPEND_READ_COORDI > #undef DENORMALIZE_COORD > #undef EXPEND_WRITE_COORD > -#undef OUT_OF_BOX > #undef FIXUP_FLOAT_COORD > #undef DECL_IMAGE > // End of 1D > @@ -4747,20 +4770,14 @@ DECL_IMAGE_INFO_COMMON(image1d_t) > DECL_WRITE_IMAGE(image_type, type, suffix, int ## n) > \ > DECL_WRITE_IMAGE(image_type, type, suffix, float ## n) > // 2D > -#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1 > -#define EXPEND_READ_COORD1(id, sampler, coord) id, sampler, (int)(coord.s0 < > 0 ? -1 : coord.s0), \ > - (int)(coord.s1 < 0 ? -1 : > coord.s1) > +#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, > coord.s1, 1 > +#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord.s0, > (float)coord.s1 > +#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int)(coord.s0 < > 0 ? -1 : coord.s0), \ > + (int)(coord.s1 < 0 ? 
-1 : > coord.s1), 1 > #define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = srcCoord.x * > __gen_ocl_get_image_width(id); \ > dstCoord.y = srcCoord.y * > __gen_ocl_get_image_height(id); > #define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1, color > > -#define OUT_OF_BOX(coord, surface, normalized) \ > - (coord.s0 < 0 || coord.s1 < 0 || \ > - ((normalized == 0) \ > - && (coord.s0 >= __gen_ocl_get_image_width(surface) \ > - || coord.s1 >= __gen_ocl_get_image_height(surface))) \ > - || ((normalized != 0) && (coord.s0 > 0x1p0 || coord.s1 > 0x1p0))) > - > #define FIXUP_FLOAT_COORD(tmpCoord) \ > { \ > if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f) \ > @@ -4774,6 +4791,28 @@ DECL_IMAGE(GEN_FIX_1, image2d_t, uint4, ui, 2) > DECL_IMAGE(0, image2d_t, float4, f, 2) > > // 1D Array > +#undef GET_IMAGE_ARRAY_SIZE > +#undef EXPEND_READ_COORD > +#undef EXPEND_READ_COORDF > +#undef EXPEND_READ_COORDI > +#undef DENORMALIZE_COORD > +#undef EXPEND_WRITE_COORD > +#undef FIXUP_FLOAT_COORD > + > +#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, (int)0, > ai, 2 > +#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord.s0, > (float)ai > +#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int)(coord.s0 < > 0 ? 
-1 : coord.s0), 0, (int)ai, 2 > +#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = srcCoord.x * > __gen_ocl_get_image_width(id); > +#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, > __gen_compute_array_index(coord.s1, cl_image), color > +#define GET_IMAGE_ARRAY_SIZE(image, coord, coord_type, ai) \ > + coord_type ai = __gen_compute_array_index(coord.s1, image); > + > +#define FIXUP_FLOAT_COORD(tmpCoord) \ > + { \ > + if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f) \ > + tmpCoord.s0 += -0x1p-9; \ > + } > + > DECL_IMAGE(GEN_FIX_1, image1d_array_t, int4, i, 2) > DECL_IMAGE(GEN_FIX_1, image1d_array_t, uint4, ui, 2) > DECL_IMAGE(0, image1d_array_t, float4, f, 2) > @@ -4799,29 +4838,23 @@ INLINE_OVERLOADABLE size_t > get_image_array_size(image1d_array_t image) > } > > #undef EXPEND_READ_COORD > -#undef EXPEND_READ_COORD1 > +#undef EXPEND_READ_COORDI > +#undef EXPEND_READ_COORDF > #undef DENORMALIZE_COORD > #undef EXPEND_WRITE_COORD > -#undef OUT_OF_BOX > #undef FIXUP_FLOAT_COORD > +#undef GET_IMAGE_ARRAY_SIZE > // End of 2D and 1D Array > > // 3D > -#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, > coord.s1, coord.s2 > -#define EXPEND_READ_COORD1(id, sampler, coord) id, sampler, (int) (coord.s0 > < 0 ? -1 : coord.s0), \ > - (int)(coord.s1 < 0 ? -1 : > coord.s1), (int)(coord.s2 < 0 ? -1 : coord.s2) > +#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, > coord.s1, coord.s2, 1 > +#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord.s0, > (float)coord.s1, (float)coord.s2 > +#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int) (coord.s0 > < 0 ? -1 : coord.s0), \ > + (int)(coord.s1 < 0 ? -1 : > coord.s1), (int)(coord.s2 < 0 ? 
-1 : coord.s2), 1 > #define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = srcCoord.x * > __gen_ocl_get_image_width(id); \ > dstCoord.y = srcCoord.y * > __gen_ocl_get_image_height(id); \ > dstCoord.z = srcCoord.z * > __gen_ocl_get_image_depth(id); > #define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1, > coord.s2, color > -#define OUT_OF_BOX(coord, surface, normalized) \ > - (coord.s0 < 0 || coord.s1 < 0 || coord.s2 < 0 || \ > - ((normalized == 0) \ > - && (coord.s0 >= __gen_ocl_get_image_width(surface) \ > - || coord.s1 >= __gen_ocl_get_image_height(surface) \ > - || coord.s2 >= __gen_ocl_get_image_depth(surface))) \ > - || ((normalized != 0) \ > - &&(coord.s0 > 1 || coord.s1 > 1 || coord.s2 > 1))) > > #define FIXUP_FLOAT_COORD(tmpCoord) \ > { \ > @@ -4832,6 +4865,7 @@ INLINE_OVERLOADABLE size_t > get_image_array_size(image1d_array_t image) > if (tmpCoord.s2 < 0 && tmpCoord.s2 > -0x1p-20) \ > tmpCoord.s2 += -0x1p-9; \ > } > +#define GET_IMAGE_ARRAY_SIZE(a,b,c,d) > > DECL_IMAGE(GEN_FIX_1, image3d_t, int4, i, 4) > DECL_IMAGE(GEN_FIX_1, image3d_t, uint4, ui, 4) > @@ -4841,6 +4875,32 @@ DECL_IMAGE(GEN_FIX_1, image3d_t, int4, i, 3) > DECL_IMAGE(GEN_FIX_1, image3d_t, uint4, ui, 3) > DECL_IMAGE(0, image3d_t, float4, f, 3) > > +#undef EXPEND_READ_COORD > +#undef EXPEND_READ_COORDF > +#undef EXPEND_READ_COORDI > +#undef DENORMALIZE_COORD > +#undef EXPEND_WRITE_COORD > +#undef FIXUP_FLOAT_COORD > +#undef GET_IMAGE_ARRAY_SIZE > + > +#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, > coord.s1, ai, 1 > +#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord.s0, > (float)coord.s1, (float)ai > +#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int) (coord.s0 > < 0 ? -1 : coord.s0), \ > + (int)(coord.s1 < 0 ? 
-1 : > coord.s1), (int)ai, 1 > +#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = srcCoord.x * > __gen_ocl_get_image_width(id); \ > + dstCoord.y = srcCoord.y * > __gen_ocl_get_image_height(id); > +#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1, > __gen_compute_array_index(coord.s2, cl_image), color > + > +#define FIXUP_FLOAT_COORD(tmpCoord) \ > + { \ > + if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20) \ > + tmpCoord.s0 += -0x1p-9; \ > + if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20) \ > + tmpCoord.s1 += -0x1p-9; \ > + } > +#define GET_IMAGE_ARRAY_SIZE(image, coord, coord_type, ai) \ > + coord_type ai = __gen_compute_array_index(coord.s2, image); > + > // 2D Array > DECL_IMAGE(GEN_FIX_1, image2d_array_t, int4, i, 4) > DECL_IMAGE(GEN_FIX_1, image2d_array_t, uint4, ui, 4) > @@ -4885,11 +4945,12 @@ INLINE_OVERLOADABLE size_t > get_image_array_size(image2d_array_t image) > } > > #undef EXPEND_READ_COORD > -#undef EXPEND_READ_COORD1 > +#undef EXPEND_READ_COORDF > +#undef EXPEND_READ_COORDI > #undef DENORMALIZE_COORD > #undef EXPEND_WRITE_COORD > -#undef OUT_OF_BOX > #undef FIXUP_FLOAT_COORD > +#undef GET_IMAGE_ARRAY_SIZE > // End of 3D and 2D Array > > #undef DECL_IMAGE > @@ -5066,8 +5127,4 @@ INLINE_OVERLOADABLE float > __gen_ocl_internal_fastpath_tanh (float x) > #undef OVERLOADABLE > #undef INLINE > > -/* The printf function. 
*/ > -int __gen_ocl_printf_stub(const char * format, ...); > -#define printf __gen_ocl_printf_stub > - > #endif /* __GEN_OCL_STDLIB_H__ */ > diff --git a/src/cl_api.c b/src/cl_api.c > index b17cc52..9e412f6 100644 > --- a/src/cl_api.c > +++ b/src/cl_api.c > @@ -674,7 +674,10 @@ clGetSupportedImageFormats(cl_context ctx, > err = CL_INVALID_VALUE; > goto error; > } > - if (UNLIKELY(image_type != CL_MEM_OBJECT_IMAGE2D && > + if (UNLIKELY(image_type != CL_MEM_OBJECT_IMAGE1D && > + image_type != CL_MEM_OBJECT_IMAGE1D_ARRAY && > + image_type != CL_MEM_OBJECT_IMAGE2D_ARRAY && > + image_type != CL_MEM_OBJECT_IMAGE2D && > image_type != CL_MEM_OBJECT_IMAGE3D)) { > err = CL_INVALID_VALUE; > goto error; > diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c > index 1bc97ac..41281f2 100644 > --- a/src/cl_command_queue.c > +++ b/src/cl_command_queue.c > @@ -137,6 +137,11 @@ cl_command_queue_bind_image(cl_command_queue queue, > cl_kernel k) > image->intel_fmt, image->image_type, > image->w, image->h, image->depth, > image->row_pitch, image->tiling); > + if (image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) > + cl_gpgpu_bind_image(gpgpu, k->images[i].idx + 128, image->base.bo, > image->offset, > + image->intel_fmt, image->image_type, > + image->w, image->h, image->depth, > + image->row_pitch, image->tiling); > } > return CL_SUCCESS; > } > diff --git a/src/cl_device_id.c b/src/cl_device_id.c > index af8e90c..578b548 100644 > --- a/src/cl_device_id.c > +++ b/src/cl_device_id.c > @@ -363,6 +363,7 @@ cl_get_device_info(cl_device_id device, > DECL_FIELD(IMAGE_SUPPORT, image_support) > DECL_FIELD(MAX_READ_IMAGE_ARGS, max_read_image_args) > DECL_FIELD(MAX_WRITE_IMAGE_ARGS, max_write_image_args) > + DECL_FIELD(IMAGE_MAX_ARRAY_SIZE, image_max_array_size) > DECL_FIELD(IMAGE2D_MAX_WIDTH, image2d_max_width) > DECL_FIELD(IMAGE2D_MAX_HEIGHT, image2d_max_height) > DECL_FIELD(IMAGE3D_MAX_WIDTH, image3d_max_width) > diff --git a/src/cl_device_id.h b/src/cl_device_id.h > index 
a5449a7..769bfd2 100644 > --- a/src/cl_device_id.h > +++ b/src/cl_device_id.h > @@ -51,6 +51,7 @@ struct _cl_device_id { > cl_uint max_read_image_args; > cl_uint max_write_image_args; > size_t image2d_max_width; > + size_t image_max_array_size; > size_t image2d_max_height; > size_t image3d_max_width; > size_t image3d_max_height; > diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h > index b8bda5e..6d03123 100644 > --- a/src/cl_gt_device.h > +++ b/src/cl_gt_device.h > @@ -41,6 +41,7 @@ > .image_support = CL_TRUE, > .max_read_image_args = 128, > .max_write_image_args = 8, > +.image_max_array_size = 2048, > .image2d_max_width = 8192, > .image2d_max_height = 8192, > .image3d_max_width = 8192, > diff --git a/src/cl_mem.c b/src/cl_mem.c > index 491993e..a7a0f59 100644 > --- a/src/cl_mem.c > +++ b/src/cl_mem.c > @@ -540,7 +540,7 @@ static cl_mem > _cl_mem_new_image(cl_context ctx, > cl_mem_flags flags, > const cl_image_format *fmt, > - const cl_mem_object_type image_type, > + const cl_mem_object_type orig_image_type, > size_t w, > size_t h, > size_t depth, > @@ -551,6 +551,7 @@ _cl_mem_new_image(cl_context ctx, > { > cl_int err = CL_SUCCESS; > cl_mem mem = NULL; > + cl_mem_object_type image_type = orig_image_type; > uint32_t bpp = 0, intel_fmt = INTEL_UNSUPPORTED_FORMAT; > size_t sz = 0, aligned_pitch = 0, aligned_slice_pitch = 0, aligned_h = 0; > cl_image_tiling_t tiling = CL_NO_TILE; > @@ -584,8 +585,7 @@ _cl_mem_new_image(cl_context ctx, > image_type != CL_MEM_OBJECT_IMAGE1D_ARRAY))) > DO_IMAGE_ERROR; > > - if (image_type == CL_MEM_OBJECT_IMAGE1D || > - image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { > + if (image_type == CL_MEM_OBJECT_IMAGE1D) { > size_t min_pitch = bpp * w; > if (data && pitch == 0) > pitch = min_pitch; > @@ -596,7 +596,7 @@ _cl_mem_new_image(cl_context ctx, > else if (data && slice_pitch == 0) > slice_pitch = pitch; > if (UNLIKELY(w > ctx->device->image2d_max_width)) DO_IMAGE_ERROR; > - if (UNLIKELY(depth > ctx->device->image2d_max_height)) 
DO_IMAGE_ERROR; > + if (UNLIKELY(depth > ctx->device->image_max_array_size)) DO_IMAGE_ERROR; > if (UNLIKELY(data && min_pitch > pitch)) DO_IMAGE_ERROR; > if (UNLIKELY(data && (slice_pitch % pitch != 0))) DO_IMAGE_ERROR; > if (UNLIKELY(!data && pitch != 0)) DO_IMAGE_ERROR; > @@ -617,7 +617,14 @@ _cl_mem_new_image(cl_context ctx, > > depth = 1; > } else if (image_type == CL_MEM_OBJECT_IMAGE3D || > + image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY || > image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) { > + if (image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { > + h = 1; > + tiling = CL_NO_TILE; > + } else if (cl_driver_get_ver(ctx->drv) != 6) > + tiling = cl_get_default_tiling(); > + > size_t min_pitch = bpp * w; > if (data && pitch == 0) > pitch = min_pitch; > @@ -626,15 +633,14 @@ _cl_mem_new_image(cl_context ctx, > slice_pitch = min_slice_pitch; > if (UNLIKELY(w > ctx->device->image3d_max_width)) DO_IMAGE_ERROR; > if (UNLIKELY(h > ctx->device->image3d_max_height)) DO_IMAGE_ERROR; > - if (UNLIKELY(depth > ctx->device->image3d_max_depth)) DO_IMAGE_ERROR; > + if (image_type == CL_MEM_OBJECT_IMAGE3D && > + (UNLIKELY(depth > ctx->device->image3d_max_depth))) DO_IMAGE_ERROR > + else if (UNLIKELY(depth > ctx->device->image_max_array_size)) > DO_IMAGE_ERROR; > if (UNLIKELY(data && min_pitch > pitch)) DO_IMAGE_ERROR; > if (UNLIKELY(data && min_slice_pitch > slice_pitch)) DO_IMAGE_ERROR; > if (UNLIKELY(!data && pitch != 0)) DO_IMAGE_ERROR; > if (UNLIKELY(!data && slice_pitch != 0)) DO_IMAGE_ERROR; > > - /* Pick up tiling mode (we do only linear on SNB) */ > - if (cl_driver_get_ver(ctx->drv) != 6) > - tiling = cl_get_default_tiling(); > } else > assert(0); > > @@ -643,12 +649,7 @@ _cl_mem_new_image(cl_context ctx, > /* Tiling requires to align both pitch and height */ > if (tiling == CL_NO_TILE) { > aligned_pitch = w * bpp; > - if (image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY || > - image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY || > - image_type == CL_MEM_OBJECT_IMAGE3D) > - aligned_h = ALIGN(h, 
valign); > - else > - aligned_h = h; > + aligned_h = ALIGN(h, valign); > } else if (tiling == CL_TILE_X) { > aligned_pitch = ALIGN(w * bpp, tilex_w); > aligned_h = ALIGN(h, tilex_h); > diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c > index 197d388..ab4cb0d 100644 > --- a/src/intel/intel_gpgpu.c > +++ b/src/intel/intel_gpgpu.c > @@ -91,7 +91,7 @@ struct intel_gpgpu > > unsigned long img_bitmap; /* image usage bitmap. */ > unsigned int img_index_base; /* base index for image surface.*/ > - drm_intel_bo *binded_img[max_img_n]; /* all images binded for the call */ > + drm_intel_bo *binded_img[max_img_n + 128]; /* all images binded for the > call */ > > unsigned long sampler_bitmap; /* sampler usage bitmap. */ > > @@ -764,7 +764,10 @@ intel_gpgpu_bind_image_gen7(intel_gpgpu_t *gpgpu, > memset(ss, 0, sizeof(*ss)); > > ss->ss0.vertical_line_stride = 0; // always choose VALIGN_2 > - ss->ss0.surface_type = intel_get_surface_type(type); > + if (index > 128 + 2 && type == CL_MEM_OBJECT_IMAGE1D_ARRAY) > + ss->ss0.surface_type = I965_SURFACE_2D; > + else > + ss->ss0.surface_type = intel_get_surface_type(type); > if (intel_is_surface_array(type)) { > ss->ss0.surface_array = 1; > ss->ss0.surface_array_spacing = 1; > -- > 1.8.3.2 > > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
