This patch fix one regressions in the image processing path. For all non-workarounded image which the image offset is 0, we should always use float type coord.
Signed-off-by: Zhigang Gong <[email protected]> --- backend/src/libocl/src/ocl_image.cl | 45 +++++++++++++++++++++-------------- backend/src/llvm/llvm_gen_backend.cpp | 14 +++-------- 2 files changed, 30 insertions(+), 29 deletions(-) diff --git a/backend/src/libocl/src/ocl_image.cl b/backend/src/libocl/src/ocl_image.cl index 9332f30..a8dbc92 100644 --- a/backend/src/libocl/src/ocl_image.cl +++ b/backend/src/libocl/src/ocl_image.cl @@ -19,6 +19,7 @@ #include "ocl_math.h" #include "ocl_integer.h" #include "ocl_common.h" +#include "ocl_convert.h" #define int1 int #define float1 float @@ -286,22 +287,27 @@ INLINE_OVERLOADABLE float3 __gen_fixup_neg_boundary(float3 coord) #define GEN_FIX_INT_CLAMPING 0 #endif +#define convert_float1 convert_float +#define convert_int1 convert_int + // For integer coordinates #define DECL_READ_IMAGE0(int_clamping_fix, image_type, \ - image_data_type, suffix, coord_type) \ + image_data_type, suffix, coord_type, n) \ OVERLOADABLE image_data_type read_image ##suffix(image_type cl_image, \ const sampler_t sampler, \ coord_type coord) \ { \ coord = __gen_validate_array_index(coord, cl_image); \ if (int_clamping_fix && __gen_sampler_need_fix(sampler)) \ - return __gen_ocl_read_image ##suffix(cl_image, sampler, coord, 1); \ - return __gen_ocl_read_image ##suffix(cl_image, sampler, coord, 0); \ + return __gen_ocl_read_image ##suffix(cl_image, sampler, \ + convert_int ##n(coord), 1); \ + return __gen_ocl_read_image ##suffix(cl_image, sampler, \ + convert_float ##n (coord), 0); \ } // For float coordinates #define DECL_READ_IMAGE1(int_clamping_fix, image_type, \ - image_data_type, suffix, coord_type) \ + image_data_type, suffix, coord_type, n) \ OVERLOADABLE image_data_type read_image ##suffix(image_type cl_image, \ const sampler_t sampler, \ coord_type coord) \ @@ -317,15 +323,16 @@ INLINE_OVERLOADABLE float3 __gen_fixup_neg_boundary(float3 coord) tmpCoord = __gen_denormalize_coord(cl_image, tmpCoord); \ tmpCoord = __gen_fixup_neg_boundary(tmpCoord); \ return __gen_ocl_read_image ##suffix( \ - cl_image, sampler, tmpCoord, 1); \ + cl_image, sampler, convert_int ##n(tmpCoord), 1); \ } \ } \ } \ - return __gen_ocl_read_image ##suffix(cl_image, sampler, tmpCoord, 0); \ + return __gen_ocl_read_image ##suffix(cl_image, sampler, \ + convert_float ##n (tmpCoord), 0); \ } #define DECL_READ_IMAGE_NOSAMPLER(image_type, image_data_type, \ - suffix, coord_type) \ + suffix, coord_type, n) \ OVERLOADABLE image_data_type read_image ##suffix(image_type cl_image, \ coord_type coord) \ { \ @@ -333,7 +340,7 @@ INLINE_OVERLOADABLE float3 __gen_fixup_neg_boundary(float3 coord) sampler_t defaultSampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE \ | CLK_FILTER_NEAREST; \ return __gen_ocl_read_image ##suffix( \ - cl_image, defaultSampler, coord, 0); \ + cl_image, defaultSampler, convert_float ##n (coord), 0); \ } #define DECL_WRITE_IMAGE(image_type, image_data_type, suffix, coord_type) \ @@ -347,11 +354,11 @@ INLINE_OVERLOADABLE float3 __gen_fixup_neg_boundary(float3 coord) #define DECL_IMAGE(int_clamping_fix, image_type, image_data_type, suffix, n) \ DECL_READ_IMAGE0(int_clamping_fix, image_type, \ - image_data_type, suffix, int ##n) \ + image_data_type, suffix, int ##n, n) \ DECL_READ_IMAGE1(int_clamping_fix, image_type, \ - image_data_type, suffix, float ##n) \ - DECL_READ_IMAGE_NOSAMPLER(image_type, image_data_type, suffix, int ##n) \ - DECL_WRITE_IMAGE(image_type, image_data_type, suffix, int ## n) \ + image_data_type, suffix, float ##n, n) \ + DECL_READ_IMAGE_NOSAMPLER(image_type, image_data_type, suffix, int ##n, n) \ + DECL_WRITE_IMAGE(image_type, image_data_type, suffix, int ##n) \ // 1D #define DECL_IMAGE_TYPE(image_type, n) \ @@ -377,7 +384,7 @@ DECL_IMAGE_TYPE(image2d_array_t, 3) effectCoord.s0 = coord % 8192; \ effectCoord.s1 = coord / 8192; \ return __gen_ocl_read_image ##suffix( \ - cl_image, defaultSampler, effectCoord, 0); \ + cl_image, defaultSampler, convert_float2(effectCoord), 0); \ } #define DECL_IMAGE_1DBuffer(int_clamping_fix, image_data_type, suffix) \ @@ -425,9 +432,10 @@ INLINE_OVERLOADABLE int4 __gen_fixup_1darray_coord(int2 coord, image1d_array_t i coord = __gen_validate_array_index(coord, cl_image); \ if (int_clamping_fix && __gen_sampler_need_fix(sampler)) { \ int4 newCoord = __gen_fixup_1darray_coord(coord, cl_image); \ - return __gen_ocl_read_image ##suffix(cl_image, sampler, newCoord, 2); \ + return __gen_ocl_read_image ##suffix(cl_image, sampler, newCoord, 2); \ } \ - return __gen_ocl_read_image ##suffix(cl_image, sampler, coord, 0); \ + return __gen_ocl_read_image ##suffix(cl_image, sampler, \ + convert_float2 (coord), 0); \ } // For float coordiates @@ -448,18 +456,19 @@ INLINE_OVERLOADABLE int4 __gen_fixup_1darray_coord(int2 coord, image1d_array_t i tmpCoord = __gen_denormalize_coord(cl_image, tmpCoord); \ float4 newCoord = __gen_fixup_1darray_coord(tmpCoord, cl_image); \ return __gen_ocl_read_image ##suffix( \ - cl_image, sampler, newCoord, 2); \ + cl_image, sampler, convert_int4(newCoord), 2); \ } \ } \ } \ - return __gen_ocl_read_image ##suffix(cl_image, sampler, tmpCoord, 0); \ + return __gen_ocl_read_image ##suffix(cl_image, sampler, \ + convert_float2 (tmpCoord), 0); \ } #define DECL_IMAGE_1DArray(int_clamping_fix, image_data_type, suffix) \ DECL_READ_IMAGE0_1DArray(int_clamping_fix, image_data_type, suffix, int2) \ DECL_READ_IMAGE1_1DArray(int_clamping_fix, image_data_type, \ suffix, float2) \ - DECL_READ_IMAGE_NOSAMPLER(image1d_array_t, image_data_type, suffix, int2) \ + DECL_READ_IMAGE_NOSAMPLER(image1d_array_t, image_data_type, suffix, int2, 2)\ DECL_WRITE_IMAGE(image1d_array_t, image_data_type, suffix, int2) \ DECL_IMAGE_1DArray(GEN_FIX_INT_CLAMPING, int4, i) diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index b5da147..394eeed 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -3332,6 +3332,8 @@ error: bool isFloatCoord = coordType == ir::TYPE_FLOAT; bool requiredFloatCoord = samplerOffset == 0; + GBE_ASSERT(isFloatCoord == requiredFloatCoord); + vector<ir::Register> dstTupleData, srcTupleData; for (uint32_t elemID = 0; elemID < 3; elemID++) { ir::Register reg; @@ -3341,17 +3343,7 @@ error: else reg = ir::ocl::invalid; - if (isFloatCoord == requiredFloatCoord) - srcTupleData.push_back(reg); - else if (!requiredFloatCoord) { - ir::Register intCoordReg = ctx.reg(ir::RegisterFamily::FAMILY_DWORD); - ctx.CVT(ir::TYPE_S32, ir::TYPE_FLOAT, intCoordReg, reg); - srcTupleData.push_back(intCoordReg); - } else { - ir::Register floatCoordReg = ctx.reg(ir::RegisterFamily::FAMILY_DWORD); - ctx.CVT(ir::TYPE_FLOAT, ir::TYPE_S32, floatCoordReg, reg); - srcTupleData.push_back(floatCoordReg); - } + srcTupleData.push_back(reg); } uint32_t elemNum; -- 1.8.3.2 _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
