On Fri, Apr 11, 2014 at 03:42:48AM +0000, Yang, Rong R wrote:
> Two comments.
>
> -----Original Message-----
> From: Beignet [mailto:[email protected]] On Behalf Of
> Zhigang Gong
> Sent: Thursday, April 10, 2014 12:41 PM
> To: [email protected]
> Cc: Gong, Zhigang
> Subject: [Beignet] [PATCH 1/2] GBE: Optimize read_image performance for
> CL_ADDRESS_CLAMP..
>
> The previous work around(due to hardware restriction.) is to use
> CL_ADDRESS_CLAMP_TO_EDGE to implement CL_ADDRESS_CLAMP which is not very
> efficient, especially for the boundary checking overhead.
> The root cause is that we need to check each pixel's coordinate.
>
> Now we change to use the LD message to implement CL_ADDRESS_CLAMP. For
> integer coordinates, we don't need to do the boundary checking. And for the
> float coordinates, we only need to check whether it's less than zero which is
> much simpler than before.
>
> This patch could bring about 20% to 30% performance gain for luxmark's medium
> and simple scene.
>
> Signed-off-by: Zhigang Gong <[email protected]>
> ---
> backend/src/backend/gen_context.cpp | 2 +-
> backend/src/backend/gen_defs.hpp | 4 +-
> backend/src/backend/gen_encoder.cpp | 7 +--
> backend/src/backend/gen_encoder.hpp | 3 +-
> backend/src/backend/gen_insn_selection.cpp | 32 +++++++++----
> backend/src/backend/gen_insn_selection.hpp | 1 +
> backend/src/llvm/llvm_gen_backend.cpp | 29 +++++++++++-
> backend/src/llvm/llvm_gen_ocl_function.hxx | 8 +++-
> backend/src/llvm/llvm_scalarize.cpp | 9 +++-
> backend/src/ocl_stdlib.tmpl.h | 72
> +++++++++++++++++++++---------
> src/intel/intel_driver.c | 2 +-
> src/intel/intel_gpgpu.c | 15 +------
> 12 files changed, 129 insertions(+), 55 deletions(-)
>
> diff --git a/backend/src/backend/gen_context.cpp
> b/backend/src/backend/gen_context.cpp
> index 50f10c5..ea673b6 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -1848,7 +1848,7 @@ namespace gbe
> const unsigned char sampler = insn.extra.sampler;
> const unsigned int msgLen = insn.extra.rdmsglen;
> uint32_t simdWidth = p->curr.execWidth;
> - p->SAMPLE(dst, msgPayload, msgLen, false, bti, sampler, simdWidth, -1,
> 0);
> + p->SAMPLE(dst, msgPayload, msgLen, false, bti, sampler, simdWidth,
> + -1, 0, insn.extra.isLD);
> }
>
> void GenContext::scratchWrite(const GenRegister header, uint32_t offset,
> uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode) { diff --git
> a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp
> index e731174..f24d924 100644
> --- a/backend/src/backend/gen_defs.hpp
> +++ b/backend/src/backend/gen_defs.hpp
> @@ -370,8 +370,8 @@ enum GenMessageTarget {
> #define GEN_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2
> #define GEN_SAMPLER_MESSAGE_SIMD16_RESINFO 2
> #define GEN_SAMPLER_MESSAGE_SIMD4X2_LD 3
> -#define GEN_SAMPLER_MESSAGE_SIMD8_LD 3
> -#define GEN_SAMPLER_MESSAGE_SIMD16_LD 3
> +#define GEN_SAMPLER_MESSAGE_SIMD8_LD 7
> +#define GEN_SAMPLER_MESSAGE_SIMD16_LD 7
>
> #define GEN5_SAMPLER_MESSAGE_SAMPLE 0
> #define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS 1
> diff --git a/backend/src/backend/gen_encoder.cpp
> b/backend/src/backend/gen_encoder.cpp
> index 9df031e..ce9be09 100644
> --- a/backend/src/backend/gen_encoder.cpp
> +++ b/backend/src/backend/gen_encoder.cpp
> @@ -1264,11 +1264,12 @@ namespace gbe
> unsigned char sampler,
> uint32_t simdWidth,
> uint32_t writemask,
> - uint32_t return_format)
> + uint32_t return_format,
> + bool isLD)
> {
> if (writemask == 0) return;
> - uint32_t msg_type = (simdWidth == 16) ?
> - GEN_SAMPLER_MESSAGE_SIMD16_SAMPLE :
> GEN_SAMPLER_MESSAGE_SIMD8_SAMPLE;
> + uint32_t msg_type = isLD ? GEN_SAMPLER_MESSAGE_SIMD8_LD :
> + GEN_SAMPLER_MESSAGE_SIMD8_SAMPLE;
> uint32_t response_length = (4 * (simdWidth / 8));
> uint32_t msg_length = (msg_len * (simdWidth / 8));
> if (header_present)
> diff --git a/backend/src/backend/gen_encoder.hpp
> b/backend/src/backend/gen_encoder.hpp
> index 50662fb..321c8c1 100644
> --- a/backend/src/backend/gen_encoder.hpp
> +++ b/backend/src/backend/gen_encoder.hpp
> @@ -185,7 +185,8 @@ namespace gbe
> unsigned char sampler,
> unsigned int simdWidth,
> uint32_t writemask,
> - uint32_t return_format);
> + uint32_t return_format,
> + bool isLD);
>
> /*! TypedWrite instruction for texture */
> void TYPED_WRITE(GenRegister header, diff --git
> a/backend/src/backend/gen_insn_selection.cpp
> b/backend/src/backend/gen_insn_selection.cpp
> index 961f3af..fea0329 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -559,7 +559,7 @@ namespace gbe
> /*! Encode ternary instructions */
> void ALU3(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, Reg src2);
> /*! Encode sample instructions */
> - void SAMPLE(GenRegister *dst, uint32_t dstNum, GenRegister *msgPayloads,
> uint32_t msgNum, uint32_t bti, uint32_t sampler, bool is3D);
> + void SAMPLE(GenRegister *dst, uint32_t dstNum, GenRegister
> + *msgPayloads, uint32_t msgNum, uint32_t bti, uint32_t sampler, bool
> + isLD);
> /*! Encode typed write instructions */
> void TYPED_WRITE(GenRegister *msgs, uint32_t msgNum, uint32_t bti, bool
> is3D);
> /*! Get image information */
> @@ -1500,7 +1500,7 @@ namespace gbe
>
> void Selection::Opaque::SAMPLE(GenRegister *dst, uint32_t dstNum,
> GenRegister *msgPayloads, uint32_t msgNum,
> - uint32_t bti, uint32_t sampler, bool is3D) {
> + uint32_t bti, uint32_t sampler, bool
> + isLD) {
> SelectionInstruction *insn = this->appendInsn(SEL_OP_SAMPLE, dstNum,
> msgNum);
> SelectionVector *dstVector = this->appendVector();
> SelectionVector *msgVector = this->appendVector(); @@ -1524,6 +1524,7 @@
> namespace gbe
> insn->extra.rdbti = bti;
> insn->extra.sampler = sampler;
> insn->extra.rdmsglen = msgNum;
> + insn->extra.isLD = isLD;
> }
>
> ///////////////////////////////////////////////////////////////////////////
> @@ -3161,21 +3162,36 @@ namespace gbe
> GenRegister dst[insn.getDstNum()];
> uint32_t srcNum = insn.getSrcNum();
> uint32_t valueID = 0;
> + uint32_t msgLen = 0;
>
> for (valueID = 0; valueID < insn.getDstNum(); ++valueID)
> dst[valueID] = sel.selReg(insn.getDst(valueID), insn.getDstType());
>
> if (!insn.is3D())
> srcNum--;
> - /* U, V, [W] */
> - for (valueID = 0; valueID < srcNum; ++valueID)
> - msgPayloads[valueID] = sel.selReg(insn.getSrc(valueID),
> insn.getSrcType());
>
> + if (insn.getSamplerOffset() != 0) {
> + // U, lod, V, [W]
> + GBE_ASSERT(insn.getSrcType() != TYPE_FLOAT);
> + msgPayloads[0] = sel.selReg(insn.getSrc(0), insn.getSrcType());
> + msgPayloads[1] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
> + msgPayloads[2] = sel.selReg(insn.getSrc(1), insn.getSrcType());
> + if (srcNum > 2)
> + msgPayloads[3] = sel.selReg(insn.getSrc(2), insn.getSrcType());
> + // Clear the lod to zero.
> + sel.MOV(msgPayloads[1], GenRegister::immud(0));
> + msgLen = srcNum + 1;
> + } else {
> + // U, V, [W]
> + GBE_ASSERT(insn.getSrcType() == TYPE_FLOAT);
> + for (valueID = 0; valueID < srcNum; ++valueID)
> + msgPayloads[valueID] = sel.selReg(insn.getSrc(valueID),
> insn.getSrcType());
> + msgLen = srcNum;
> + }
> uint32_t bti = insn.getImageIndex();
> - /* We have the clamp border workaround. */
> - uint32_t sampler = insn.getSamplerIndex() + insn.getSamplerOffset() *
> 8;
> + uint32_t sampler = insn.getSamplerIndex();
>
> - sel.SAMPLE(dst, insn.getDstNum(), msgPayloads, srcNum, bti, sampler,
> insn.is3D());
> + sel.SAMPLE(dst, insn.getDstNum(), msgPayloads, msgLen, bti,
> + sampler, insn.getSamplerOffset());
> return true;
> }
> DECL_CTOR(SampleInstruction, 1, 1); diff --git
> a/backend/src/backend/gen_insn_selection.hpp
> b/backend/src/backend/gen_insn_selection.hpp
> index 85974f0..ad8c4ec 100644
> --- a/backend/src/backend/gen_insn_selection.hpp
> +++ b/backend/src/backend/gen_insn_selection.hpp
> @@ -123,6 +123,7 @@ namespace gbe
> uint16_t rdbti:8;
> uint16_t sampler:5;
> uint16_t rdmsglen:3;
> + bool isLD; // is this a ld message?
> };
> uint32_t barrierType;
> bool longjmp;
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp
> b/backend/src/llvm/llvm_gen_backend.cpp
> index 5a2ba16..b46e991 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -2210,6 +2210,12 @@ namespace gbe
> case GEN_OCL_READ_IMAGE_I_3D:
> case GEN_OCL_READ_IMAGE_UI_3D:
> case GEN_OCL_READ_IMAGE_F_3D:
> + case GEN_OCL_READ_IMAGE_I_I:
> + case GEN_OCL_READ_IMAGE_UI_I:
> + case GEN_OCL_READ_IMAGE_F_I:
> + case GEN_OCL_READ_IMAGE_I_3D_I:
> + case GEN_OCL_READ_IMAGE_UI_3D_I:
> + case GEN_OCL_READ_IMAGE_F_3D_I:
> {
> // dst is a 4 elements vector. We allocate all 4 registers here.
> uint32_t elemNum;
> @@ -2480,6 +2486,12 @@ namespace gbe
> case GEN_OCL_READ_IMAGE_I_3D:
> case GEN_OCL_READ_IMAGE_UI_3D:
> case GEN_OCL_READ_IMAGE_F_3D:
> + case GEN_OCL_READ_IMAGE_I_I:
> + case GEN_OCL_READ_IMAGE_UI_I:
> + case GEN_OCL_READ_IMAGE_F_I:
> + case GEN_OCL_READ_IMAGE_I_3D_I:
> + case GEN_OCL_READ_IMAGE_UI_3D_I:
> + case GEN_OCL_READ_IMAGE_F_3D_I:
> {
> GBE_ASSERT(AI != AE); const ir::Register surfaceReg =
> this->getRegister(*AI); ++AI;
> const uint8_t surfaceID =
> ctx.getFunction().getImageSet()->getIdx(surfaceReg);
> @@ -2491,7 +2503,12 @@ namespace gbe
> GBE_ASSERT(AI != AE); const ir::Register vcoord =
> this->getRegister(*AI); ++AI;
> ir::Register wcoord;
> bool is3D = false;
> - if (it->second >= GEN_OCL_READ_IMAGE_I_3D) {
> + if (it->second == GEN_OCL_READ_IMAGE_I_3D ||
> + it->second == GEN_OCL_READ_IMAGE_UI_3D ||
> + it->second == GEN_OCL_READ_IMAGE_F_3D ||
> + it->second == GEN_OCL_READ_IMAGE_I_3D_I ||
> + it->second == GEN_OCL_READ_IMAGE_UI_3D_I ||
> + it->second == GEN_OCL_READ_IMAGE_F_3D_I) {
> GBE_ASSERT(AI != AE); wcoord = this->getRegister(*AI); ++AI;
> is3D = true;
> } else
> @@ -2524,18 +2541,26 @@ namespace gbe
> case GEN_OCL_READ_IMAGE_UI:
> case GEN_OCL_READ_IMAGE_I_3D:
> case GEN_OCL_READ_IMAGE_UI_3D:
> + case GEN_OCL_READ_IMAGE_I_I:
> + case GEN_OCL_READ_IMAGE_UI_I:
> + case GEN_OCL_READ_IMAGE_I_3D_I:
> + case GEN_OCL_READ_IMAGE_UI_3D_I:
> dstType = ir::TYPE_U32;
> break;
> case GEN_OCL_READ_IMAGE_F:
> case GEN_OCL_READ_IMAGE_F_3D:
> + case GEN_OCL_READ_IMAGE_F_I:
> + case GEN_OCL_READ_IMAGE_F_3D_I:
> dstType = ir::TYPE_FLOAT;
> break;
> default:
> GBE_ASSERT(0); // never been here.
> }
>
> + bool isFloatCoord = it->second <= GEN_OCL_READ_IMAGE_F_3D;
> +
> ctx.SAMPLE(surfaceID, dstTuple, srcTuple, dstType ==
> ir::TYPE_FLOAT,
> - true, sampler, samplerOffset, is3D);
> + isFloatCoord, sampler, samplerOffset, is3D);
> break;
> }
> case GEN_OCL_WRITE_IMAGE_I:
> diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx
> b/backend/src/llvm/llvm_gen_ocl_function.hxx
> index 5bf794a..7058a60 100644
> --- a/backend/src/llvm/llvm_gen_ocl_function.hxx
> +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
> @@ -49,10 +49,16 @@ DECL_LLVM_GEN_FUNCTION(FORCE_SIMD16,
> __gen_ocl_force_simd16) DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I,
> _Z21__gen_ocl_read_imageijtffj) DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI,
> _Z22__gen_ocl_read_imageuijtffj) DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F,
> _Z21__gen_ocl_read_imagefjtffj)
> -
> DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_3D, _Z21__gen_ocl_read_imageijtfffj)
> DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_3D, _Z22__gen_ocl_read_imageuijtfffj)
> DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_3D, _Z21__gen_ocl_read_imagefjtfffj)
> +// work around read image with the LD message. The coords are integer type.
> +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_I, _Z21__gen_ocl_read_imageijtiij)
> +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_I,
> +_Z22__gen_ocl_read_imageuijtiij) DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_I,
> +_Z21__gen_ocl_read_imagefjtiij)
> +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_3D_I,
> +_Z21__gen_ocl_read_imageijtiiij)
> +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_3D_I,
> +_Z22__gen_ocl_read_imageuijtiiij)
> +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_3D_I,
> +_Z21__gen_ocl_read_imagefjtiiij)
>
> // To write_image functions.
> DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I, _Z22__gen_ocl_write_imageijiiDv4_i)
> diff --git a/backend/src/llvm/llvm_scalarize.cpp
> b/backend/src/llvm/llvm_scalarize.cpp
> index 7095473..911be30 100644
> --- a/backend/src/llvm/llvm_scalarize.cpp
> +++ b/backend/src/llvm/llvm_scalarize.cpp
> @@ -1,4 +1,4 @@
> -/*
> +;/*
> * Copyright © 2012 Intel Corporation
> *
> * This library is free software; you can redistribute it and/or @@ -649,6
> +649,12 @@ namespace gbe {
> case GEN_OCL_READ_IMAGE_I_3D:
> case GEN_OCL_READ_IMAGE_UI_3D:
> case GEN_OCL_READ_IMAGE_F_3D:
> + case GEN_OCL_READ_IMAGE_I_I:
> + case GEN_OCL_READ_IMAGE_UI_I:
> + case GEN_OCL_READ_IMAGE_F_I:
> + case GEN_OCL_READ_IMAGE_I_3D_I:
> + case GEN_OCL_READ_IMAGE_UI_3D_I:
> + case GEN_OCL_READ_IMAGE_F_3D_I:
> case GEN_OCL_GET_IMAGE_WIDTH:
> case GEN_OCL_GET_IMAGE_HEIGHT:
> {
> @@ -797,7 +803,6 @@ namespace gbe {
> for (SmallVectorImpl<PHINode*>::iterator phiI = incompletePhis.begin(),
> phiE = incompletePhis.end();
> phiI != phiE; ++phiI) {
> assert(canGetComponentArgs(*phiI) && "Phi's operands never
> scalarized");
> -
> // Fill in each component of this phi
> VectorValues& vVals = vectorVals[*phiI];
> for (int c = 0; c < GetComponentCount(*phiI); ++c) { diff --git
> a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h index
> 50107d8..b7dc607 100755
> --- a/backend/src/ocl_stdlib.tmpl.h
> +++ b/backend/src/ocl_stdlib.tmpl.h
> @@ -4538,12 +4538,18 @@ int __gen_ocl_force_simd16(void);
> /////////////////////////////////////////////////////////////////////////////
>
> OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler,
> float u, float v, uint sampler_offset);
> +OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t
> +sampler, int u, int v, uint sampler_offset);
> OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t
> sampler, float u, float v, uint sampler_offset);
> +OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t
> +sampler, int u, int v, uint sampler_offset);
> OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t
> sampler, float u, float v, uint sampler_offset);
> +OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t
> +sampler, int u, int v, uint sampler_offset);
>
> OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler,
> float u, float v, float w, uint sampler_offset);
> +OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t
> +sampler, int u, int v, int w, uint sampler_offset);
> OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t
> sampler, float u, float v, float w, uint sampler_offset);
> +OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t
> +sampler, int u, int v, int w, uint sampler_offset);
> OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t
> sampler, float u, float v, float w, uint sampler_offset);
> +OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t
> +sampler, int u, int v, int w, uint sampler_offset);
>
> OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int u, int v, int4
> color); OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int u,
> int v, uint4 color); @@ -4567,8 +4573,27 @@ int
> __gen_ocl_get_image_depth(uint surface_id); #define GEN_FIX_1 0 #endif
>
> -#define DECL_READ_IMAGE(float_coord_rounding_fix, int_clamping_fix,
> \
> - image_type, type, suffix, coord_type)
> \
> +#define DECL_READ_IMAGE0(float_coord_rounding_fix, int_clamping_fix,
> \
> + image_type, type, suffix, coord_type, n)
> \
> + INLINE_OVERLOADABLE type read_image ##suffix(image_type cl_image,
> \
> + const sampler_t sampler,
> \
> + coord_type coord)
> \
> + {
> \
> + GET_IMAGE(cl_image, surface_id);
> \
> + if (float_coord_rounding_fix | int_clamping_fix) {
> \
> + if (((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP)
> \
> + && ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST)) {
> \
> + if (int_clamping_fix)
> \
> + return __gen_ocl_read_image ##suffix(
> \
> + EXPEND_READ_COORD(surface_id, sampler, coord), 1);\
> + }
> \
> + }
> \
> + return __gen_ocl_read_image ##suffix(
> \
> + EXPEND_READ_COORD(surface_id, sampler,
> +(float)coord), 0);\
> + }
> +
> >>>>>>>>>>>>> float_coord_rounding_fix is useless in DECL_READ_IMAGE0. In
> >>>>>>>>>>>>> fact, the only difference of two return is the last parameter.
> >>>>>>>>>>>>> So why not use a var to avoid the if blocks.
Good idea. After this patch, the macro could be simplified a little bit.
>
>
>
> +#define DECL_READ_IMAGE1(float_coord_rounding_fix, int_clamping_fix,
> \
> + image_type, type, suffix, coord_type, n)
> \
> INLINE_OVERLOADABLE type read_image ##suffix(image_type cl_image,
> \
> const sampler_t sampler,
> \
> coord_type coord)
> \
> @@ -4576,25 +4601,20 @@ int __gen_ocl_get_image_depth(uint surface_id);
> GET_IMAGE(cl_image, surface_id);
> \
> coord_type tmpCoord = coord;
> \
> if (float_coord_rounding_fix | int_clamping_fix) {
> \
> - if (((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP) \
> - && ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST)) { \
> + if (((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP)
> \
> + && ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST)) {
> \
> if (float_coord_rounding_fix
> \
> - && ((sampler & CLK_NORMALIZED_COORDS_TRUE) == 0)) { \
> + && ((sampler & CLK_NORMALIZED_COORDS_TRUE) == 0)) {
> \
> FIXUP_FLOAT_COORD(tmpCoord);
> \
> }
> \
> if (int_clamping_fix) {
> \
> - if (OUT_OF_BOX(tmpCoord, surface_id,
> \
> - (sampler & CLK_NORMALIZED_COORDS_TRUE))) { \
> - unsigned int border_alpha;
> \
> - int order = __gen_ocl_get_image_channel_order(surface_id);
> \
> - if (!CLK_HAS_ALPHA(order)) {
> \
> - border_alpha = 1;
> \
> + coord_type intCoord;
> \
> + if (sampler & CLK_NORMALIZED_COORDS_TRUE) {
> \
> + DENORMALIZE_COORD(surface_id, intCoord, tmpCoord);
> \
> } else
> \
> - border_alpha = 0;
> \
> - return (type)(0, 0, 0, border_alpha);
> \
> - } else
> \
> + intCoord = tmpCoord;
> \
> return __gen_ocl_read_image ##suffix(
> \
> - EXPEND_READ_COORD(surface_id, sampler, tmpCoord),
> 1);\
> + EXPEND_READ_COORD1(surface_id, sampler,
> + intCoord), 1);\
> }
> \
> }
> \
> }
> \
> >>>>>>Now only float coordinate use DECL_READ_IMAGE1, why still need
> >>>>>>int_clamping_fix here?
The int clamping fix is for the integer surface type, not for the coordinate
type. We need this fix for both float and integer coordinates whenever the
surface is of an integer type.
_______________________________________________
Beignet mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/beignet