From: Junyan He <[email protected]> The a0 value cache in GenContext can only hold the value known at compile time, which may differ from the true offset value at run time when the code generates a backward jump. So just remove the a0 cache; we will use a load vector instruction to optimize it later.
Signed-off-by: Junyan He <[email protected]> --- backend/src/backend/gen8_context.cpp | 54 ++++++++-------------------------- backend/src/backend/gen_context.cpp | 51 +++++++------------------------- backend/src/backend/gen_context.hpp | 1 - 3 files changed, 24 insertions(+), 82 deletions(-) diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp index 920eb3e..2cdb248 100644 --- a/backend/src/backend/gen8_context.cpp +++ b/backend/src/backend/gen8_context.cpp @@ -98,8 +98,7 @@ namespace gbe p->curr.execWidth = 4; p->curr.predicate = GEN_PREDICATE_NONE; p->curr.noMask = 1; - GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), - a0[0], new_a0[0] - a0[0]); + GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), new_a0[0], 0); GenRegister dst_ = dst; dst_.type = GEN_TYPE_UB; dst_.hstride = GEN_HORIZONTAL_STRIDE_1; @@ -159,8 +158,7 @@ namespace gbe p->curr.execWidth = 16; p->curr.predicate = GEN_PREDICATE_NONE; p->curr.noMask = 1; - GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), - a0[0], new_a0[0] - a0[0]); + GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), new_a0[0], 0); p->MOV(GenRegister::retype(tmp, GEN_TYPE_UB), ind_src); ind_src.addr_imm += 16; p->MOV(GenRegister::offset(GenRegister::retype(tmp, GEN_TYPE_UB), 0, 16), ind_src); @@ -218,8 +216,7 @@ namespace gbe p->curr.execWidth = 16; p->curr.predicate = GEN_PREDICATE_NONE; p->curr.noMask = 1; - GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), - a0[0], new_a0[0] - a0[0]); + GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), new_a0[0], 0); p->MOV(GenRegister::retype(tmp, GEN_TYPE_UB), ind_src); if (simd == 16) { ind_src.addr_imm += 16; @@ -862,46 +859,21 @@ namespace gbe } void Gen8Context::setA0Content(uint16_t new_a0[16], uint16_t max_offset, int sz) { 
- int16_t diff = new_a0[0] - this->a0[0]; if (sz == 0) sz = 16; GBE_ASSERT(sz%4 == 0); GBE_ASSERT(new_a0[0] >= 0 && new_a0[0] < 4096); - bool need_reset = false; - for (int i = 1; i < sz; i++) { - GBE_ASSERT(new_a0[i] >= 0 && new_a0[0] < 4096); - int16_t d = new_a0[i] - this->a0[i]; - if (diff != d) { - need_reset = true; - break; - } - } - GBE_ASSERT(this->a0[0] + diff < 4096 && this->a0[0] + diff >= 0); - if (!need_reset && diff >= -512 && diff + max_offset <= 511) { - return; - } else if (!need_reset && sz == 16) { - p->push(); - p->curr.execWidth = 16; - p->curr.predicate = GEN_PREDICATE_NONE; - p->curr.noMask = 1; - p->ADD(GenRegister::retype(GenRegister::addr8(0), GEN_TYPE_W), - GenRegister::retype(GenRegister::addr8(0), GEN_TYPE_W), GenRegister::immw(diff)); - p->pop(); - } else { - p->push(); - p->curr.execWidth = 1; - p->curr.predicate = GEN_PREDICATE_NONE; - p->curr.noMask = 1; - for (int i = 0; i < sz/4; i++) { - uint64_t addr = (new_a0[i*4 + 3] << 16) | (new_a0[i*4 + 2]); - addr = addr << 32; - addr = addr | (new_a0[i*4 + 1] << 16) | (new_a0[i*4]); - p->MOV(GenRegister::retype(GenRegister::addr1(i*4), GEN_TYPE_UL), GenRegister::immuint64(addr)); - } - p->pop(); + p->push(); + p->curr.execWidth = 1; + p->curr.predicate = GEN_PREDICATE_NONE; + p->curr.noMask = 1; + for (int i = 0; i < sz/4; i++) { + uint64_t addr = (new_a0[i*4 + 3] << 16) | (new_a0[i*4 + 2]); + addr = addr << 32; + addr = addr | (new_a0[i*4 + 1] << 16) | (new_a0[i*4]); + p->MOV(GenRegister::retype(GenRegister::addr1(i*4), GEN_TYPE_UL), GenRegister::immuint64(addr)); } - memcpy(this->a0, new_a0, sizeof(uint16_t)*sz); + p->pop(); } - } diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index 094e6b4..684ecaf 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -51,7 +51,6 @@ namespace gbe this->ra = NULL; this->ifEndifFix = false; this->regSpillTick = 0; - memset(a0, 0, sizeof(a0)); } GenContext::~GenContext(void) 
{ @@ -340,8 +339,7 @@ namespace gbe p->curr.execWidth = 4; p->curr.predicate = GEN_PREDICATE_NONE; p->curr.noMask = 1; - GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), - a0[0], new_a0[0] - a0[0]); + GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), new_a0[0], 0); GenRegister dst_ = dst; dst_.type = GEN_TYPE_UB; dst_.hstride = GEN_HORIZONTAL_STRIDE_1; @@ -385,8 +383,7 @@ namespace gbe p->curr.execWidth = 8; p->curr.predicate = GEN_PREDICATE_NONE; p->curr.noMask = 1; - GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), - a0[0], new_a0[0] - a0[0]); + GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), new_a0[0], 0); p->MOV(GenRegister::retype(tmp, GEN_TYPE_UB), ind_src); for (int i = 1; i < 4; i++) { ind_src.addr_imm += 8; @@ -430,8 +427,7 @@ namespace gbe p->curr.execWidth = 8; p->curr.predicate = GEN_PREDICATE_NONE; p->curr.noMask = 1; - GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), - a0[0], new_a0[0] - a0[0]); + GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), new_a0[0], 0); p->MOV(GenRegister::retype(tmp, GEN_TYPE_UB), ind_src); for (int i = 1; i < (simd == 8 ? 
2 : 4); i++) { ind_src.addr_imm += 8; @@ -1951,45 +1947,20 @@ namespace gbe } void GenContext::setA0Content(uint16_t new_a0[16], uint16_t max_offset, int sz) { - int16_t diff = new_a0[0] - this->a0[0]; - if (sz == 0) sz = 8; GBE_ASSERT(sz%4 == 0); GBE_ASSERT(new_a0[0] >= 0 && new_a0[0] < 4096); - bool need_reset = false; - for (int i = 1; i < sz; i++) { - GBE_ASSERT(new_a0[i] >= 0 && new_a0[0] < 4096); - int16_t d = new_a0[i] - this->a0[i]; - if (diff != d) { - need_reset = true; - break; - } - } - GBE_ASSERT(a0[0] + diff < 4096 && a0[0] + diff >= 0); - if (!need_reset && diff >= -512 && diff + max_offset <= 511) { - return; - } else if (!need_reset && sz == 8) { - p->push(); - p->curr.execWidth = 8; - p->curr.predicate = GEN_PREDICATE_NONE; - p->curr.noMask = 1; - p->ADD(GenRegister::retype(GenRegister::addr8(0), GEN_TYPE_W), - GenRegister::retype(GenRegister::addr8(0), GEN_TYPE_W), GenRegister::immw(diff)); - p->pop(); - } else { - p->push(); - p->curr.execWidth = 1; - p->curr.predicate = GEN_PREDICATE_NONE; - p->curr.noMask = 1; - for (int i = 0; i < sz/2; i++) { - p->MOV(GenRegister::retype(GenRegister::addr1(i*2), GEN_TYPE_UD), - GenRegister::immud(new_a0[i*2 + 1] << 16 | new_a0[i*2])); - } - p->pop(); + p->push(); + p->curr.execWidth = 1; + p->curr.predicate = GEN_PREDICATE_NONE; + p->curr.noMask = 1; + for (int i = 0; i < sz/2; i++) { + p->MOV(GenRegister::retype(GenRegister::addr1(i*2), GEN_TYPE_UD), + GenRegister::immud(new_a0[i*2 + 1] << 16 | new_a0[i*2])); } - memcpy(this->a0, new_a0, sizeof(uint16_t)*sz); + p->pop(); } BVAR(OCL_OUTPUT_REG_ALLOC, false); diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp index 6ca88db..560248a 100644 --- a/backend/src/backend/gen_context.hpp +++ b/backend/src/backend/gen_context.hpp @@ -208,7 +208,6 @@ namespace gbe /*! allocate a new curbe register and insert to curbe pool. 
*/ void allocCurbeReg(ir::Register reg, gbe_curbe_type value, uint32_t subValue = 0); - uint16_t a0[16]; virtual void setA0Content(uint16_t new_a0[16], uint16_t max_offset = 0, int sz = 0); private: -- 1.7.9.5 _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
