From: Grigore Lupescu <grigore.lupescu at intel.com>

Signed-off-by: Grigore Lupescu <grigore.lupescu at intel.com>
---
 backend/src/backend/gen_insn_selection.cpp | 80 +++++++++++++++++++++++++-----
 1 file changed, 67 insertions(+), 13 deletions(-)

diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index cffb016..07bdef8 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -6486,10 +6486,10 @@ namespace gbe
     INLINE bool emitWGBroadcast(Selection::Opaque &sel, const 
ir::WorkGroupInstruction &insn) const {
       /*  1. BARRIER    Ensure all the threads have set the correct value for 
the var which will be broadcasted.
           2. CMP IDs    Compare the local IDs with the specified ones in the 
function call.
-          3. STORE        Use flag to control the store of the var. Only the 
specified item will execute the store.
+          3. STORE         Use flag to control the store of the var. Only the 
specified item will execute the store.
           4. BARRIER    Ensure the specified value has been stored.
-          5. LOAD         Load the stored value to all the dst value, the dst 
of all the items will have same value,
-          so broadcasted.      */
+          5. LOAD          Load the stored value to all the dst value, the dst 
of all the items will have same value,
+          so broadcasted.       */
       using namespace ir;
       const Type type = insn.getType();
       const GenRegister src = sel.selReg(insn.getSrc(0), type);
@@ -6498,11 +6498,13 @@ namespace gbe
       const uint32_t slmAddr = insn.getSlmAddr();
       GenRegister addr = sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32);
       vector<GenRegister> fakeTemps;
+      fakeTemps.push_back(sel.selReg(sel.reg(FAMILY_DWORD), type));
+      fakeTemps.push_back(sel.selReg(sel.reg(FAMILY_DWORD), type));
 
       GBE_ASSERT(srcNum >= 2);
       GenRegister coords[3];
       for (uint32_t i = 1; i < srcNum; i++) {
-        coords[i - 1] = sel.selReg(insn.getSrc(i), TYPE_U32);
+        coords[i - 1] = GenRegister::toUniform(sel.selReg(insn.getSrc(i), 
TYPE_U32), GEN_TYPE_UD);
       }
 
       sel.push(); {
@@ -6522,9 +6524,9 @@ namespace gbe
         sel.curr.noMask = 1;
         GenRegister lid0, lid1, lid2;
         uint32_t dim = srcNum - 1;
-        lid0 = sel.selReg(ir::ocl::lid0);
-        lid1 = sel.selReg(ir::ocl::lid1);
-        lid2 = sel.selReg(ir::ocl::lid2);
+        lid0 = GenRegister::retype(sel.selReg(ir::ocl::lid0, TYPE_U32), 
GEN_TYPE_UD);
+        lid1 = GenRegister::retype(sel.selReg(ir::ocl::lid1, TYPE_U32), 
GEN_TYPE_UD);
+        lid2 = GenRegister::retype(sel.selReg(ir::ocl::lid2, TYPE_U32), 
GEN_TYPE_UD);
 
         sel.CMP(GEN_CONDITIONAL_EQ, coords[0], lid0, 
GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));
         sel.curr.predicate = GEN_PREDICATE_NORMAL;
@@ -6533,22 +6535,74 @@ namespace gbe
         if (dim >= 3)
           sel.CMP(GEN_CONDITIONAL_EQ, coords[2], lid2, 
GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));
 
-        if (typeSize(src.type) == 4) {
-          GenRegister _addr = GenRegister::retype(addr, GEN_TYPE_F);
-          GenRegister _src = GenRegister::retype(src, GEN_TYPE_F);
+        /* write to SLM for BYTE/WORD/DWORD types */
+        if (typeSize(src.type) <= 4) {
+          GenRegister _addr = GenRegister::retype(addr, GEN_TYPE_UD);
+          GenRegister _src = GenRegister::retype(src, GEN_TYPE_UD);
           sel.UNTYPED_WRITE(_addr, &_src, 1, GenRegister::immw(0xfe), 
fakeTemps);
         }
+        /* write to SLM for QWORD types */
+        else if (typeSize(src.type) == 8) {
+          sel.push(); {
+          /* arrange data in QWORD */
+          GenRegister _addr = GenRegister::retype(addr, GEN_TYPE_UD);
+          GenRegister srcQW = sel.selReg(sel.reg(FAMILY_QWORD), ir::TYPE_U64);
+          GenRegister srcQW_p1 = src.retype(srcQW, GEN_TYPE_UD);
+          GenRegister srcQW_p2 = src.retype(src.offset(srcQW, 2, 0), 
GEN_TYPE_UD);
+          vector<GenRegister> srcVec;
+          srcVec.push_back(srcQW_p1);
+          srcVec.push_back(srcQW_p2);
+
+          /* unpack into 2 DWORDs */
+          sel.UNPACK_LONG(srcQW, src);
+
+          /* perform write to SLM */
+          sel.UNTYPED_WRITE(_addr, srcVec.data(), 2, GenRegister::immw(0xfe), 
fakeTemps);
+          }sel.pop();
+        }
+        else
+          GBE_ASSERT(0);
+
       } sel.pop();
-      /* Make sure the slm var have the valid value now */
+      /* make sure the SLM var has a valid value now */
       sel.BARRIER(GenRegister::ud8grf(sel.reg(FAMILY_DWORD)), 
sel.selReg(sel.reg(FAMILY_DWORD)), syncLocalBarrier);
 
-      if (typeSize(src.type) == 4) {
-        sel.UNTYPED_READ(addr, &dst, 1, GenRegister::immw(0xfe), fakeTemps);
+      /* read from SLM for BYTE/WORD/DWORD types */
+      if (typeSize(src.type) <= 4) {
+        GenRegister _addr = GenRegister::retype(addr, GEN_TYPE_UD);
+        GenRegister _dst = GenRegister::retype(dst, GEN_TYPE_UD);
+        sel.UNTYPED_READ(_addr, &_dst, 1, GenRegister::immw(0xfe), fakeTemps);
+      }
+      /* read from SLM for QWORD types */
+      else if (typeSize(src.type) == 8) {
+        GenRegister _addr = GenRegister::retype(addr, GEN_TYPE_UD);
+        vector<GenRegister> _dst;
+        _dst.push_back(sel.selReg(sel.reg(FAMILY_WORD), ir::TYPE_U32));
+        _dst.push_back(sel.selReg(sel.reg(FAMILY_WORD), ir::TYPE_U32));
+        GenRegister _dstQ = dst.toUniform(_dst[0], GEN_TYPE_UL);
+
+        sel.push(); {
+        /* read from SLM */
+        sel.curr.execWidth = 8;
+        sel.UNTYPED_READ(_addr, _dst.data(), 2, GenRegister::immw(0xfe), 
fakeTemps);
+
+        /* reconstruct QWORD type */
+        _dst[0] = dst.toUniform(dst.offset(_dst[0], 0, 4), GEN_TYPE_UD);
+        _dst[1] = dst.toUniform(_dst[1], GEN_TYPE_UD);
+        sel.curr.execWidth = 1;
+        sel.MOV(_dst[0], _dst[1]);
+        } sel.pop();
+
+        /* set all elements assigned to thread */
+        sel.MOV(dst, _dstQ);
       }
+      else
+        GBE_ASSERT(0);
 
       return true;
     }
 
+
     INLINE bool emitOne(Selection::Opaque &sel, const ir::WorkGroupInstruction 
&insn, bool &markChildren) const
     {
       using namespace ir;
-- 
2.5.0

_______________________________________________
Beignet mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/beignet

Reply via email to