From: Junyan He <[email protected]>

Signed-off-by: Junyan He <[email protected]>
---
 backend/src/backend/gen_insn_selection.cpp |  106 +++++++++++++++++++++++++++-
 backend/src/backend/gen_register.hpp       |   10 +++
 2 files changed, 114 insertions(+), 2 deletions(-)

diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index f8f1d29..797233e 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -474,6 +474,8 @@ namespace gbe
     uint32_t vectorNum;
     /*! If true, generate code backward */
     bool bwdCodeGeneration;
+    /*! If true, the thread map has been stored */
+    bool storeThreadMap;
     /*! To make function prototypes more readable */
     typedef const GenRegister &Reg;
 
@@ -701,8 +703,9 @@ namespace gbe
     ctx(ctx), block(NULL),
     curr(ctx.getSimdWidth()), file(ctx.getFunction().getRegisterFile()),
     maxInsnNum(ctx.getFunction().getLargestBlockSize()), dagPool(maxInsnNum),
-    stateNum(0), vectorNum(0), bwdCodeGeneration(false), 
currAuxLabel(ctx.getFunction().labelNum()),
-    bHas32X32Mul(false), bHasLongType(false), ldMsgOrder(LD_MSG_ORDER_IVB)
+    stateNum(0), vectorNum(0), bwdCodeGeneration(false), storeThreadMap(false),
+    currAuxLabel(ctx.getFunction().labelNum()), bHas32X32Mul(false), 
bHasLongType(false),
+    ldMsgOrder(LD_MSG_ORDER_IVB)
   {
     const ir::Function &fn = ctx.getFunction();
     this->regNum = fn.regNum();
@@ -4110,6 +4113,105 @@ namespace gbe
   /*! WorkGroup instruction pattern */
   DECL_PATTERN(WorkGroupInstruction)
   {
+    INLINE bool storeThreadID(Selection::Opaque &sel, uint32_t slmAddr) const
+    { /*! Emit instructions that copy sr0.0 (as UW) into a temporary and scatter it to address slmAddr + 2 * ocl::threadid; always returns true */
+      using namespace ir;
+      GenRegister sr0_0 = GenRegister::retype(GenRegister::sr(0), GEN_TYPE_UW); /* state register 0, viewed as unsigned words */
+      const uint32_t simdWidth = sel.ctx.getSimdWidth();
+      GenRegister tmp; /* receives the value read from sr0_0 */
+      GenRegister addr; /* receives the computed destination address */
+      if (simdWidth == 16) { /* SIMD16: allocate word-family registers and retype them to UD */
+        tmp = GenRegister::retype(sel.selReg(sel.reg(FAMILY_WORD), 
ir::TYPE_U16), GEN_TYPE_UD);
+        addr = GenRegister::retype(sel.selReg(sel.reg(FAMILY_WORD), 
ir::TYPE_U16), GEN_TYPE_UD);
+      } else { /* any other SIMD width: allocate dword-family registers directly */
+        tmp = sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32);
+        addr = sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32);
+      }
+
+      sr0_0.vstride = GEN_VERTICAL_STRIDE_0; /* these three fields turn sr0_0 into a scalar <0;1,0> region */
+      sr0_0.hstride = GEN_HORIZONTAL_STRIDE_0;
+      sr0_0.width = GEN_WIDTH_1;
+      sel.push(); { /* save the selection state; restored by the matching pop() */
+        sel.curr.predicate = GEN_PREDICATE_NONE; /* unpredicated ... */
+        sel.curr.noMask = 1; /* ... and not subject to the execution mask */
+        sel.curr.execWidth = 8;
+
+        sel.MOV(tmp, sr0_0); /* tmp = sr0.0:uw, presumably the hardware thread identifier - confirm against the PRM */
+
+        sel.MUL(addr, sel.selReg(ocl::threadid, ir::TYPE_U32), 
GenRegister::immud(2));
+        sel.ADD(addr, addr, GenRegister::immud(slmAddr)); /* addr = slmAddr + 2 * threadid */
+
+        sel.push(); {
+          sel.curr.predicate = GEN_PREDICATE_NONE; /* same values as the enclosing state; needed for the flag write below */
+          sel.curr.noMask = 1;
+          sel.push(); {
+            sel.curr.execWidth = 1; /* single-channel write of the flag register */
+            sel.MOV(GenRegister::flag(0, 1), GenRegister::immuw(0x01)); /* f0.1 = 0x01: only bit 0 set, presumably so a single lane does the store - confirm */
+          } sel.pop();
+          sel.curr.flag = 0; /* predicate the scatter on f0.1, the flag written just above */
+          sel.curr.subFlag = 1;
+          sel.curr.predicate = GEN_PREDICATE_NORMAL;
+          sel.curr.noMask = 0;
+          sel.BYTE_SCATTER(addr, tmp, 1, 0xfe); /* NOTE(review): 1 is presumably the element-size selector and 0xfe the SLM binding-table index - confirm */
+        } sel.pop();
+      } sel.pop();
+      return true; /* no failure path exists in this function */
+    }
+
+    INLINE GenRegister getNextThreadID(Selection::Opaque &sel, uint32_t 
slmAddr) const
+    { /*! Emit instructions that gather the value stored at slmAddr + 2 * ((ocl::threadid + 1) wrapped to 0 at ocl::threadn); returns the register receiving it */
+      using namespace ir;
+      const uint32_t simdWidth = sel.ctx.getSimdWidth();
+      GenRegister addr; /* receives the computed source address */
+      GenRegister nextThread; /* receives the wrapped index of the following thread */
+      GenRegister tid; /* receives the gathered value; this is the return value */
+      if (simdWidth == 16) { /* SIMD16: allocate word-family registers and retype them to UD */
+        addr = GenRegister::retype(sel.selReg(sel.reg(FAMILY_WORD), 
ir::TYPE_U16), GEN_TYPE_UD);
+        nextThread = GenRegister::retype(sel.selReg(sel.reg(FAMILY_WORD), 
ir::TYPE_U16), GEN_TYPE_UD);
+        tid = GenRegister::retype(sel.selReg(sel.reg(FAMILY_WORD), 
ir::TYPE_U16), GEN_TYPE_UD);
+      } else { /* any other SIMD width: allocate dword-family registers directly */
+        addr = sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32);
+        nextThread = sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32);
+        tid = sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32);
+      }
+
+      sel.push(); { /* save the selection state; note predicate/noMask are inherited from the caller here */
+        sel.curr.execWidth = 8;
+        sel.ADD(nextThread, sel.selReg(ocl::threadid, ir::TYPE_U32), 
GenRegister::immud(1));
+
+        /* Wrap the next thread id. */
+        sel.push(); {
+          sel.curr.predicate = GEN_PREDICATE_NONE; /* the compare itself runs unpredicated and unmasked */
+          sel.curr.noMask = 1;
+          sel.curr.flag = 0; /* compare result goes to f0.1 */
+          sel.curr.subFlag = 1;
+          sel.CMP(GEN_CONDITIONAL_EQ, nextThread, sel.selReg(ocl::threadn, 
ir::TYPE_U32), GenRegister::null());
+          sel.curr.predicate = GEN_PREDICATE_NORMAL; /* following MOV executes only where the compare set f0.1 */
+          sel.curr.noMask = 0;
+          sel.MOV(nextThread, GenRegister::immud(0)); /* nextThread == threadn: wrap around to 0 */
+        } sel.pop();
+
+        sel.MUL(addr, nextThread, GenRegister::immud(2));
+        sel.ADD(addr, addr, GenRegister::immud(slmAddr)); /* addr = slmAddr + 2 * nextThread, same layout storeThreadID writes */
+
+        sel.push(); {
+          sel.curr.predicate = GEN_PREDICATE_NONE;
+          sel.curr.noMask = 1;
+          sel.push(); {
+            sel.curr.execWidth = 1; /* single-channel write of the flag register */
+            sel.MOV(GenRegister::flag(0, 1), GenRegister::immuw(0x010)); /* NOTE(review): 0x010 sets bit 4, while storeThreadID uses 0x01 (bit 0) - confirm which lane mask is intended */
+          } sel.pop();
+          sel.curr.flag = 0; /* predicate the gather on f0.1, the flag written just above */
+          sel.curr.subFlag = 1;
+          sel.curr.predicate = GEN_PREDICATE_NORMAL;
+          sel.curr.noMask = 0;
+          sel.BYTE_GATHER(tid, addr, 1, 0xfe); /* NOTE(review): 1 is presumably the element-size selector and 0xfe the SLM binding-table index - confirm */
+        } sel.pop();
+
+      } sel.pop();
+      return tid;
+    }
+
     INLINE bool emitWGBroadcast(Selection::Opaque &sel, const 
ir::WorkGroupInstruction &insn) const {
       /*  1. BARRIER    Ensure all the threads have set the correct value for 
the var which will be broadcasted.
           2. CMP IDs    Compare the local IDs with the specified ones in the 
function call.
diff --git a/backend/src/backend/gen_register.hpp b/backend/src/backend/gen_register.hpp
index 3b40b67..6e5321c 100644
--- a/backend/src/backend/gen_register.hpp
+++ b/backend/src/backend/gen_register.hpp
@@ -797,6 +797,16 @@ namespace gbe
                          GEN_HORIZONTAL_STRIDE_0);
     }
 
+    static INLINE GenRegister sr(uint32_t nr, uint32_t subnr = 0) { /*! Build a GenRegister naming architecture state register number nr, sub-register subnr (default 0) */
+      return GenRegister(GEN_ARCHITECTURE_REGISTER_FILE,
+                         GEN_ARF_STATE | nr, /* register number is OR-ed into the state-register base encoding */
+                         subnr,
+                         GEN_TYPE_UD, /* default view: unsigned dwords with an <8;8,1> region; callers may retype or restride */
+                         GEN_VERTICAL_STRIDE_8,
+                         GEN_WIDTH_8,
+                         GEN_HORIZONTAL_STRIDE_1);
+    }
+
     static INLINE GenRegister notification1(void) {
       return GenRegister(GEN_ARCHITECTURE_REGISTER_FILE,
                          GEN_ARF_NOTIFICATION_COUNT,
-- 
1.7.9.5



_______________________________________________
Beignet mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/beignet

Reply via email to