From: Junyan He <[email protected]>

The workgroup op involves message forwarding and wait operations,
so it needs all the threads to synchronize with each other. Its
behavior is very similar to BARRIER, so the instruction scheduler
now takes it into account in the same way.

Signed-off-by: Junyan He <[email protected]>
Reviewed-by: Yang Rong <[email protected]>
---
 backend/src/backend/gen_insn_scheduling.cpp | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/backend/src/backend/gen_insn_scheduling.cpp b/backend/src/backend/gen_insn_scheduling.cpp
index 358a2ce..ac255bf 100644
--- a/backend/src/backend/gen_insn_scheduling.cpp
+++ b/backend/src/backend/gen_insn_scheduling.cpp
@@ -192,8 +192,10 @@ namespace gbe
     static const uint32_t MAX_ACC_REGISTER = 1u;
     /*! Maximum number of *physical* tm registers */
     static const uint32_t MAX_TM_REGISTER = 1u;
+    /*! Maximum number of state registers */
+    static const uint32_t MAX_ST_REGISTER = 2u;
     /*! Maximum number of *physical* arf registers */
-    static const uint32_t MAX_ARF_REGISTER = MAX_FLAG_REGISTER + MAX_ACC_REGISTER + MAX_TM_REGISTER;
+    static const uint32_t MAX_ARF_REGISTER = MAX_FLAG_REGISTER + MAX_ACC_REGISTER + MAX_TM_REGISTER + MAX_ST_REGISTER;
     /*! Stores the last node that wrote to a register / memory ... */
     vector<ScheduleDAGNode*> nodes;
     /*! store nodes each node depends on */
@@ -333,6 +335,9 @@ namespace gbe
           return grfNum + MAX_FLAG_REGISTER + nr;
         } else if (file == GEN_ARF_TM) {
           return grfNum + MAX_FLAG_REGISTER + MAX_ACC_REGISTER;
+        } else if (file == GEN_ARF_STATE) {
+          GBE_ASSERT(nr < MAX_ST_REGISTER);
+          return grfNum + MAX_FLAG_REGISTER + MAX_ACC_REGISTER + MAX_TM_REGISTER + nr;
         } else {
           NOT_SUPPORTED;
           return 0;
@@ -500,7 +505,8 @@ namespace gbe
       // Consider barriers and wait are reading memory (local and global)
     if (insn.opcode == SEL_OP_BARRIER ||
         insn.opcode == SEL_OP_FENCE ||
-        insn.opcode == SEL_OP_WAIT) {
+        insn.opcode == SEL_OP_WAIT ||
+        insn.opcode == SEL_OP_WORKGROUP_OP) {
         const uint32_t memIndex = tracker.getMemoryIndex();
         tracker.addDependency(node, memIndex, READ_AFTER_WRITE);
       }
@@ -562,7 +568,8 @@ namespace gbe
       // Consider barriers and wait are reading memory (local and global)
       if (insn.opcode == SEL_OP_BARRIER ||
           insn.opcode == SEL_OP_FENCE ||
-          insn.opcode == SEL_OP_WAIT) {
+          insn.opcode == SEL_OP_WAIT ||
+          insn.opcode == SEL_OP_WORKGROUP_OP) {
         const uint32_t memIndex = tracker.getMemoryIndex();
         tracker.addDependency(memIndex, node, WRITE_AFTER_READ);
       }
@@ -589,7 +596,8 @@ namespace gbe
           || node->insn.opcode == SEL_OP_ENDIF
           || node->insn.opcode == SEL_OP_WHILE
           || node->insn.opcode == SEL_OP_READ_ARF
-          || node->insn.opcode == SEL_OP_BARRIER)
+          || node->insn.opcode == SEL_OP_BARRIER
+          || node->insn.opcode == SEL_OP_WORKGROUP_OP)
         tracker.makeBarrier(insnID, insnNum);
     }
 
-- 
2.5.0

_______________________________________________
Beignet mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/beignet

Reply via email to