Signed-off-by: Grigore Lupescu <[email protected]>
---
 backend/src/backend/gen_context.cpp | 48 ++++++++++++++++++++++++-------------
 1 file changed, 32 insertions(+), 16 deletions(-)

diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index a2e11a4..52e988e 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -2943,21 +2943,38 @@ namespace gbe
           }
         }
       }
-    } else if (wg_op == ir::WORKGROUP_OP_REDUCE_ADD) {
-      GBE_ASSERT(tmp.type == theVal.type);
-      GenRegister v = GenRegister::toUniform(tmp, theVal.type);
-      for (uint32_t i = 0; i < simd; i++) {
-        p->ADD(threadData, threadData, v);
-        v.subnr += typeSize(theVal.type);
-        if (v.subnr == 32) {
-          v.subnr = 0;
-          v.nr++;
-        }
-      }
-    }
-
-    p->pop();
-  }
+    } else if (wg_op == ir::WORKGROUP_OP_REDUCE_ADD){
+
+               tmp.hstride = GEN_HORIZONTAL_STRIDE_1;
+               tmp.vstride = GEN_VERTICAL_STRIDE_4;
+               tmp.width = GEN_WIDTH_4;
+
+               GBE_ASSERT(tmp.type == theVal.type);
+               GenRegister partialSum = tmp;
+
+               /* adjust offset, compute add with ADD4/ADD */
+               for (uint32_t i = 1; i < simd/4; i++){
+                       tmp = tmp.suboffset(tmp, 4);
+                       GenNativeInstruction* insnQ1 = p->next(GEN_OPCODE_ADD);
+                       p->setHeader(insnQ1);
+                       p->setDst(insnQ1, partialSum);
+                       p->setSrc0(insnQ1, partialSum);
+                       p->setSrc1(insnQ1, tmp);
+                       insnQ1->header.execution_size = GEN_WIDTH_4;
+               }
+
+               partialSum = GenRegister::toUniform(partialSum, theVal.type);
+               for (uint32_t i = 0; i < 4; i++){
+                       p->ADD(threadData, threadData, partialSum);
+                       partialSum.subnr += typeSize(theVal.type);
+                       if (partialSum.subnr == 32) {
+                               partialSum.subnr = 0;
+                               partialSum.nr++;
+                       }
+               }
+       }
+       p->pop();
+}
 
 #define SEND_RESULT_MSG() \
 do { \
@@ -3123,7 +3140,6 @@ do { \
         p->curr.predicate = GEN_PREDICATE_NONE;
         p->WAIT(2);
         p->patchJMPI(jip, (p->n_instruction() - jip), 0);
-    
         /* Do something when get the msg. */
         p->curr.execWidth = simd;
         p->MOV(dst, msgData);
-- 
2.1.4

_______________________________________________
Beignet mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/beignet

Reply via email to