This sets the FTZ flag (flush denorms to zero) on every F32 opcode that
can take it when any of its sources is produced by an opcode that cannot
take it, or by an instruction whose result is not F32.

This resolves issues in Unigine Heaven 4.0 where there were solid-filled
boxes popping up.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89455
Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu>
---
 .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp      | 33 +++++++++++++++++++++-
 .../nouveau/codegen/nv50_ir_lowering_nvc0.h        |  1 +
 2 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 87e75e1..9a8a41d 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -96,6 +96,32 @@ NVC0LegalizeSSA::handleRCPRSQ(Instruction *i)
    bld.mkOp2(OP_MERGE, TYPE_U64, def, dst[0], dst[1]);
 }
 
+void
+NVC0LegalizeSSA::handleFTZ(Instruction *i)
+{
+   unsigned s; // index of the source operand being examined
+
+   if (i->dType != TYPE_F32 || i->sType != TYPE_F32)
+      return; // only instructions with F32 source and destination types
+
+   // Set ftz on i as soon as one source is defined by an instruction whose
+   // result is not F32 or whose class is not arith/compare/convert. We assume
+   // the output of any op that *could* take ftz (even if unset) is flushed.
+   for (s = 0; i->srcExists(s); ++s) {
+      Instruction *ld = i->getSrc(s)->getInsn();
+      if (!ld) // getInsn() returned null: nothing to inspect for this source
+         continue;
+
+      OpClass cls = prog->getTarget()->getOpClass(ld->op);
+      if (ld->dType != TYPE_F32 || (
+             cls != OPCLASS_ARITH && cls != OPCLASS_COMPARE &&
+             cls != OPCLASS_CONVERT)) {
+         i->ftz = true;
+         return; // one such source is enough; no need to scan the rest
+      }
+   }
+}
+
 bool
 NVC0LegalizeSSA::visit(Function *fn)
 {
@@ -109,8 +135,13 @@ NVC0LegalizeSSA::visit(BasicBlock *bb)
    Instruction *next;
    for (Instruction *i = bb->getEntry(); i; i = next) {
       next = i->next;
-      if (i->dType == TYPE_F32)
+      if (i->dType == TYPE_F32) {
+         OpClass cls = prog->getTarget()->getOpClass(i->op);
+         if (cls == OPCLASS_ARITH || cls == OPCLASS_COMPARE ||
+             cls == OPCLASS_CONVERT)
+            handleFTZ(i);
          continue;
+      }
       switch (i->op) {
       case OP_DIV:
       case OP_MOD:
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
index d8ff5cd..260e101 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -36,6 +36,7 @@ private:
    // we want to insert calls to the builtin library only after optimization
    void handleDIV(Instruction *); // integer division, modulus
    void handleRCPRSQ(Instruction *); // double precision float recip/rsqrt
+   void handleFTZ(Instruction *);
 
 private:
    BuildUtil bld;
-- 
2.0.5

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to