Module: Mesa
Branch: main
Commit: 1f5623c5576ed87f44333f255ab8c62be22d95d6
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=1f5623c5576ed87f44333f255ab8c62be22d95d6

Author: Faith Ekstrand <[email protected]>
Date:   Mon Dec 18 17:50:32 2023 -0600

nak: Implement 64-bit nir_op_fsign

There is NIR lowering for this but this implementation is more
efficient.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26587>

---

 src/nouveau/compiler/nak/builder.rs  | 12 ++++++++++++
 src/nouveau/compiler/nak/from_nir.rs | 18 ++++++++++++++----
 2 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/src/nouveau/compiler/nak/builder.rs b/src/nouveau/compiler/nak/builder.rs
index a315a707822..d476de93423 100644
--- a/src/nouveau/compiler/nak/builder.rs
+++ b/src/nouveau/compiler/nak/builder.rs
@@ -217,6 +217,18 @@ pub trait SSABuilder: Builder {
         dst
     }
 
+    fn dsetp(&mut self, cmp_op: FloatCmpOp, x: Src, y: Src) -> SSARef {
+        let dst = self.alloc_ssa(RegFile::Pred, 1);
+        self.push_op(OpDSetP {
+            dst: dst.into(),
+            set_op: PredSetOp::And,
+            cmp_op: cmp_op,
+            srcs: [x, y],
+            accum: SrcRef::True.into(),
+        });
+        dst
+    }
+
     fn iabs(&mut self, i: Src) -> SSARef {
         let dst = self.alloc_ssa(RegFile::GPR, 1);
         self.push_op(OpIAbs {
diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs
index 25855027c8c..f813e52ca2c 100644
--- a/src/nouveau/compiler/nak/from_nir.rs
+++ b/src/nouveau/compiler/nak/from_nir.rs
@@ -885,10 +885,20 @@ impl<'a> ShaderFromNir<'a> {
                 }
             }
             nir_op_fsign => {
-                assert!(alu.def.bit_size() == 32);
-                let lz = b.fset(FloatCmpOp::OrdLt, srcs[0], 0.into());
-                let gz = b.fset(FloatCmpOp::OrdGt, srcs[0], 0.into());
-                b.fadd(gz.into(), Src::from(lz).fneg())
+                if alu.def.bit_size() == 64 {
+                    let lz = b.dsetp(FloatCmpOp::OrdLt, srcs[0], 0.into());
+                    let gz = b.dsetp(FloatCmpOp::OrdGt, srcs[0], 0.into());
+                    let hi = b.sel(lz.into(), 0xbff00000.into(), 0.into());
+                    let hi = b.sel(gz.into(), 0x3ff00000.into(), hi.into());
+                    let lo = b.copy(0.into());
+                    [lo[0], hi[0]].into()
+                } else if alu.def.bit_size() == 32 {
+                    let lz = b.fset(FloatCmpOp::OrdLt, srcs[0], 0.into());
+                    let gz = b.fset(FloatCmpOp::OrdGt, srcs[0], 0.into());
+                    b.fadd(gz.into(), Src::from(lz).fneg())
+                } else {
+                    panic!("Unsupported float type: f{}", alu.def.bit_size());
+                }
             }
             nir_op_fsin => {
                 let frac_1_2pi = 1.0 / (2.0 * std::f32::consts::PI);

Reply via email to