Module: Mesa
Branch: main
Commit: 1f5623c5576ed87f44333f255ab8c62be22d95d6
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1f5623c5576ed87f44333f255ab8c62be22d95d6
Author: Faith Ekstrand <[email protected]>
Date: Mon Dec 18 17:50:32 2023 -0600

nak: Implement 64-bit nir_op_fsign

There is NIR lowering for this but this implementation is more efficient.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26587>

---

 src/nouveau/compiler/nak/builder.rs | 12 ++++++++++++
 src/nouveau/compiler/nak/from_nir.rs | 18 ++++++++++++++----
 2 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/src/nouveau/compiler/nak/builder.rs b/src/nouveau/compiler/nak/builder.rs
index a315a707822..d476de93423 100644
--- a/src/nouveau/compiler/nak/builder.rs
+++ b/src/nouveau/compiler/nak/builder.rs
@@ -217,6 +217,18 @@ pub trait SSABuilder: Builder {
         dst
     }
 
+    fn dsetp(&mut self, cmp_op: FloatCmpOp, x: Src, y: Src) -> SSARef {
+        let dst = self.alloc_ssa(RegFile::Pred, 1);
+        self.push_op(OpDSetP {
+            dst: dst.into(),
+            set_op: PredSetOp::And,
+            cmp_op: cmp_op,
+            srcs: [x, y],
+            accum: SrcRef::True.into(),
+        });
+        dst
+    }
+
     fn iabs(&mut self, i: Src) -> SSARef {
         let dst = self.alloc_ssa(RegFile::GPR, 1);
         self.push_op(OpIAbs {
diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs
index 25855027c8c..f813e52ca2c 100644
--- a/src/nouveau/compiler/nak/from_nir.rs
+++ b/src/nouveau/compiler/nak/from_nir.rs
@@ -885,10 +885,20 @@ impl<'a> ShaderFromNir<'a> {
                 }
             }
             nir_op_fsign => {
-                assert!(alu.def.bit_size() == 32);
-                let lz = b.fset(FloatCmpOp::OrdLt, srcs[0], 0.into());
-                let gz = b.fset(FloatCmpOp::OrdGt, srcs[0], 0.into());
-                b.fadd(gz.into(), Src::from(lz).fneg())
+                if alu.def.bit_size() == 64 {
+                    let lz = b.dsetp(FloatCmpOp::OrdLt, srcs[0], 0.into());
+                    let gz = b.dsetp(FloatCmpOp::OrdGt, srcs[0], 0.into());
+                    let hi = b.sel(lz.into(), 0xbff00000.into(), 0.into());
+                    let hi = b.sel(gz.into(), 0x3ff00000.into(), hi.into());
+                    let lo = b.copy(0.into());
+                    [lo[0], hi[0]].into()
+                } else if alu.def.bit_size() == 32 {
+                    let lz = b.fset(FloatCmpOp::OrdLt, srcs[0], 0.into());
+                    let gz = b.fset(FloatCmpOp::OrdGt, srcs[0], 0.into());
+                    b.fadd(gz.into(), Src::from(lz).fneg())
+                } else {
+                    panic!("Unsupported float type: f{}", alu.def.bit_size());
+                }
             }
             nir_op_fsin => {
                 let frac_1_2pi = 1.0 / (2.0 * std::f32::consts::PI);
