Module: Mesa
Branch: main
Commit: 17d2b2f2cc39e6020e49ea06855e083361956e94
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=17d2b2f2cc39e6020e49ea06855e083361956e94

Author: Faith Ekstrand <[email protected]>
Date:   Tue Dec 19 12:36:12 2023 -0600

nak/sm50: Add encoding and legalization for dadd/dfma/dmul/dsetp

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26587>

---

 src/nouveau/compiler/nak/encode_sm50.rs | 128 ++++++++++++++++++++++++++++++++
 src/nouveau/compiler/nak/legalize.rs    |  74 ++++++++++++++++++
 2 files changed, 202 insertions(+)

diff --git a/src/nouveau/compiler/nak/encode_sm50.rs b/src/nouveau/compiler/nak/encode_sm50.rs
index 86a0f097c29..92d6e815ce3 100644
--- a/src/nouveau/compiler/nak/encode_sm50.rs
+++ b/src/nouveau/compiler/nak/encode_sm50.rs
@@ -1626,6 +1626,130 @@ impl SM50Instr {
         );
     }
 
+    fn encode_dadd(&mut self, op: &OpDAdd) { // Encode `op` as a DADD.
+        match &op.srcs[1].src_ref { // The kind of src1 picks the opcode.
+            SrcRef::Zero | SrcRef::Reg(_) => {
+                self.set_opcode(0x5c70);
+                self.set_reg_fmod_src(20..28, 49, 45, op.srcs[1]);
+            }
+            SrcRef::Imm32(imm) => {
+                self.set_opcode(0x3870);
+                self.set_src_imm_f20(20..39, 56, *imm);
+                assert!(op.srcs[1].src_mod.is_none()); // no modifier on imm
+            }
+            SrcRef::CBuf(_) => {
+                self.set_opcode(0x4c70);
+                self.set_cb_fmod_src(20..39, 49, 45, op.srcs[1]);
+            }
+            _ => panic!("Unsupported src type"),
+        }
+        // The fields below are written the same way for every src1 form.
+        self.set_dst(op.dst);
+        self.set_reg_fmod_src(8..16, 46, 48, op.srcs[0]);
+        self.set_rnd_mode(39..41, op.rnd_mode);
+    }
+
+    fn encode_dfma(&mut self, op: &OpDFma) { // Encode `op` as a DFMA.
+        match &op.srcs[2].src_ref { // Outer match: the kind of src2.
+            SrcRef::Zero | SrcRef::Reg(_) => {
+                match &op.srcs[1].src_ref { // Inner match: the kind of src1.
+                    SrcRef::Zero | SrcRef::Reg(_) => {
+                        self.set_opcode(0x5b70);
+                        self.set_reg_src_ref(20..28, op.srcs[1].src_ref);
+                    }
+                    SrcRef::Imm32(imm) => {
+                        self.set_opcode(0x3670);
+                        self.set_src_imm_f20(20..39, 56, *imm);
+                        assert!(op.srcs[1].src_mod.is_none());
+                    }
+                    SrcRef::CBuf(cb) => {
+                        self.set_opcode(0x4b70);
+                        self.set_src_cb(20..39, cb);
+                    }
+                    _ => panic!("Invalid dfma src1: {}", op.srcs[1]),
+                }
+                self.set_reg_src_ref(39..47, op.srcs[2].src_ref);
+            }
+            SrcRef::CBuf(cb) => { // src2 in a cbuf: src1 goes to 39..47.
+                self.set_opcode(0x5370);
+                self.set_reg_src_ref(39..47, op.srcs[1].src_ref);
+                self.set_src_cb(20..39, cb);
+            }
+            _ => panic!("Invalid dfma src2: {}", op.srcs[2]),
+        }
+        // The fields below are written the same way for every operand form.
+        self.set_dst(op.dst);
+        self.set_reg_src_ref(8..16, op.srcs[0].src_ref);
+        // No source may carry fabs; only the fneg modifiers are encoded.
+        assert!(!op.srcs[0].src_mod.has_fabs());
+        assert!(!op.srcs[1].src_mod.has_fabs());
+        assert!(!op.srcs[2].src_mod.has_fabs());
+        self.set_bit(
+            48, // set when exactly one of src0/src1 has fneg
+            op.srcs[0].src_mod.has_fneg() ^ op.srcs[1].src_mod.has_fneg(),
+        );
+        self.set_bit(49, op.srcs[2].src_mod.has_fneg());
+
+        self.set_rnd_mode(50..52, op.rnd_mode);
+    }
+
+    fn encode_dmul(&mut self, op: &OpDMul) { // Encode `op` as a DMUL.
+        match &op.srcs[1].src_ref { // The kind of src1 picks the opcode.
+            SrcRef::Zero | SrcRef::Reg(_) => {
+                self.set_opcode(0x5c80);
+                self.set_reg_src_ref(20..28, op.srcs[1].src_ref);
+            }
+            SrcRef::Imm32(imm) => {
+                self.set_opcode(0x3880);
+                self.set_src_imm_f20(20..39, 56, *imm);
+                assert!(op.srcs[1].src_mod.is_none()); // no modifier on imm
+            }
+            SrcRef::CBuf(cb) => {
+                self.set_opcode(0x4c80);
+                self.set_src_cb(20..39, cb);
+            }
+            _ => panic!("Invalid dmul src1: {}", op.srcs[1]),
+        }
+        // The fields below are written the same way for every src1 form.
+        self.set_dst(op.dst);
+        self.set_reg_src_ref(8..16, op.srcs[0].src_ref);
+
+        self.set_rnd_mode(39..41, op.rnd_mode);
+        // Neither source may carry fabs; only the fneg modifiers are encoded.
+        assert!(!op.srcs[0].src_mod.has_fabs());
+        assert!(!op.srcs[1].src_mod.has_fabs());
+        self.set_bit(
+            48, // set when exactly one of src0/src1 has fneg
+            op.srcs[0].src_mod.has_fneg() ^ op.srcs[1].src_mod.has_fneg(),
+        );
+    }
+
+    fn encode_dsetp(&mut self, op: &OpDSetP) { // Encode `op` as a DSETP.
+        match &op.srcs[1].src_ref { // The kind of src1 picks the opcode.
+            SrcRef::Zero | SrcRef::Reg(_) => {
+                self.set_opcode(0x5b80);
+                self.set_reg_fmod_src(20..28, 44, 6, op.srcs[1]);
+            }
+            SrcRef::Imm32(imm) => {
+                self.set_opcode(0x3680);
+                self.set_src_imm_f20(20..39, 56, *imm);
+                assert!(op.srcs[1].src_mod.is_none()); // no modifier on imm
+            }
+            SrcRef::CBuf(_) => { // cbuf operand: cbuf helper, as in dadd
+                self.set_opcode(0x4b80);
+                self.set_cb_fmod_src(20..39, 44, 6, op.srcs[1]);
+            }
+            _ => panic!("Invalid dsetp src1: {}", op.srcs[1]),
+        }
+        // The fields below are written the same way for every src1 form.
+        self.set_pred_dst(3..6, op.dst);
+        self.set_pred_dst(0..3, Dst::None); // dst1
+        self.set_pred_src(39..42, 42, op.accum);
+        self.set_pred_set_op(45..47, op.set_op);
+        self.set_float_cmp_op(48..52, op.cmp_op);
+        self.set_reg_fmod_src(8..16, 7, 43, op.srcs[0]);
+    }
+
     fn encode_iabs(&mut self, op: &OpIAbs) {
         assert!(op.src.is_reg_or_zero());
 
@@ -1748,6 +1872,10 @@ impl SM50Instr {
             Op::FSet(op) => si.encode_fset(&op),
             Op::FSetP(op) => si.encode_fsetp(&op),
             Op::MuFu(op) => si.encode_mufu(&op),
+            Op::DAdd(op) => si.encode_dadd(&op),
+            Op::DFma(op) => si.encode_dfma(&op),
+            Op::DMul(op) => si.encode_dmul(&op),
+            Op::DSetP(op) => si.encode_dsetp(&op),
             Op::IAbs(op) => si.encode_iabs(&op),
             Op::IAdd2(op) => si.encode_iadd2(&op),
             Op::Mov(op) => si.encode_mov(&op),
diff --git a/src/nouveau/compiler/nak/legalize.rs b/src/nouveau/compiler/nak/legalize.rs
index b695da08914..aa9ec29120b 100644
--- a/src/nouveau/compiler/nak/legalize.rs
+++ b/src/nouveau/compiler/nak/legalize.rs
@@ -131,6 +131,48 @@ fn copy_alu_src_if_i20_overflow(
     }
 }
 
+fn copy_alu_src_if_f20_overflow(
+    b: &mut impl SSABuilder,
+    src: &mut Src,
+    src_type: SrcType,
+) { // Leaves `src` untouched unless the check below matches.
+    if src.as_imm_not_f20().is_some() { // presumably: imm not f20-encodable
+        copy_alu_src(b, src, src_type);
+    }
+}
+
+fn copy_alu_src_if_fabs(
+    b: &mut impl SSABuilder,
+    src: &mut Src,
+    src_type: SrcType,
+) { // If `src` has fabs, compute it into a new SSA value and use that.
+    if src.src_mod.has_fabs() {
+        match src_type { // F32 emits OpFAdd (1 GPR); F64 emits OpDAdd (2 GPRs)
+            SrcType::F32 => {
+                let val = b.alloc_ssa(RegFile::GPR, 1);
+                b.push_op(OpFAdd {
+                    dst: val.into(),
+                    srcs: [Src::new_zero().fneg(), *src], // negated zero + src
+                    saturate: false,
+                    rnd_mode: FRndMode::NearestEven,
+                    ftz: false,
+                });
+                *src = val.into(); // the add's result replaces `src`
+            }
+            SrcType::F64 => {
+                let val = b.alloc_ssa(RegFile::GPR, 2);
+                b.push_op(OpDAdd {
+                    dst: val.into(),
+                    srcs: [Src::new_zero().fneg(), *src], // negated zero + src
+                    rnd_mode: FRndMode::NearestEven,
+                });
+                *src = val.into(); // the add's result replaces `src`
+            }
+            _ => panic!("Invalid fabs source type"), // only F32/F64 handled
+        }
+    }
+}
+
 fn legalize_sm50_instr(
     b: &mut impl SSABuilder,
     _bl: &impl BlockLiveness,
@@ -183,6 +225,38 @@ fn legalize_sm50_instr(
         Op::MuFu(op) => {
             copy_alu_src_if_not_reg(b, &mut op.src, SrcType::GPR);
         }
+        Op::DAdd(op) => {
+            let [ref mut src0, ref mut src1] = op.srcs;
+            swap_srcs_if_not_reg(src0, src1);
+            copy_alu_src_if_not_reg(b, src0, SrcType::F64);
+            copy_alu_src_if_f20_overflow(b, src1, SrcType::F64);
+        }
+        Op::DFma(op) => {
+            let [ref mut src0, ref mut src1, ref mut src2] = op.srcs;
+            copy_alu_src_if_fabs(b, src0, SrcType::F64);
+            copy_alu_src_if_fabs(b, src1, SrcType::F64);
+            copy_alu_src_if_fabs(b, src2, SrcType::F64);
+            swap_srcs_if_not_reg(src0, src1);
+            copy_alu_src_if_not_reg(b, src0, SrcType::F64);
+            copy_alu_src_if_f20_overflow(b, src1, SrcType::F64);
+            copy_alu_src_if_not_reg(b, src2, SrcType::F64);
+        }
+        Op::DMul(op) => {
+            let [ref mut src0, ref mut src1] = op.srcs;
+            copy_alu_src_if_fabs(b, src0, SrcType::F64);
+            copy_alu_src_if_fabs(b, src1, SrcType::F64);
+            swap_srcs_if_not_reg(src0, src1);
+            copy_alu_src_if_not_reg(b, src0, SrcType::F64);
+            copy_alu_src_if_f20_overflow(b, src1, SrcType::F64);
+        }
+        Op::DSetP(op) => {
+            let [ref mut src0, ref mut src1] = op.srcs;
+            if swap_srcs_if_not_reg(src0, src1) {
+                op.cmp_op = op.cmp_op.flip();
+            }
+            copy_alu_src_if_not_reg(b, src0, SrcType::F64);
+            copy_alu_src_if_f20_overflow(b, src1, SrcType::F64);
+        }
         Op::IAbs(op) => {
             copy_alu_src_if_not_reg(b, &mut op.src, SrcType::GPR);
         }

Reply via email to