Module: Mesa Branch: main Commit: 17d2b2f2cc39e6020e49ea06855e083361956e94 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=17d2b2f2cc39e6020e49ea06855e083361956e94
Author: Faith Ekstrand <[email protected]>
Date:   Tue Dec 19 12:36:12 2023 -0600

    nak/sm50: Add encoding and legalization for dadd/dfma/dmul/dsetp

    Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26587>

---

 src/nouveau/compiler/nak/encode_sm50.rs | 139 ++++++++++++++++++++++++++++++++
 src/nouveau/compiler/nak/legalize.rs    |  81 +++++++++++++++++++
 2 files changed, 220 insertions(+)

diff --git a/src/nouveau/compiler/nak/encode_sm50.rs b/src/nouveau/compiler/nak/encode_sm50.rs
index 86a0f097c29..92d6e815ce3 100644
--- a/src/nouveau/compiler/nak/encode_sm50.rs
+++ b/src/nouveau/compiler/nak/encode_sm50.rs
@@ -1626,6 +1626,141 @@ impl SM50Instr {
         );
     }
 
+    /// Encodes `OpDAdd`. The opcode is selected by the form of src1:
+    /// register/zero, 20-bit float immediate, or constant buffer.
+    fn encode_dadd(&mut self, op: &OpDAdd) {
+        match &op.srcs[1].src_ref {
+            SrcRef::Zero | SrcRef::Reg(_) => {
+                self.set_opcode(0x5c70);
+                self.set_reg_fmod_src(20..28, 49, 45, op.srcs[1]);
+            }
+            SrcRef::Imm32(imm) => {
+                self.set_opcode(0x3870);
+                self.set_src_imm_f20(20..39, 56, *imm);
+                assert!(op.srcs[1].src_mod.is_none());
+            }
+            SrcRef::CBuf(_) => {
+                self.set_opcode(0x4c70);
+                self.set_cb_fmod_src(20..39, 49, 45, op.srcs[1]);
+            }
+            _ => panic!("Unsupported src type"),
+        }
+
+        self.set_dst(op.dst);
+        self.set_reg_fmod_src(8..16, 46, 48, op.srcs[0]);
+        self.set_rnd_mode(39..41, op.rnd_mode);
+    }
+
+    /// Encodes `OpDFma`. Either src1 (immediate or cbuf) or src2 (cbuf)
+    /// may be the non-register operand. `fabs` modifiers are asserted
+    /// absent and the negates of src0 and src1 share bit 48.
+    fn encode_dfma(&mut self, op: &OpDFma) {
+        match &op.srcs[2].src_ref {
+            SrcRef::Zero | SrcRef::Reg(_) => {
+                match &op.srcs[1].src_ref {
+                    SrcRef::Zero | SrcRef::Reg(_) => {
+                        self.set_opcode(0x5b70);
+                        self.set_reg_src_ref(20..28, op.srcs[1].src_ref);
+                    }
+                    SrcRef::Imm32(imm) => {
+                        self.set_opcode(0x3670);
+                        self.set_src_imm_f20(20..39, 56, *imm);
+                        assert!(op.srcs[1].src_mod.is_none());
+                    }
+                    SrcRef::CBuf(cb) => {
+                        self.set_opcode(0x4b70);
+                        self.set_src_cb(20..39, cb);
+                    }
+                    _ => panic!("Invalid dfma src1: {}", op.srcs[1]),
+                }
+                self.set_reg_src_ref(39..47, op.srcs[2].src_ref);
+            }
+            SrcRef::CBuf(cb) => {
+                self.set_opcode(0x5370);
+                self.set_reg_src_ref(39..47, op.srcs[1].src_ref);
+                self.set_src_cb(20..39, cb);
+            }
+            _ => panic!("Invalid dfma src2: {}", op.srcs[2]),
+        }
+
+        self.set_dst(op.dst);
+        self.set_reg_src_ref(8..16, op.srcs[0].src_ref);
+
+        assert!(!op.srcs[0].src_mod.has_fabs());
+        assert!(!op.srcs[1].src_mod.has_fabs());
+        assert!(!op.srcs[2].src_mod.has_fabs());
+        self.set_bit(
+            48,
+            op.srcs[0].src_mod.has_fneg() ^ op.srcs[1].src_mod.has_fneg(),
+        );
+        self.set_bit(49, op.srcs[2].src_mod.has_fneg());
+
+        self.set_rnd_mode(50..52, op.rnd_mode);
+    }
+
+    /// Encodes `OpDMul`. `fabs` modifiers are asserted absent and the
+    /// negates of src0 and src1 share bit 48.
+    fn encode_dmul(&mut self, op: &OpDMul) {
+        match &op.srcs[1].src_ref {
+            SrcRef::Zero | SrcRef::Reg(_) => {
+                self.set_opcode(0x5c80);
+                self.set_reg_src_ref(20..28, op.srcs[1].src_ref);
+            }
+            SrcRef::Imm32(imm) => {
+                self.set_opcode(0x3880);
+                self.set_src_imm_f20(20..39, 56, *imm);
+                assert!(op.srcs[1].src_mod.is_none());
+            }
+            SrcRef::CBuf(cb) => {
+                self.set_opcode(0x4c80);
+                self.set_src_cb(20..39, cb);
+            }
+            _ => panic!("Invalid dmul src1: {}", op.srcs[1]),
+        }
+
+        self.set_dst(op.dst);
+        self.set_reg_src_ref(8..16, op.srcs[0].src_ref);
+
+        self.set_rnd_mode(39..41, op.rnd_mode);
+
+        assert!(!op.srcs[0].src_mod.has_fabs());
+        assert!(!op.srcs[1].src_mod.has_fabs());
+        self.set_bit(
+            48,
+            op.srcs[0].src_mod.has_fneg() ^ op.srcs[1].src_mod.has_fneg(),
+        );
+    }
+
+    /// Encodes `OpDSetP`. Only the first predicate destination is used;
+    /// the second one is written as `Dst::None`.
+    fn encode_dsetp(&mut self, op: &OpDSetP) {
+        match &op.srcs[1].src_ref {
+            SrcRef::Zero | SrcRef::Reg(_) => {
+                self.set_opcode(0x5b80);
+                self.set_reg_fmod_src(20..28, 44, 6, op.srcs[1]);
+            }
+            SrcRef::Imm32(imm) => {
+                self.set_opcode(0x3680);
+                self.set_src_imm_f20(20..39, 56, *imm);
+                assert!(op.srcs[1].src_mod.is_none());
+            }
+            SrcRef::CBuf(_) => {
+                self.set_opcode(0x4b80);
+                // src1 is a constant buffer in this arm, so it needs
+                // the cbuf helper rather than the register one.
+                self.set_cb_fmod_src(20..39, 44, 6, op.srcs[1]);
+            }
+            _ => panic!("Invalid dsetp src1: {}", op.srcs[1]),
+        }
+
+        self.set_pred_dst(3..6, op.dst);
+        self.set_pred_dst(0..3, Dst::None); // dst1
+        self.set_pred_src(39..42, 42, op.accum);
+        self.set_pred_set_op(45..47, op.set_op);
+        self.set_float_cmp_op(48..52, op.cmp_op);
+        self.set_reg_fmod_src(8..16, 7, 43, op.srcs[0]);
+    }
+
     fn encode_iabs(&mut self, op: &OpIAbs) {
         assert!(op.src.is_reg_or_zero());
 
@@ -1748,6 +1883,10 @@ impl SM50Instr {
             Op::FSet(op) => si.encode_fset(&op),
             Op::FSetP(op) => si.encode_fsetp(&op),
             Op::MuFu(op) => si.encode_mufu(&op),
+            Op::DAdd(op) => si.encode_dadd(&op),
+            Op::DFma(op) => si.encode_dfma(&op),
+            Op::DMul(op) => si.encode_dmul(&op),
+            Op::DSetP(op) => si.encode_dsetp(&op),
             Op::IAbs(op) => si.encode_iabs(&op),
             Op::IAdd2(op) => si.encode_iadd2(&op),
             Op::Mov(op) => si.encode_mov(&op),
diff --git a/src/nouveau/compiler/nak/legalize.rs b/src/nouveau/compiler/nak/legalize.rs
index b695da08914..aa9ec29120b 100644
--- a/src/nouveau/compiler/nak/legalize.rs
+++ b/src/nouveau/compiler/nak/legalize.rs
@@ -131,6 +131,54 @@ fn copy_alu_src_if_i20_overflow(
     }
 }
 
+/// Calls `copy_alu_src()` on `src` when `src.as_imm_not_f20()` is `Some`
+/// (presumably an immediate that does not fit the 20-bit float form).
+fn copy_alu_src_if_f20_overflow(
+    b: &mut impl SSABuilder,
+    src: &mut Src,
+    src_type: SrcType,
+) {
+    if src.as_imm_not_f20().is_some() {
+        copy_alu_src(b, src, src_type);
+    }
+}
+
+/// If `src` carries an `fabs` modifier, materializes it into a fresh SSA
+/// value by adding it to negated zero (`OpFAdd` for F32, `OpDAdd` for
+/// F64) and replaces `src` with that value. Panics for any other
+/// `src_type` when the modifier is present.
+fn copy_alu_src_if_fabs(
+    b: &mut impl SSABuilder,
+    src: &mut Src,
+    src_type: SrcType,
+) {
+    if src.src_mod.has_fabs() {
+        match src_type {
+            SrcType::F32 => {
+                let val = b.alloc_ssa(RegFile::GPR, 1);
+                b.push_op(OpFAdd {
+                    dst: val.into(),
+                    srcs: [Src::new_zero().fneg(), *src],
+                    saturate: false,
+                    rnd_mode: FRndMode::NearestEven,
+                    ftz: false,
+                });
+                *src = val.into();
+            }
+            SrcType::F64 => {
+                let val = b.alloc_ssa(RegFile::GPR, 2);
+                b.push_op(OpDAdd {
+                    dst: val.into(),
+                    srcs: [Src::new_zero().fneg(), *src],
+                    rnd_mode: FRndMode::NearestEven,
+                });
+                *src = val.into();
+            }
+            _ => panic!("Invalid fabs source type"),
+        }
+    }
+}
+
 fn legalize_sm50_instr(
     b: &mut impl SSABuilder,
     _bl: &impl BlockLiveness,
@@ -183,6 +231,39 @@ fn legalize_sm50_instr(
         Op::MuFu(op) => {
             copy_alu_src_if_not_reg(b, &mut op.src, SrcType::GPR);
         }
+        Op::DAdd(op) => {
+            let [ref mut src0, ref mut src1] = op.srcs;
+            swap_srcs_if_not_reg(src0, src1);
+            copy_alu_src_if_not_reg(b, src0, SrcType::F64);
+            copy_alu_src_if_f20_overflow(b, src1, SrcType::F64);
+        }
+        Op::DFma(op) => {
+            let [ref mut src0, ref mut src1, ref mut src2] = op.srcs;
+            copy_alu_src_if_fabs(b, src0, SrcType::F64);
+            copy_alu_src_if_fabs(b, src1, SrcType::F64);
+            copy_alu_src_if_fabs(b, src2, SrcType::F64);
+            swap_srcs_if_not_reg(src0, src1);
+            copy_alu_src_if_not_reg(b, src0, SrcType::F64);
+            copy_alu_src_if_f20_overflow(b, src1, SrcType::F64);
+            copy_alu_src_if_not_reg(b, src2, SrcType::F64);
+        }
+        Op::DMul(op) => {
+            let [ref mut src0, ref mut src1] = op.srcs;
+            copy_alu_src_if_fabs(b, src0, SrcType::F64);
+            copy_alu_src_if_fabs(b, src1, SrcType::F64);
+            swap_srcs_if_not_reg(src0, src1);
+            copy_alu_src_if_not_reg(b, src0, SrcType::F64);
+            copy_alu_src_if_f20_overflow(b, src1, SrcType::F64);
+        }
+        Op::DSetP(op) => {
+            let [ref mut src0, ref mut src1] = op.srcs;
+            if swap_srcs_if_not_reg(src0, src1) {
+                // Operands were swapped; flip the comparison to match.
+                op.cmp_op = op.cmp_op.flip();
+            }
+            copy_alu_src_if_not_reg(b, src0, SrcType::F64);
+            copy_alu_src_if_f20_overflow(b, src1, SrcType::F64);
+        }
         Op::IAbs(op) => {
             copy_alu_src_if_not_reg(b, &mut op.src, SrcType::GPR);
         }
