Module: Mesa Branch: staging/23.3 Commit: af708d5e80d673cc3a03f898d84997ec1da58794 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=af708d5e80d673cc3a03f898d84997ec1da58794
Author: Sviatoslav Peleshko <[email protected]> Date: Fri Oct 13 05:02:22 2023 +0300 intel/fs: Don't optimize DW*1 MUL if it stores value to the accumulator Fixes: a8b86459 ("i965/fs: Optimize a * 1.0 -> a.") Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/9570 Signed-off-by: Sviatoslav Peleshko <[email protected]> Reviewed-by: Ian Romanick <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25710> (cherry picked from commit 8f8cde4c6050d1e91101ec66e8982036da9d7700) --- .pick_status.json | 2 +- src/intel/compiler/brw_fs.cpp | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/.pick_status.json b/.pick_status.json index a10aa151b66..abc560dd200 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -194,7 +194,7 @@ "description": "intel/fs: Don't optimize DW*1 MUL if it stores value to the accumulator", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "a8b86459a1bb74cfdf0d63572a9fe194b2b5b53f", "notes": null diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index db67b332576..696b5db8d9d 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -2788,6 +2788,29 @@ fs_visitor::opt_algebraic() if (brw_reg_type_is_floating_point(inst->src[1].type)) break; + /* From the BDW PRM, Vol 2a, "mul - Multiply": + * + * "When multiplying integer datatypes, if src0 is DW and src1 + * is W, irrespective of the destination datatype, the + * accumulator maintains full 48-bit precision." + * ... + * "When multiplying integer data types, if one of the sources + * is a DW, the resulting full precision data is stored in + * the accumulator." + * + * There are also similar notes in earlier PRMs. + * + * The MOV instruction can copy the bits of the source, but it + * does not clear the higher bits of the accumulator. So, because + * we might use the full accumulator in the MUL/MACH macro, we + * shouldn't replace such MULs with MOVs. + */ + if ((brw_reg_type_to_size(inst->src[0].type) == 4 || + brw_reg_type_to_size(inst->src[1].type) == 4) && + (inst->dst.is_accumulator() || + inst->writes_accumulator_implicitly(devinfo))) + break; + /* a * 1.0 = a */ if (inst->src[1].is_one()) { inst->opcode = BRW_OPCODE_MOV;
