Module: Mesa
Branch: main
Commit: 5bc27e80c9617b3d156dd91fba3a9f5b33fe445d
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5bc27e80c9617b3d156dd91fba3a9f5b33fe445d
Author: Rhys Perry <[email protected]>
Date: Wed Nov 29 15:46:39 2023 +0000

ac/nir: fix 32-bit offset global access optimization

Since 38cff03e58b4 ("radv: use nir_lower_conv64"), u2u64(a) is replaced with pack_64_2x32_split(a, 0).

fossil-db (navi31):
Totals from 1203 (1.52% of 79330) affected shaders:
MaxWaves: 33685 -> 33813 (+0.38%)
Instrs: 1407031 -> 1374689 (-2.30%); split: -2.32%, +0.02%
CodeSize: 7088652 -> 6917320 (-2.42%); split: -2.43%, +0.01%
VGPRs: 69276 -> 68988 (-0.42%); split: -0.43%, +0.02%
SpillSGPRs: 982 -> 977 (-0.51%); split: -0.92%, +0.41%
Latency: 12536511 -> 12451605 (-0.68%); split: -0.94%, +0.27%
InvThroughput: 2456803 -> 2431241 (-1.04%); split: -1.09%, +0.05%
VClause: 27624 -> 27832 (+0.75%); split: -1.88%, +2.64%
SClause: 31757 -> 32702 (+2.98%); split: -0.53%, +3.51%
Copies: 90923 -> 91238 (+0.35%); split: -1.47%, +1.81%
Branches: 25127 -> 25128 (+0.00%); split: -0.00%, +0.01%
PreSGPRs: 46025 -> 46068 (+0.09%); split: -0.53%, +0.62%
PreVGPRs: 53944 -> 53488 (-0.85%)

Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Georg Lehmann <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26402>

---

 src/amd/common/ac_nir_lower_global_access.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_nir_lower_global_access.c b/src/amd/common/ac_nir_lower_global_access.c
index e760450dcdc..48f2453b600 100644
--- a/src/amd/common/ac_nir_lower_global_access.c
+++ b/src/amd/common/ac_nir_lower_global_access.c
@@ -8,6 +8,20 @@
 #include "nir.h"
 #include "nir_builder.h"
 
+static bool
+is_u2u64(nir_scalar scalar)
+{
+   if (nir_scalar_is_alu(scalar) && nir_scalar_alu_op(scalar) == nir_op_u2u64)
+      return true;
+
+   if (nir_scalar_is_alu(scalar) && nir_scalar_alu_op(scalar) == nir_op_pack_64_2x32_split) {
+      nir_scalar src1 = nir_scalar_chase_alu_src(scalar, 1);
+      return nir_scalar_is_const(src1) && nir_scalar_as_uint(src1) == 0;
+   }
+
+   return false;
+}
+
 static nir_def *
 try_extract_additions(nir_builder *b, nir_scalar scalar, uint64_t *out_const,
                       nir_def **out_offset)
@@ -23,7 +37,7 @@ try_extract_additions(nir_builder *b, nir_scalar scalar, uint64_t *out_const,
       nir_scalar src = i ? src1 : src0;
       if (nir_scalar_is_const(src)) {
          *out_const += nir_scalar_as_uint(src);
-      } else if (nir_scalar_is_alu(src) && nir_scalar_alu_op(src) == nir_op_u2u64) {
+      } else if (is_u2u64(src)) {
          nir_scalar offset_scalar = nir_scalar_chase_alu_src(src, 0);
          if (offset_scalar.def->bit_size != 32)
             continue;
