Module: Mesa
Branch: main
Commit: 5bc27e80c9617b3d156dd91fba3a9f5b33fe445d
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=5bc27e80c9617b3d156dd91fba3a9f5b33fe445d

Author: Rhys Perry <[email protected]>
Date:   Wed Nov 29 15:46:39 2023 +0000

ac/nir: fix 32-bit offset global access optimization

Since 38cff03e58b4 ("radv: use nir_lower_conv64"), u2u64(a) is replaced
with pack_64_2x32_split(a, 0).

fossil-db (navi31):
Totals from 1203 (1.52% of 79330) affected shaders:
MaxWaves: 33685 -> 33813 (+0.38%)
Instrs: 1407031 -> 1374689 (-2.30%); split: -2.32%, +0.02%
CodeSize: 7088652 -> 6917320 (-2.42%); split: -2.43%, +0.01%
VGPRs: 69276 -> 68988 (-0.42%); split: -0.43%, +0.02%
SpillSGPRs: 982 -> 977 (-0.51%); split: -0.92%, +0.41%
Latency: 12536511 -> 12451605 (-0.68%); split: -0.94%, +0.27%
InvThroughput: 2456803 -> 2431241 (-1.04%); split: -1.09%, +0.05%
VClause: 27624 -> 27832 (+0.75%); split: -1.88%, +2.64%
SClause: 31757 -> 32702 (+2.98%); split: -0.53%, +3.51%
Copies: 90923 -> 91238 (+0.35%); split: -1.47%, +1.81%
Branches: 25127 -> 25128 (+0.00%); split: -0.00%, +0.01%
PreSGPRs: 46025 -> 46068 (+0.09%); split: -0.53%, +0.62%
PreVGPRs: 53944 -> 53488 (-0.85%)

Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Georg Lehmann <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26402>

---

 src/amd/common/ac_nir_lower_global_access.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_nir_lower_global_access.c b/src/amd/common/ac_nir_lower_global_access.c
index e760450dcdc..48f2453b600 100644
--- a/src/amd/common/ac_nir_lower_global_access.c
+++ b/src/amd/common/ac_nir_lower_global_access.c
@@ -8,6 +8,20 @@
 #include "nir.h"
 #include "nir_builder.h"
 
+static bool
+is_u2u64(nir_scalar scalar)
+{
+   if (nir_scalar_is_alu(scalar) && nir_scalar_alu_op(scalar) == nir_op_u2u64)
+      return true;
+
+   if (nir_scalar_is_alu(scalar) && nir_scalar_alu_op(scalar) == nir_op_pack_64_2x32_split) {
+      nir_scalar src1 = nir_scalar_chase_alu_src(scalar, 1);
+      return nir_scalar_is_const(src1) && nir_scalar_as_uint(src1) == 0;
+   }
+
+   return false;
+}
+
 static nir_def *
 try_extract_additions(nir_builder *b, nir_scalar scalar, uint64_t *out_const,
                       nir_def **out_offset)
@@ -23,7 +37,7 @@ try_extract_additions(nir_builder *b, nir_scalar scalar, uint64_t *out_const,
       nir_scalar src = i ? src1 : src0;
       if (nir_scalar_is_const(src)) {
          *out_const += nir_scalar_as_uint(src);
-      } else if (nir_scalar_is_alu(src) && nir_scalar_alu_op(src) == nir_op_u2u64) {
+      } else if (is_u2u64(src)) {
          nir_scalar offset_scalar = nir_scalar_chase_alu_src(src, 0);
          if (offset_scalar.def->bit_size != 32)
             continue;

Reply via email to