Mesa (main): ir3: lower 64b registers

GitLab Mirror Fri, 15 Dec 2023 09:51:01 -0800

Module: Mesa
Branch: main
Commit: 286caa5080703a436f313fe8a575b8ec38657d50
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=286caa5080703a436f313fe8a575b8ec38657d50


Author: Job Noorman <[email protected]>
Date:   Fri Nov 10 11:57:34 2023 +0100

ir3: lower 64b registers

After all int64/double lowerings, there might still be 64b registers
left which ir3 currently doesn't handle. This only happens in a small
number of Piglit tests where those registers (or the variables they come
from) did not get DCE'd.

This patch handles 64b registers in ir3 by adding a NIR pass that does
the following:
 - @decl_reg -> split in two 32b ones
 - @store_reg -> unpack_64_2x32_split_x/y and two separate stores
 - @load_reg -> two separate loads and pack_64_2x32_split

After this pass, the 64b vecs used for the original loads/stores are
still present and are also not handled yet by ir3. This patch removes
them by running nir_lower_alu_to_scalar and nir_copy_prop.

Signed-off-by: Job Noorman <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26175>

---

 src/freedreno/ci/freedreno-a618-fails.txt |  6 --
 src/freedreno/ci/freedreno-a630-fails.txt |  6 --
 src/freedreno/ir3/ir3_context.c           | 15 +++++
 src/freedreno/ir3/ir3_nir.h               |  1 +
 src/freedreno/ir3/ir3_nir_lower_64b.c     | 97 +++++++++++++++++++++++++++++++
 5 files changed, 113 insertions(+), 12 deletions(-)

diff --git a/src/freedreno/ci/freedreno-a618-fails.txt b/src/freedreno/ci/freedreno-a618-fails.txt
index efd88a3b535..c2ffed11f95 100644
--- a/src/freedreno/ci/freedreno-a618-fails.txt
+++ b/src/freedreno/ci/freedreno-a618-fails.txt
@@ -106,16 +106,10 @@ spec@arb_shader_image_load_store@qualifiers@r8/strict layout qualifiers/permissi
 # ir3_nir_lower_tess.c:251: lower_block_to_explicit_output: Assertion `util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1)' failed.
 spec@arb_tessellation_shader@execution@tcs-input-read-mat,Crash
 
-# Some 64b not getting lowered to 32b:
-spec@arb_tessellation_shader@execution@variable-indexing@vs-output-array-dvec4-index-wr-before-tcs,Crash
-
 spec@arb_texture_rectangle@1-1-linear-texture,Fail
 
 spec@arb_vertex_type_2_10_10_10_rev@attrib-p-type-size-match,Fail
 
-# fails unrelated to GL_ARB_enhanced_layouts
-spec@arb_enhanced_layouts@execution@component-layout@vs-fs-array-dvec3,Crash
-
 # fails on gen1 (a618/a630) with both fd and zink, but passes on gen4..
 # maybe gen1 sqe doesn't handle the count==0 case?
 spec@arb_indirect_parameters@tf-count-arrays,Fail
diff --git a/src/freedreno/ci/freedreno-a630-fails.txt b/src/freedreno/ci/freedreno-a630-fails.txt
index 3c6a15eb89d..fb7bf5c0f1e 100644
--- a/src/freedreno/ci/freedreno-a630-fails.txt
+++ b/src/freedreno/ci/freedreno-a630-fails.txt
@@ -109,16 +109,10 @@ spec@arb_shader_image_load_store@qualifiers@r8/strict layout qualifiers/permissi
 # ir3_nir_lower_tess.c:251: lower_block_to_explicit_output: Assertion `util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1)' failed.
 spec@arb_tessellation_shader@execution@tcs-input-read-mat,Crash
 
-# Some 64b not getting lowered to 32b:
-spec@arb_tessellation_shader@execution@variable-indexing@vs-output-array-dvec4-index-wr-before-tcs,Crash
-
 spec@arb_texture_rectangle@1-1-linear-texture,Fail
 
 spec@arb_vertex_type_2_10_10_10_rev@attrib-p-type-size-match,Fail
 
-# fails unrelated to GL_ARB_enhanced_layouts
-spec@arb_enhanced_layouts@execution@component-layout@vs-fs-array-dvec3,Crash
-
 # fails on gen1 (a618/a630) with both fd and zink, but passes on gen4..
 # maybe gen1 sqe doesn't handle the count==0 case?
 spec@arb_indirect_parameters@tf-count-arrays,Fail
diff --git a/src/freedreno/ir3/ir3_context.c b/src/freedreno/ir3/ir3_context.c
index 4cc3038d51b..4e540ee59ea 100644
--- a/src/freedreno/ir3/ir3_context.c
+++ b/src/freedreno/ir3/ir3_context.c
@@ -91,6 +91,21 @@ ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader *shader,
    bool needs_late_alg = false;
    NIR_PASS(progress, ctx->s, nir_lower_locals_to_regs, 1);
 
+   if (progress) {
+      bool regs_progress = false;
+
+      /* Split 64b registers into two 32b ones. */
+      NIR_PASS(regs_progress, ctx->s, ir3_nir_lower_64b_regs);
+
+      if (regs_progress) {
+         /* After splitting registers, we might still have some 64b vecs. Run
+          * some passes to get rid of them.
+          */
+         NIR_PASS_V(ctx->s, nir_lower_alu_to_scalar, NULL, NULL);
+         NIR_PASS_V(ctx->s, nir_copy_prop);
+      }
+   }
+
    /* we could need cleanup after lower_locals_to_regs */
    while (progress) {
       progress = false;
diff --git a/src/freedreno/ir3/ir3_nir.h b/src/freedreno/ir3/ir3_nir.h
index de6b70e7877..a4adde07225 100644
--- a/src/freedreno/ir3/ir3_nir.h
+++ b/src/freedreno/ir3/ir3_nir.h
@@ -65,6 +65,7 @@ void ir3_nir_lower_gs(nir_shader *shader);
 bool ir3_nir_lower_64b_intrinsics(nir_shader *shader);
 bool ir3_nir_lower_64b_undef(nir_shader *shader);
 bool ir3_nir_lower_64b_global(nir_shader *shader);
+bool ir3_nir_lower_64b_regs(nir_shader *shader);
 
 void ir3_optimize_loop(struct ir3_compiler *compiler, nir_shader *s);
 void ir3_nir_lower_io_to_temporaries(nir_shader *s);
diff --git a/src/freedreno/ir3/ir3_nir_lower_64b.c b/src/freedreno/ir3/ir3_nir_lower_64b.c
index e22a05cb622..5d35743c626 100644
--- a/src/freedreno/ir3/ir3_nir_lower_64b.c
+++ b/src/freedreno/ir3/ir3_nir_lower_64b.c
@@ -299,3 +299,100 @@ ir3_nir_lower_64b_global(nir_shader *shader)
          shader, lower_64b_global_filter,
          lower_64b_global, NULL);
 }
+
+/*
+ * Lowering for 64b registers:
+ * - @decl_reg -> split in two 32b ones
+ * - @store_reg -> unpack_64_2x32_split_x/y and two separate stores
+ * - @load_reg -> two separate loads and pack_64_2x32_split
+ */
+
+static void
+lower_64b_reg(nir_builder *b, nir_intrinsic_instr *reg)
+{
+   unsigned num_components = nir_intrinsic_num_components(reg);
+   unsigned num_array_elems = nir_intrinsic_num_array_elems(reg);
+
+   nir_def *reg_hi = nir_decl_reg(b, num_components, 32, num_array_elems);
+   nir_def *reg_lo = nir_decl_reg(b, num_components, 32, num_array_elems);
+
+   nir_foreach_reg_store_safe (store_reg_src, reg) {
+      nir_intrinsic_instr *store =
+         nir_instr_as_intrinsic(nir_src_parent_instr(store_reg_src));
+      b->cursor = nir_before_instr(&store->instr);
+
+      nir_def *packed = store->src[0].ssa;
+      nir_def *unpacked_lo = nir_unpack_64_2x32_split_x(b, packed);
+      nir_def *unpacked_hi = nir_unpack_64_2x32_split_y(b, packed);
+      int base = nir_intrinsic_base(store);
+
+      if (store->intrinsic == nir_intrinsic_store_reg) {
+         nir_build_store_reg(b, unpacked_lo, reg_lo, .base = base);
+         nir_build_store_reg(b, unpacked_hi, reg_hi, .base = base);
+      } else {
+         assert(store->intrinsic == nir_intrinsic_store_reg_indirect);
+
+         nir_def *offset = store->src[2].ssa;
+         nir_store_reg_indirect(b, unpacked_lo, reg_lo, offset, .base = base);
+         nir_store_reg_indirect(b, unpacked_hi, reg_hi, offset, .base = base);
+      }
+
+      nir_instr_remove(&store->instr);
+   }
+
+   nir_foreach_reg_load_safe (load_reg_src, reg) {
+      nir_intrinsic_instr *load =
+         nir_instr_as_intrinsic(nir_src_parent_instr(load_reg_src));
+      b->cursor = nir_before_instr(&load->instr);
+
+      int base = nir_intrinsic_base(load);
+      nir_def *load_lo, *load_hi;
+
+      if (load->intrinsic == nir_intrinsic_load_reg) {
+         load_lo =
+            nir_build_load_reg(b, num_components, 32, reg_lo, .base = base);
+         load_hi =
+            nir_build_load_reg(b, num_components, 32, reg_hi, .base = base);
+      } else {
+         assert(load->intrinsic == nir_intrinsic_load_reg_indirect);
+
+         nir_def *offset = load->src[1].ssa;
+         load_lo = nir_load_reg_indirect(b, num_components, 32, reg_lo, offset,
+                                         .base = base);
+         load_hi = nir_load_reg_indirect(b, num_components, 32, reg_hi, offset,
+                                         .base = base);
+      }
+
+      nir_def *packed = nir_pack_64_2x32_split(b, load_lo, load_hi);
+      nir_def_rewrite_uses(&load->def, packed);
+      nir_instr_remove(&load->instr);
+   }
+
+   nir_instr_remove(&reg->instr);
+}
+
+bool
+ir3_nir_lower_64b_regs(nir_shader *shader)
+{
+   bool progress = false;
+
+   nir_foreach_function_impl (impl, shader) {
+      bool impl_progress = false;
+      nir_builder b = nir_builder_create(impl);
+
+      nir_foreach_reg_decl_safe (reg, impl) {
+         if (nir_intrinsic_bit_size(reg) == 64) {
+            lower_64b_reg(&b, reg);
+            impl_progress = true;
+         }
+      }
+
+      if (impl_progress) {
+         nir_metadata_preserve(
+            impl, nir_metadata_block_index | nir_metadata_dominance);
+         progress = true;
+      }
+   }
+
+   return progress;
+}

Mesa (main): ir3: lower 64b registers

Reply via email to