Module: Mesa
Branch: main
Commit: da6a5e1f63713d0d1dd66841e1f9bb754a0cdb99
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=da6a5e1f63713d0d1dd66841e1f9bb754a0cdb99

Author: Bas Nieuwenhuizen <[email protected]>
Date:   Wed Nov  1 15:32:14 2023 +0100

nir: Add pass for clearing memory at the end of a shader.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26679>

---

 src/compiler/nir/nir.h                             |  3 +
 src/compiler/nir/nir_lower_variable_initializers.c | 96 ++++++++++++++++++++++
 2 files changed, 99 insertions(+)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 1c7e137efb3..5363cbd6075 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -5545,6 +5545,9 @@ bool nir_lower_variable_initializers(nir_shader *shader,
 bool nir_zero_initialize_shared_memory(nir_shader *shader,
                                        const unsigned shared_size,
                                        const unsigned chunk_size);
+bool nir_clear_shared_memory(nir_shader *shader,
+                             const unsigned shared_size,
+                             const unsigned chunk_size);
 
 bool nir_move_vec_src_uses_to_dest(nir_shader *shader, bool skip_const_srcs);
 bool nir_lower_vec_to_regs(nir_shader *shader, nir_instr_writemask_filter_cb cb,
diff --git a/src/compiler/nir/nir_lower_variable_initializers.c b/src/compiler/nir/nir_lower_variable_initializers.c
index 57cf6aa561e..20a7ae37e5a 100644
--- a/src/compiler/nir/nir_lower_variable_initializers.c
+++ b/src/compiler/nir/nir_lower_variable_initializers.c
@@ -191,3 +191,99 @@ nir_zero_initialize_shared_memory(nir_shader *shader,
 
    return true;
 }
+
+
+/** Clears all shared memory to zero at the end of the shader
+ *
+ * To easily get to the end of the shader, this pass relies on all exits
+ * having been lowered already. It is designed to be called late in the
+ * lowering process, e.g. it does not require variables to be lowered to SSA.
+ */
+bool
+nir_clear_shared_memory(nir_shader *shader,
+                        const unsigned shared_size,
+                        const unsigned chunk_size)
+{
+   assert(chunk_size > 0);
+   assert(chunk_size % 4 == 0);
+
+   if (shared_size == 0)
+      return false;
+
+   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
+   nir_builder b = nir_builder_at(nir_after_impl(impl));
+
+   /* The initialization logic is simplified if we can always split the memory
+    * in full chunk_size units.
+    */
+   assert(shared_size % chunk_size == 0);
+
+   const unsigned chunk_comps = chunk_size / 4;
+
+   nir_barrier(&b, SCOPE_WORKGROUP, SCOPE_WORKGROUP, NIR_MEMORY_ACQ_REL,
+               nir_var_mem_shared);
+
+   nir_def *local_index = nir_load_local_invocation_index(&b);
+   nir_def *first_offset = nir_imul_imm(&b, local_index, chunk_size);
+
+   unsigned iterations = UINT_MAX;
+   unsigned size_per_iteration = 0;
+   if (!shader->info.workgroup_size_variable) {
+      size_per_iteration = nir_static_workgroup_size(shader) * chunk_size;
+      iterations = DIV_ROUND_UP(shared_size, size_per_iteration);
+   }
+
+   if (iterations <= shader->options->max_unroll_iterations) {
+      /* Unrolling the loop manually here because (a) we may not optimize
+       * afterwards and (b) the loop unroll pass doesn't deal well with the
+       * potential partial last iteration. */
+      for (unsigned i = 0; i < iterations; ++i) {
+         const unsigned base = size_per_iteration * i;
+         bool use_check = i >= shared_size / size_per_iteration;
+         if (use_check)
+            nir_push_if(&b, nir_ult_imm(&b, first_offset, shared_size - base));
+
+         nir_store_shared(&b, nir_imm_zero(&b, chunk_comps, 32),
+                          nir_iadd_imm(&b, first_offset, base),
+                          .align_mul = chunk_size,
+                          .write_mask = ((1 << chunk_comps) - 1));
+         if (use_check)
+            nir_pop_if(&b, NULL);
+      }
+   } else {
+      nir_phi_instr *offset_phi = nir_phi_instr_create(shader);
+      nir_def_init(&offset_phi->instr, &offset_phi->def, 1, 32);
+      nir_phi_instr_add_src(offset_phi, nir_cursor_current_block(b.cursor), first_offset);
+
+      nir_def *size_per_iteration_def = shader->info.workgroup_size_variable ?
+                             nir_imul_imm(&b, nir_load_workgroup_size(&b), chunk_size) :
+                             nir_imm_int(&b, size_per_iteration);
+      nir_def *value = nir_imm_zero(&b, chunk_comps, 32);
+
+      nir_loop *loop = nir_push_loop(&b);
+      nir_block *loop_block = nir_cursor_current_block(b.cursor);
+      {
+         nir_def *offset = &offset_phi->def;
+
+         nir_push_if(&b, nir_uge_imm(&b, offset, shared_size));
+         {
+            nir_jump(&b, nir_jump_break);
+         }
+         nir_pop_if(&b, NULL);
+         nir_store_shared(&b, value, offset,
+                          .align_mul = chunk_size,
+                          .write_mask = ((1 << chunk_comps) - 1));
+
+         nir_def *new_offset = nir_iadd(&b, offset, size_per_iteration_def);
+         nir_phi_instr_add_src(offset_phi, nir_cursor_current_block(b.cursor), new_offset);
+      }
+      nir_pop_loop(&b, loop);
+
+      b.cursor = nir_before_block(loop_block);
+      nir_builder_instr_insert(&b, &offset_phi->instr);
+   }
+
+   nir_metadata_preserve(nir_shader_get_entrypoint(shader), nir_metadata_none);
+
+   return true;
+}
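
For anyone wanting to try the new pass: a minimal, hypothetical driver-side
invocation might look like the sketch below. The chunk size and the use of
nir->info.shared_size are illustrative assumptions, not something this commit
prescribes; the pass only requires a non-zero chunk_size that is a multiple of
4 and evenly divides shared_size, and it must run after all shader exits have
been lowered so that nir_after_impl() really is the end of the shader.

    /* Hypothetical usage sketch (not part of this commit): clear the
     * workgroup's shared memory on shader exit, e.g. so one dispatch cannot
     * observe another's leftover LDS contents. The 16-byte chunk size and
     * the assumption that the driver pads shared_size to a multiple of it
     * are illustrative.
     */
    bool progress = false;
    NIR_PASS(progress, nir, nir_clear_shared_memory,
             nir->info.shared_size, /* shared_size: bytes of LDS to clear */
             16 /* chunk_size: bytes written per store, multiple of 4 */);

For a sense of which code path this generates: with a fixed 8x8x1 workgroup
(64 invocations) and chunk_size = 16, size_per_iteration is 1024 bytes, so a
4 KiB shared allocation needs 4 iterations, typically below
max_unroll_iterations, and is emitted as a few unrolled stores per invocation;
a variable workgroup size always takes the loop-with-phi path.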
