Module: Mesa Branch: main Commit: da6a5e1f63713d0d1dd66841e1f9bb754a0cdb99 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=da6a5e1f63713d0d1dd66841e1f9bb754a0cdb99
Author: Bas Nieuwenhuizen <[email protected]> Date: Wed Nov 1 15:32:14 2023 +0100 nir: Add pass for clearing memory at the end of a shader. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26679> --- src/compiler/nir/nir.h | 3 + src/compiler/nir/nir_lower_variable_initializers.c | 96 ++++++++++++++++++++++ 2 files changed, 99 insertions(+) diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 1c7e137efb3..5363cbd6075 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -5545,6 +5545,9 @@ bool nir_lower_variable_initializers(nir_shader *shader, bool nir_zero_initialize_shared_memory(nir_shader *shader, const unsigned shared_size, const unsigned chunk_size); +bool nir_clear_shared_memory(nir_shader *shader, + const unsigned shared_size, + const unsigned chunk_size); bool nir_move_vec_src_uses_to_dest(nir_shader *shader, bool skip_const_srcs); bool nir_lower_vec_to_regs(nir_shader *shader, nir_instr_writemask_filter_cb cb, diff --git a/src/compiler/nir/nir_lower_variable_initializers.c b/src/compiler/nir/nir_lower_variable_initializers.c index 57cf6aa561e..20a7ae37e5a 100644 --- a/src/compiler/nir/nir_lower_variable_initializers.c +++ b/src/compiler/nir/nir_lower_variable_initializers.c @@ -191,3 +191,99 @@ nir_zero_initialize_shared_memory(nir_shader *shader, return true; } + + +/** Clears all shared memory to zero at the end of the shader + * + * To easily get to the end of the shader it relies on all exits + * being lowered. Designed to be called late in the lowering process, + * e.g. doesn't need to lower vars to ssa. 
 */
bool
nir_clear_shared_memory(nir_shader *shader,
                        const unsigned shared_size,
                        const unsigned chunk_size)
{
   /* Chunks are stored as vectors of 32-bit components, so the chunk size
    * must be a positive multiple of 4 bytes.
    */
   assert(chunk_size > 0);
   assert(chunk_size % 4 == 0);

   /* No shared memory to clear. */
   if (shared_size == 0)
      return false;

   /* Build at the very end of the entrypoint.  This relies on all exits
    * having been lowered already, so this point is reached by every
    * invocation (see the comment block above).
    */
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
   nir_builder b = nir_builder_at(nir_after_impl(impl));

   /* The initialization logic is simplified if we can always split the memory
    * in full chunk_size units.
    */
   assert(shared_size % chunk_size == 0);

   /* Number of 32-bit components written by each store. */
   const unsigned chunk_comps = chunk_size / 4;

   /* Wait for all prior shared-memory accesses in the workgroup before
    * overwriting the contents.
    */
   nir_barrier(&b, SCOPE_WORKGROUP, SCOPE_WORKGROUP, NIR_MEMORY_ACQ_REL,
               nir_var_mem_shared);

   /* Each invocation starts at its own chunk_size-aligned slot, so
    * consecutive invocations clear consecutive chunks.
    */
   nir_def *local_index = nir_load_local_invocation_index(&b);
   nir_def *first_offset = nir_imul_imm(&b, local_index, chunk_size);

   /* With a compile-time workgroup size we know how many bytes the whole
    * workgroup clears per round (size_per_iteration) and thus the trip
    * count.  With a variable workgroup size the trip count is unknown, so
    * iterations stays at UINT_MAX, forcing the loop path below.
    */
   unsigned iterations = UINT_MAX;
   unsigned size_per_iteration = 0;
   if (!shader->info.workgroup_size_variable) {
      size_per_iteration = nir_static_workgroup_size(shader) * chunk_size;
      iterations = DIV_ROUND_UP(shared_size, size_per_iteration);
   }

   if (iterations <= shader->options->max_unroll_iterations) {
      /* Doing a manual inline here because (a) we may not optimize after and
       * (b) the loop unroll pass doesn't deal well with the potential partial
       * last iteration.*/
      for (unsigned i = 0; i < iterations; ++i) {
         const unsigned base = size_per_iteration * i;
         /* Only the final, potentially partial, round can write past the end
          * of shared memory, so only it needs a bounds check.
          */
         bool use_check = i >= shared_size / size_per_iteration;
         if (use_check)
            nir_push_if(&b, nir_ult_imm(&b, first_offset, shared_size - base));

         nir_store_shared(&b, nir_imm_zero(&b, chunk_comps, 32),
                          nir_iadd_imm(&b, first_offset, base),
                          .align_mul = chunk_size,
                          .write_mask = ((1 << chunk_comps) - 1));
         if (use_check)
            nir_pop_if(&b, NULL);
      }
   } else {
      /* Too many rounds to unroll: emit a loop that strides the offset by
       * the per-round amount until the whole range is covered.  The phi
       * carrying the offset is built by hand because this pass may run too
       * late for vars-to-ssa (see the comment block above).
       */
      nir_phi_instr *offset_phi = nir_phi_instr_create(shader);
      nir_def_init(&offset_phi->instr, &offset_phi->def, 1, 32);
      /* Loop-entry source: offset starts at this invocation's first chunk. */
      nir_phi_instr_add_src(offset_phi, nir_cursor_current_block(b.cursor), first_offset);

      /* NOTE(review): nir_load_workgroup_size yields a 3-component vector;
       * presumably the intent is a scalar stride of total-invocations *
       * chunk_size — verify the variable-workgroup-size path produces a
       * scalar here.
       */
      nir_def *size_per_iteration_def =
         shader->info.workgroup_size_variable ?
            nir_imul_imm(&b, nir_load_workgroup_size(&b), chunk_size) :
            nir_imm_int(&b, size_per_iteration);
      nir_def *value = nir_imm_zero(&b, chunk_comps, 32);

      nir_loop *loop = nir_push_loop(&b);
      nir_block *loop_block = nir_cursor_current_block(b.cursor);
      {
         nir_def *offset = &offset_phi->def;

         /* Exit once this invocation has cleared its last chunk. */
         nir_push_if(&b, nir_uge_imm(&b, offset, shared_size));
         {
            nir_jump(&b, nir_jump_break);
         }
         nir_pop_if(&b, NULL);
         nir_store_shared(&b, value, offset,
                          .align_mul = chunk_size,
                          .write_mask = ((1 << chunk_comps) - 1));

         /* Advance by the per-round stride and feed the result back as the
          * phi's back-edge source.
          */
         nir_def *new_offset = nir_iadd(&b, offset, size_per_iteration_def);
         nir_phi_instr_add_src(offset_phi, nir_cursor_current_block(b.cursor), new_offset);
      }
      nir_pop_loop(&b, loop);

      /* The phi has to live at the top of the loop header, which only exists
       * now that the loop has been built, so insert it retroactively.
       */
      b.cursor = nir_before_block(loop_block);
      nir_builder_instr_insert(&b, &offset_phi->instr);
   }

   /* Control flow was added, so no analysis results survive. */
   nir_metadata_preserve(nir_shader_get_entrypoint(shader), nir_metadata_none);

   return true;
}
