--- src/compiler/nir/meson.build | 1 + src/compiler/nir/nir.h | 2 + src/compiler/nir/nir_divergence_analysis.c | 333 +++++++++++++++++++++ 3 files changed, 336 insertions(+) create mode 100644 src/compiler/nir/nir_divergence_analysis.c
diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build index 090aa7a628..aabfeee02c 100644 --- a/src/compiler/nir/meson.build +++ b/src/compiler/nir/meson.build @@ -96,6 +96,7 @@ files_libnir = files( 'nir_control_flow_private.h', 'nir_deref.c', 'nir_deref.h', + 'nir_divergence_analysis.c', 'nir_dominance.c', 'nir_format_convert.h', 'nir_from_ssa.c', diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index e0df95c391..374280a1cc 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -3010,6 +3010,8 @@ void nir_convert_loop_to_lcssa(nir_loop *loop); */ bool nir_convert_from_ssa(nir_shader *shader, bool phi_webs_only); +bool* nir_divergence_analysis(nir_shader *shader); + bool nir_lower_phis_to_regs_block(nir_block *block); bool nir_lower_ssa_defs_to_regs_block(nir_block *block); bool nir_rematerialize_derefs_in_use_blocks_impl(nir_function_impl *impl); diff --git a/src/compiler/nir/nir_divergence_analysis.c b/src/compiler/nir/nir_divergence_analysis.c new file mode 100644 index 0000000000..d91f4e55e6 --- /dev/null +++ b/src/compiler/nir/nir_divergence_analysis.c @@ -0,0 +1,333 @@ +/* + * Copyright ?? 2018 Valve Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Daniel Sch??rmann ([email protected]) + * + */ + +#include "nir.h" +#include "nir_worklist.h" + +/* This pass computes for each ssa definition if it is uniform. + * That is, the variable has the same value for all invocations + * of the group. + * + * This algorithm implements "The Simple Divergence Analysis" from + * Diogo Sampaio, Rafael De Souza, Sylvain Collange, Fernando Magno Quint??o Pereira. + * Divergence Analysis. ACM Transactions on Programming Languages and Systems (TOPLAS), + * ACM, 2013, 35 (4), pp.13:1-13:36. <10.1145/2523815>. <hal-00909072v2> + */ + + +static bool alu_src_is_divergent(bool *divergent, nir_alu_src src, unsigned num_input_components) +{ + /* If the alu src is swizzled and defined by a vec-instruction, + * we can check if the originating value is non-divergent. */ + if (num_input_components == 1 && + src.src.ssa->num_components != 1 && + src.src.parent_instr->type == nir_instr_type_alu) { + nir_alu_instr *parent = nir_instr_as_alu(src.src.parent_instr); + switch(parent->op) { + case nir_op_vec2: + case nir_op_vec3: + case nir_op_vec4: { + if (divergent[parent->src[src.swizzle[0]].src.ssa->index]) + return true; + return false; + } + default: + break; + } + } + return divergent[src.src.ssa->index]; +} + +static bool visit_alu(bool *divergent, nir_alu_instr *instr) +{ + if (divergent[instr->dest.dest.ssa.index]) + return false; + unsigned num_src = nir_op_infos[instr->op].num_inputs; + for (unsigned i = 0; i < num_src; i++) { + if (alu_src_is_divergent(divergent, instr->src[i], nir_op_infos[instr->op].input_sizes[i])) { + divergent[instr->dest.dest.ssa.index] = true; + return true; + } + } + divergent[instr->dest.dest.ssa.index] = false; + return false; +} + +static bool visit_intrinsic(bool *divergent, nir_intrinsic_instr *instr) +{ + if (!nir_intrinsic_infos[instr->intrinsic].has_dest) + return false; + if (divergent[instr->dest.ssa.index]) + return false; + bool is_divergent = false; + switch (instr->intrinsic) { + /* TODO: load_shared_var */ + /* load_uniform etc.*/ + case nir_intrinsic_shader_clock: + case nir_intrinsic_ballot: + case nir_intrinsic_read_invocation: + case nir_intrinsic_read_first_invocation: + case nir_intrinsic_vote_any: + case nir_intrinsic_vote_all: + case nir_intrinsic_vote_feq: + case nir_intrinsic_vote_ieq: + case nir_intrinsic_reduce: + case nir_intrinsic_load_push_constant: + case nir_intrinsic_vulkan_resource_index: + is_divergent = false; + break; + case nir_intrinsic_load_ubo: + for (unsigned i = 0; i < nir_intrinsic_infos[instr->intrinsic].num_srcs; i++) { + if (divergent[instr->src[i].ssa->index]) { + is_divergent = true; + break; + } + } + break; + case nir_intrinsic_load_interpolated_input: + case nir_intrinsic_load_barycentric_pixel: + default: + is_divergent = true; + break; + } + divergent[instr->dest.ssa.index] = is_divergent; + return is_divergent; +} + +static bool visit_tex(bool *divergent, nir_tex_instr *instr) +{ + if (divergent[instr->dest.ssa.index]) + return false; + bool is_divergent = false; + for (unsigned i = 0; i < instr->num_srcs; i++) { + switch (instr->src[i].src_type) { + case nir_tex_src_coord: + is_divergent |= divergent[instr->src[i].src.ssa->index]; + break; + default: + break; + } + } + divergent[instr->dest.ssa.index] = is_divergent; + return is_divergent; +} + + +/** There are 3 types of phi instructions: + * (1) gamma: represent the joining point of different paths + * created by an ???if-then-else??? branch. + * The resulting value is divergent if the branch condition + * or any of the source values is divergent. + * + * (2) mu: which only exist at loop headers, + * merge initial and loop-carried values. + * The resulting value is divergent if any source value + * is divergent or a divergent loop continue condition + * is associated with a different ssa-def. + * + * (3) eta: represent values that leave a loop. + * The resulting value is divergent if any loop exit condition + * or source value is divergent. + */ +static bool visit_phi(bool *divergent, nir_phi_instr *instr) +{ + if (divergent[instr->dest.ssa.index]) + return false; + + /* if any source value is divergent, the resulting value is divergent */ + nir_foreach_phi_src(src, instr) { + if (divergent[src->src.ssa->index]) { + divergent[instr->dest.ssa.index] = true; + return true; + } + } + + nir_cf_node *prev = nir_cf_node_prev(&instr->instr.block->cf_node); + + /* mu: if no predecessor node exists, the phi must be at a loop header */ + if (!prev) { + /* find the two unconditional ssa-defs (the incoming value and the back-edge) */ + nir_loop *loop = nir_cf_node_as_loop(instr->instr.block->cf_node.parent); + prev = nir_cf_node_prev(instr->instr.block->cf_node.parent); + unsigned unconditional[2]; + unsigned idx = 0; + nir_foreach_phi_src(src, instr) { + if (src->pred == nir_loop_last_block(loop) || + src->pred == nir_cf_node_as_block(prev)) + unconditional[idx++] = src->src.ssa->index; + } + assert(idx == 2); + + /* check if the loop-carried values come from a different ssa-def + * and the corresponding condition is divergent. */ + nir_foreach_phi_src(src, instr) { + if (src->src.ssa->index == unconditional[0] || + src->src.ssa->index == unconditional[1]) + continue; + + nir_cf_node *current = src->pred->cf_node.parent; + /* check recursively the conditions if any is divergent */ + while (current->type != nir_cf_node_loop) { + if (current->type == nir_cf_node_if) { + nir_if *if_node = nir_cf_node_as_if(current); + if (divergent[if_node->condition.ssa->index]) { + divergent[instr->dest.ssa.index] = true; + return true; + } + } + current = current->parent; + } + } + } + + /* gamma: check if the condition is divergent */ + else if (prev->type == nir_cf_node_if) { + nir_if *if_node = nir_cf_node_as_if(prev); + if (divergent[if_node->condition.ssa->index]) { + divergent[instr->dest.ssa.index] = true; + return true; + } + } + + /* eta: check if any loop exit condition is divergent */ + else { + assert(prev->type == nir_cf_node_loop); + nir_foreach_phi_src(src, instr) { + nir_cf_node *current = src->pred->cf_node.parent; + assert(current->type == nir_cf_node_if); + + /* check recursively the conditions if any is divergent */ + while (current->type != nir_cf_node_loop) { + if (current->type == nir_cf_node_if) { + nir_if *if_node = nir_cf_node_as_if(current); + if (divergent[if_node->condition.ssa->index]) { + divergent[instr->dest.ssa.index] = true; + return true; + } + } + current = current->parent; + } + } + } + divergent[instr->dest.ssa.index] = false; + return false; +} + +static bool visit_parallel_copy(bool *divergent, nir_parallel_copy_instr *instr) +{ + bool has_changed = false; + nir_foreach_parallel_copy_entry(entry, instr) { + if (divergent[entry->dest.ssa.index]) + continue; + divergent[entry->dest.ssa.index] = divergent[entry->src.ssa->index]; + if (divergent[entry->dest.ssa.index]) + has_changed = true; + } + return has_changed; +} + +static bool visit_load_const(bool *divergent, nir_load_const_instr *instr) +{ + divergent[instr->def.index] = false; + return false; +} + +static bool visit_ssa_undef(bool *divergent, nir_ssa_undef_instr *instr) +{ + divergent[instr->def.index] = false; + return false; +} + +static bool visit_deref(bool *divergent, nir_deref_instr *instr) +{ + nir_foreach_use(src, &instr->dest.ssa) + { + if (src->parent_instr->type != nir_instr_type_tex) { + divergent[instr->dest.ssa.index] = false; + return false; + } + } + bool before = divergent[instr->dest.ssa.index]; + divergent[instr->dest.ssa.index] = true; + return !before; +} + +bool* nir_divergence_analysis(nir_shader *shader) +{ + + nir_function_impl *impl = nir_shader_get_entrypoint(shader); + bool *t = rzalloc_array(shader, bool, impl->ssa_alloc); + nir_block_worklist worklist; + nir_block_worklist_init(&worklist, impl->num_blocks, NULL); + nir_block_worklist_add_all(&worklist, impl); + + while (!nir_block_worklist_is_empty(&worklist)) { + nir_block *block = nir_block_worklist_pop_head(&worklist); + bool has_changed = false; + + nir_foreach_instr(instr, block) { + switch (instr->type) { + case nir_instr_type_alu: + has_changed |= visit_alu(t, nir_instr_as_alu(instr)); + break; + case nir_instr_type_intrinsic: + has_changed |= visit_intrinsic(t, nir_instr_as_intrinsic(instr)); + break; + case nir_instr_type_tex: + has_changed |= visit_tex(t, nir_instr_as_tex(instr)); + break; + case nir_instr_type_phi: + has_changed |= visit_phi(t, nir_instr_as_phi(instr)); + break; + case nir_instr_type_parallel_copy: + has_changed |= visit_parallel_copy(t, nir_instr_as_parallel_copy(instr)); + break; + case nir_instr_type_load_const: + has_changed |= visit_load_const(t, nir_instr_as_load_const(instr)); + break; + case nir_instr_type_ssa_undef: + has_changed |= visit_ssa_undef(t, nir_instr_as_ssa_undef(instr)); + break; + case nir_instr_type_deref: + has_changed |= visit_deref(t, nir_instr_as_deref(instr)); + break; + case nir_instr_type_jump: + break; + case nir_instr_type_call: + assert(false); + default: + unreachable("Invalid instruction type"); + break; + } + } + if (has_changed) { + // FIXME: this is quite inefficient! + nir_block_worklist_add_all(&worklist, impl); + } + } + return t; +} -- 2.17.1
_______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
