Module: Mesa Branch: main Commit: 72341747f48d17a5e17b29ab0d914e29d24d9717 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=72341747f48d17a5e17b29ab0d914e29d24d9717
Author: Jesse Natalie <[email protected]> Date: Thu Nov 9 10:12:13 2023 -0800 d3d12: Split dvec3 interpolatns into devc2 and double Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26156> --- src/gallium/drivers/d3d12/d3d12_compiler.cpp | 2 +- src/gallium/drivers/d3d12/d3d12_nir_passes.c | 153 ++++++++++++++++----------- src/gallium/drivers/d3d12/d3d12_nir_passes.h | 2 +- 3 files changed, 96 insertions(+), 61 deletions(-) diff --git a/src/gallium/drivers/d3d12/d3d12_compiler.cpp b/src/gallium/drivers/d3d12/d3d12_compiler.cpp index 8bba7ebd2d2..9f142c5852b 100644 --- a/src/gallium/drivers/d3d12/d3d12_compiler.cpp +++ b/src/gallium/drivers/d3d12/d3d12_compiler.cpp @@ -1552,7 +1552,7 @@ d3d12_create_shader(struct d3d12_context *ctx, d3d12_shader_selector *next = get_next_shader(ctx, sel->stage); NIR_PASS_V(nir, dxil_nir_split_clip_cull_distance); - NIR_PASS_V(nir, d3d12_split_multistream_varyings); + NIR_PASS_V(nir, d3d12_split_needed_varyings); if (nir->info.stage != MESA_SHADER_VERTEX) nir->info.inputs_read = diff --git a/src/gallium/drivers/d3d12/d3d12_nir_passes.c b/src/gallium/drivers/d3d12/d3d12_nir_passes.c index 530755cb551..284d827ae2d 100644 --- a/src/gallium/drivers/d3d12/d3d12_nir_passes.c +++ b/src/gallium/drivers/d3d12/d3d12_nir_passes.c @@ -839,35 +839,40 @@ d3d12_disable_multisampling(nir_shader *s) return progress; } -struct multistream_subvar_state { +struct var_split_subvar_state { nir_variable *var; uint8_t stream; uint8_t num_components; }; -struct multistream_var_state { +struct var_split_var_state { unsigned num_subvars; - struct multistream_subvar_state subvars[4]; + struct var_split_subvar_state subvars[4]; }; -struct multistream_state { - struct multistream_var_state vars[VARYING_SLOT_MAX]; +struct var_split_state { + struct var_split_var_state vars[2][VARYING_SLOT_MAX]; }; static bool -split_multistream_varying_stores(nir_builder *b, nir_intrinsic_instr *intr, +split_varying_accesses(nir_builder *b, nir_intrinsic_instr *intr, void *_state) { - if (intr->intrinsic != nir_intrinsic_store_deref) + if (intr->intrinsic != nir_intrinsic_store_deref && + intr->intrinsic != nir_intrinsic_load_deref) return false; nir_deref_instr *deref = nir_src_as_deref(intr->src[0]); - if (!nir_deref_mode_is(deref, nir_var_shader_out)) + if (!nir_deref_mode_is(deref, nir_var_shader_out) && + !nir_deref_mode_is(deref, nir_var_shader_in)) return false; nir_variable *var = nir_deref_instr_get_variable(deref); - assert(var); + if (!var) + return false; + + uint32_t mode_index = deref->modes == nir_var_shader_out ? 0 : 1; - struct multistream_state *state = _state; - struct multistream_var_state *var_state = &state->vars[var->data.location]; + struct var_split_state *state = _state; + struct var_split_var_state *var_state = &state->vars[mode_index][var->data.location]; if (var_state->num_subvars <= 1) return false; @@ -876,6 +881,7 @@ split_multistream_varying_stores(nir_builder *b, nir_intrinsic_instr *intr, assert(path.path[0]->deref_type == nir_deref_type_var && path.path[0]->var == var); unsigned first_channel = 0; + nir_def *loads[2]; for (unsigned subvar = 0; subvar < var_state->num_subvars; ++subvar) { b->cursor = nir_after_instr(&path.path[0]->instr); nir_deref_instr *new_path = nir_build_deref_var(b, var_state->subvars[subvar].var); @@ -886,73 +892,102 @@ split_multistream_varying_stores(nir_builder *b, nir_intrinsic_instr *intr, } b->cursor = nir_before_instr(&intr->instr); - unsigned mask_num_channels = (1 << var_state->subvars[subvar].num_components) - 1; - unsigned orig_write_mask = nir_intrinsic_write_mask(intr); - nir_def *sub_value = nir_channels(b, intr->src[1].ssa, mask_num_channels << first_channel); - - first_channel += var_state->subvars[subvar].num_components; - - unsigned new_write_mask = (orig_write_mask >> first_channel) & mask_num_channels; - nir_build_store_deref(b, &new_path->def, sub_value, new_write_mask, nir_intrinsic_access(intr)); + if (intr->intrinsic == nir_intrinsic_store_deref) { + unsigned mask_num_channels = (1 << var_state->subvars[subvar].num_components) - 1; + unsigned orig_write_mask = nir_intrinsic_write_mask(intr); + nir_def *sub_value = nir_channels(b, intr->src[1].ssa, mask_num_channels << first_channel); + + first_channel += var_state->subvars[subvar].num_components; + + unsigned new_write_mask = (orig_write_mask >> first_channel) & mask_num_channels; + nir_build_store_deref(b, &new_path->def, sub_value, new_write_mask, nir_intrinsic_access(intr)); + } else { + /* The load path only handles splitting dvec3/dvec4 */ + assert(subvar == 0 || subvar == 1); + assert(intr->def.num_components >= 3); + loads[subvar] = nir_build_load_deref(b, var_state->subvars[subvar].num_components, intr->def.bit_size, &new_path->def); + } } nir_deref_path_finish(&path); + if (intr->intrinsic == nir_intrinsic_load_deref) { + nir_def *result = nir_extract_bits(b, loads, 2, 0, intr->def.num_components, intr->def.bit_size); + nir_def_rewrite_uses(&intr->def, result); + } nir_instr_free_and_dce(&intr->instr); return true; } bool -d3d12_split_multistream_varyings(nir_shader *s) +d3d12_split_needed_varyings(nir_shader *s) { - if (s->info.stage != MESA_SHADER_GEOMETRY) - return false; - - struct multistream_state state; + struct var_split_state state; memset(&state, 0, sizeof(state)); bool progress = false; - nir_foreach_variable_with_modes_safe(var, s, nir_var_shader_out) { - if ((var->data.stream & NIR_STREAM_PACKED) == 0) - continue; - - struct multistream_var_state *var_state = &state.vars[var->data.location]; - struct multistream_subvar_state *subvars = var_state->subvars; - for (unsigned i = 0; i < glsl_get_vector_elements(var->type); ++i) { - unsigned stream = (var->data.stream >> (2 * (i + var->data.location_frac))) & 0x3; - if (var_state->num_subvars == 0 || stream != subvars[var_state->num_subvars - 1].stream) { - subvars[var_state->num_subvars].stream = stream; - subvars[var_state->num_subvars].num_components = 1; - var_state->num_subvars++; - } else { - subvars[var_state->num_subvars - 1].num_components++; + nir_foreach_variable_with_modes_safe(var, s, nir_var_shader_out | nir_var_shader_in) { + uint32_t mode_index = var->data.mode == nir_var_shader_out ? 0 : 1; + struct var_split_var_state *var_state = &state.vars[mode_index][var->data.location]; + struct var_split_subvar_state *subvars = var_state->subvars; + if ((var->data.stream & NIR_STREAM_PACKED) != 0 && + s->info.stage == MESA_SHADER_GEOMETRY && + var->data.mode == nir_var_shader_out) { + for (unsigned i = 0; i < glsl_get_vector_elements(var->type); ++i) { + unsigned stream = (var->data.stream >> (2 * (i + var->data.location_frac))) & 0x3; + if (var_state->num_subvars == 0 || stream != subvars[var_state->num_subvars - 1].stream) { + subvars[var_state->num_subvars].stream = stream; + subvars[var_state->num_subvars].num_components = 1; + var_state->num_subvars++; + } else { + subvars[var_state->num_subvars - 1].num_components++; + } } - } - var->data.stream = subvars[0].stream; - if (var_state->num_subvars == 1) - continue; + var->data.stream = subvars[0].stream; + if (var_state->num_subvars == 1) + continue; - progress = true; - - subvars[0].var = var; - var->type = glsl_vector_type(glsl_get_base_type(var->type), subvars[0].num_components); - unsigned location_frac = var->data.location_frac + subvars[0].num_components; - for (unsigned subvar = 1; subvar < var_state->num_subvars; ++subvar) { - char *name = ralloc_asprintf(s, "unpacked:%s_stream%d", var->name, subvars[subvar].stream); - nir_variable *new_var = nir_variable_create(s, nir_var_shader_out, - glsl_vector_type(glsl_get_base_type(var->type), subvars[subvar].num_components), - name); - - new_var->data = var->data; - new_var->data.stream = subvars[subvar].stream; - new_var->data.location_frac = location_frac; - location_frac += subvars[subvar].num_components; - subvars[subvar].var = new_var; + progress = true; + + subvars[0].var = var; + var->type = glsl_vector_type(glsl_get_base_type(var->type), subvars[0].num_components); + unsigned location_frac = var->data.location_frac + subvars[0].num_components; + for (unsigned subvar = 1; subvar < var_state->num_subvars; ++subvar) { + char *name = ralloc_asprintf(s, "unpacked:%s_stream%d", var->name, subvars[subvar].stream); + nir_variable *new_var = nir_variable_create(s, nir_var_shader_out, + glsl_vector_type(glsl_get_base_type(var->type), subvars[subvar].num_components), + name); + + new_var->data = var->data; + new_var->data.stream = subvars[subvar].stream; + new_var->data.location_frac = location_frac; + location_frac += subvars[subvar].num_components; + subvars[subvar].var = new_var; + } + } else if (glsl_type_is_64bit(glsl_without_array(var->type)) && + glsl_get_components(glsl_without_array(var->type)) >= 3) { + progress = true; + assert(var->data.location_frac == 0); + uint32_t components = glsl_get_components(glsl_without_array(var->type)); + var_state->num_subvars = 2; + subvars[0].var = var; + subvars[0].num_components = 2; + subvars[0].stream = var->data.stream; + const struct glsl_type *base_type = glsl_without_array(var->type); + var->type = glsl_type_wrap_in_arrays(glsl_vector_type(glsl_get_base_type(base_type), 2), var->type); + + subvars[1].var = nir_variable_clone(var, s); + subvars[1].num_components = components - 2; + subvars[1].stream = var->data.stream; + exec_node_insert_after(&var->node, &subvars[1].var->node); + subvars[1].var->type = glsl_type_wrap_in_arrays(glsl_vector_type(glsl_get_base_type(base_type), components - 2), var->type); + subvars[1].var->data.location++; + subvars[1].var->data.driver_location++; } } if (progress) { - nir_shader_intrinsics_pass(s, split_multistream_varying_stores, + nir_shader_intrinsics_pass(s, split_varying_accesses, nir_metadata_block_index | nir_metadata_dominance, &state); } else { diff --git a/src/gallium/drivers/d3d12/d3d12_nir_passes.h b/src/gallium/drivers/d3d12/d3d12_nir_passes.h index 4fd2c673914..ee3560dac05 100644 --- a/src/gallium/drivers/d3d12/d3d12_nir_passes.h +++ b/src/gallium/drivers/d3d12/d3d12_nir_passes.h @@ -101,7 +101,7 @@ bool d3d12_disable_multisampling(nir_shader *s); bool -d3d12_split_multistream_varyings(nir_shader *s); +d3d12_split_needed_varyings(nir_shader *s); void d3d12_write_0_to_new_varying(nir_shader *s, nir_variable *var);
