Module: Mesa Branch: main Commit: 286caa5080703a436f313fe8a575b8ec38657d50 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=286caa5080703a436f313fe8a575b8ec38657d50
Author: Job Noorman <[email protected]> Date: Fri Nov 10 11:57:34 2023 +0100 ir3: lower 64b registers After all int64/double lowerings, there might still be 64b registers left which ir3 currently doesn't handle. This only happens in a small number of Piglit tests where those registers (or the variables they come from) did not get DCE'd. This patch handles 64b registers in ir3 by adding a NIR pass that does the following: - @decl_reg -> split in two 32b ones - @store_reg -> unpack_64_2x32_split_x/y and two separate stores - @load_reg -> two separate loads and pack_64_2x32_split After this pass, the 64b vecs used for the original loads/stores are still present and are also not handled yet by ir3. This patch removes them by running nir_lower_alu_to_scalar and nir_copy_prop. Signed-off-by: Job Noorman <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26175> --- src/freedreno/ci/freedreno-a618-fails.txt | 6 -- src/freedreno/ci/freedreno-a630-fails.txt | 6 -- src/freedreno/ir3/ir3_context.c | 15 +++++ src/freedreno/ir3/ir3_nir.h | 1 + src/freedreno/ir3/ir3_nir_lower_64b.c | 97 +++++++++++++++++++++++++++++++ 5 files changed, 113 insertions(+), 12 deletions(-) diff --git a/src/freedreno/ci/freedreno-a618-fails.txt b/src/freedreno/ci/freedreno-a618-fails.txt index efd88a3b535..c2ffed11f95 100644 --- a/src/freedreno/ci/freedreno-a618-fails.txt +++ b/src/freedreno/ci/freedreno-a618-fails.txt @@ -106,16 +106,10 @@ spec@arb_shader_image_load_store@qualifiers@r8/strict layout qualifiers/permissi # ir3_nir_lower_tess.c:251: lower_block_to_explicit_output: Assertion `util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1)' failed. 
spec@arb_tessellation_shader@execution@tcs-input-read-mat,Crash -# Some 64b not getting lowered to 32b: -spec@arb_tessellation_shader@execution@variable-indexing@vs-output-array-dvec4-index-wr-before-tcs,Crash - spec@arb_texture_rectangle@1-1-linear-texture,Fail spec@arb_vertex_type_2_10_10_10_rev@attrib-p-type-size-match,Fail -# fails unrelated to GL_ARB_enhanced_layouts -spec@arb_enhanced_layouts@execution@component-layout@vs-fs-array-dvec3,Crash - # fails on gen1 (a618/a630) with both fd and zink, but passes on gen4.. # maybe gen1 sqe doesn't handle the count==0 case? spec@arb_indirect_parameters@tf-count-arrays,Fail diff --git a/src/freedreno/ci/freedreno-a630-fails.txt b/src/freedreno/ci/freedreno-a630-fails.txt index 3c6a15eb89d..fb7bf5c0f1e 100644 --- a/src/freedreno/ci/freedreno-a630-fails.txt +++ b/src/freedreno/ci/freedreno-a630-fails.txt @@ -109,16 +109,10 @@ spec@arb_shader_image_load_store@qualifiers@r8/strict layout qualifiers/permissi # ir3_nir_lower_tess.c:251: lower_block_to_explicit_output: Assertion `util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1)' failed. spec@arb_tessellation_shader@execution@tcs-input-read-mat,Crash -# Some 64b not getting lowered to 32b: -spec@arb_tessellation_shader@execution@variable-indexing@vs-output-array-dvec4-index-wr-before-tcs,Crash - spec@arb_texture_rectangle@1-1-linear-texture,Fail spec@arb_vertex_type_2_10_10_10_rev@attrib-p-type-size-match,Fail -# fails unrelated to GL_ARB_enhanced_layouts -spec@arb_enhanced_layouts@execution@component-layout@vs-fs-array-dvec3,Crash - # fails on gen1 (a618/a630) with both fd and zink, but passes on gen4.. # maybe gen1 sqe doesn't handle the count==0 case? 
spec@arb_indirect_parameters@tf-count-arrays,Fail diff --git a/src/freedreno/ir3/ir3_context.c b/src/freedreno/ir3/ir3_context.c index 4cc3038d51b..4e540ee59ea 100644 --- a/src/freedreno/ir3/ir3_context.c +++ b/src/freedreno/ir3/ir3_context.c @@ -91,6 +91,21 @@ ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader *shader, bool needs_late_alg = false; NIR_PASS(progress, ctx->s, nir_lower_locals_to_regs, 1); + if (progress) { + bool regs_progress = false; + + /* Split 64b registers into two 32b ones. */ + NIR_PASS(regs_progress, ctx->s, ir3_nir_lower_64b_regs); + + if (regs_progress) { + /* After splitting registers, we might still have some 64b vecs. Run + * some passes to get rid of them. + */ + NIR_PASS_V(ctx->s, nir_lower_alu_to_scalar, NULL, NULL); + NIR_PASS_V(ctx->s, nir_copy_prop); + } + } + /* we could need cleanup after lower_locals_to_regs */ while (progress) { progress = false; diff --git a/src/freedreno/ir3/ir3_nir.h b/src/freedreno/ir3/ir3_nir.h index de6b70e7877..a4adde07225 100644 --- a/src/freedreno/ir3/ir3_nir.h +++ b/src/freedreno/ir3/ir3_nir.h @@ -65,6 +65,7 @@ void ir3_nir_lower_gs(nir_shader *shader); bool ir3_nir_lower_64b_intrinsics(nir_shader *shader); bool ir3_nir_lower_64b_undef(nir_shader *shader); bool ir3_nir_lower_64b_global(nir_shader *shader); +bool ir3_nir_lower_64b_regs(nir_shader *shader); void ir3_optimize_loop(struct ir3_compiler *compiler, nir_shader *s); void ir3_nir_lower_io_to_temporaries(nir_shader *s); diff --git a/src/freedreno/ir3/ir3_nir_lower_64b.c b/src/freedreno/ir3/ir3_nir_lower_64b.c index e22a05cb622..5d35743c626 100644 --- a/src/freedreno/ir3/ir3_nir_lower_64b.c +++ b/src/freedreno/ir3/ir3_nir_lower_64b.c @@ -299,3 +299,100 @@ ir3_nir_lower_64b_global(nir_shader *shader) shader, lower_64b_global_filter, lower_64b_global, NULL); } + +/* + * Lowering for 64b registers: + * - @decl_reg -> split in two 32b ones + * - @store_reg -> unpack_64_2x32_split_x/y and two separate stores + * - @load_reg -> two 
separate loads and pack_64_2x32_split + */ + +static void +lower_64b_reg(nir_builder *b, nir_intrinsic_instr *reg) +{ + unsigned num_components = nir_intrinsic_num_components(reg); + unsigned num_array_elems = nir_intrinsic_num_array_elems(reg); + + nir_def *reg_hi = nir_decl_reg(b, num_components, 32, num_array_elems); + nir_def *reg_lo = nir_decl_reg(b, num_components, 32, num_array_elems); + + nir_foreach_reg_store_safe (store_reg_src, reg) { + nir_intrinsic_instr *store = + nir_instr_as_intrinsic(nir_src_parent_instr(store_reg_src)); + b->cursor = nir_before_instr(&store->instr); + + nir_def *packed = store->src[0].ssa; + nir_def *unpacked_lo = nir_unpack_64_2x32_split_x(b, packed); + nir_def *unpacked_hi = nir_unpack_64_2x32_split_y(b, packed); + int base = nir_intrinsic_base(store); + + if (store->intrinsic == nir_intrinsic_store_reg) { + nir_build_store_reg(b, unpacked_lo, reg_lo, .base = base); + nir_build_store_reg(b, unpacked_hi, reg_hi, .base = base); + } else { + assert(store->intrinsic == nir_intrinsic_store_reg_indirect); + + nir_def *offset = store->src[2].ssa; + nir_store_reg_indirect(b, unpacked_lo, reg_lo, offset, .base = base); + nir_store_reg_indirect(b, unpacked_hi, reg_hi, offset, .base = base); + } + + nir_instr_remove(&store->instr); + } + + nir_foreach_reg_load_safe (load_reg_src, reg) { + nir_intrinsic_instr *load = + nir_instr_as_intrinsic(nir_src_parent_instr(load_reg_src)); + b->cursor = nir_before_instr(&load->instr); + + int base = nir_intrinsic_base(load); + nir_def *load_lo, *load_hi; + + if (load->intrinsic == nir_intrinsic_load_reg) { + load_lo = + nir_build_load_reg(b, num_components, 32, reg_lo, .base = base); + load_hi = + nir_build_load_reg(b, num_components, 32, reg_hi, .base = base); + } else { + assert(load->intrinsic == nir_intrinsic_load_reg_indirect); + + nir_def *offset = load->src[1].ssa; + load_lo = nir_load_reg_indirect(b, num_components, 32, reg_lo, offset, + .base = base); + load_hi = nir_load_reg_indirect(b, 
num_components, 32, reg_hi, offset, + .base = base); + } + + nir_def *packed = nir_pack_64_2x32_split(b, load_lo, load_hi); + nir_def_rewrite_uses(&load->def, packed); + nir_instr_remove(&load->instr); + } + + nir_instr_remove(&reg->instr); +} + +bool +ir3_nir_lower_64b_regs(nir_shader *shader) +{ + bool progress = false; + + nir_foreach_function_impl (impl, shader) { + bool impl_progress = false; + nir_builder b = nir_builder_create(impl); + + nir_foreach_reg_decl_safe (reg, impl) { + if (nir_intrinsic_bit_size(reg) == 64) { + lower_64b_reg(&b, reg); + impl_progress = true; + } + } + + if (impl_progress) { + nir_metadata_preserve( + impl, nir_metadata_block_index | nir_metadata_dominance); + progress = true; + } + } + + return progress; +}
