Module: Mesa Branch: main Commit: 2d273c520c9b65f6567d8af29e387867d80a06ec URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=2d273c520c9b65f6567d8af29e387867d80a06ec
Author: Job Noorman <[email protected]> Date: Fri Dec 15 11:43:46 2023 +0100 ir3: lower 64b registers before creating preamble ir3_nir_lower_preamble cannot handle 64b @load/store_preamble, so we have to make sure ir3_nir_opt_preamble will never produce them. Up to now, nir_lower_locals_to_regs was run after preamble lowering, so 64b locals could still be around when lowering the preamble. This patch moves this pass, as well as ir3_nir_lower_64b_regs, to before the preamble lowering. Fixed Piglit tests: - spec@arb_gpu_shader_fp64@execution@fs-indirect-temp-double-dst - spec@arb_gpu_shader_fp64@execution@built-in-functions@fs-frexp-dvec4-variable-index This patch has no impact on shader-db. Note: a few cleanup passes used to be run after nir_lower_locals_to_regs (nir_opt_algebraic, nir_opt_constant_folding) and after ir3_nir_lower_64b_regs (nir_lower_alu_to_scalar, nir_copy_prop). As far as I can tell, these are no longer necessary when the register lowering runs earlier, so this patch removes them. 
Signed-off-by: Job Noorman <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26737> --- src/freedreno/ir3/ir3_context.c | 28 ---------------------------- src/freedreno/ir3/ir3_nir.c | 7 +++++++ src/freedreno/ir3/ir3_nir_opt_preamble.c | 5 +++-- 3 files changed, 10 insertions(+), 30 deletions(-) diff --git a/src/freedreno/ir3/ir3_context.c b/src/freedreno/ir3/ir3_context.c index 4e540ee59ea..f57f8118020 100644 --- a/src/freedreno/ir3/ir3_context.c +++ b/src/freedreno/ir3/ir3_context.c @@ -84,42 +84,14 @@ ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader *shader, ctx->s = nir_shader_clone(ctx, shader->nir); ir3_nir_lower_variant(so, ctx->s); - /* this needs to be the last pass run, so do this here instead of - * in ir3_optimize_nir(): - */ bool progress = false; bool needs_late_alg = false; - NIR_PASS(progress, ctx->s, nir_lower_locals_to_regs, 1); - - if (progress) { - bool regs_progress = false; - - /* Split 64b registers into two 32b ones. */ - NIR_PASS(regs_progress, ctx->s, ir3_nir_lower_64b_regs); - - if (regs_progress) { - /* After splitting registers, we might still have some 64b vecs. Run - * some passes to get rid of them. - */ - NIR_PASS_V(ctx->s, nir_lower_alu_to_scalar, NULL, NULL); - NIR_PASS_V(ctx->s, nir_copy_prop); - } - } - - /* we could need cleanup after lower_locals_to_regs */ - while (progress) { - progress = false; - NIR_PASS(progress, ctx->s, nir_opt_algebraic); - NIR_PASS(progress, ctx->s, nir_opt_constant_folding); - needs_late_alg = true; - } /* We want to lower nir_op_imul as late as possible, to catch also * those generated by earlier passes (e.g, * nir_lower_locals_to_regs). However, we want a final swing of a * few passes to have a chance at optimizing the result. 
*/ - progress = false; NIR_PASS(progress, ctx->s, ir3_nir_lower_imul); while (progress) { progress = false; diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index 07b212f4cc9..2cdce66d998 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -722,6 +722,13 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s) /* Lower scratch writemasks */ progress |= OPT(s, nir_lower_wrmasks, should_split_wrmask, s); + if (OPT(s, nir_lower_locals_to_regs, 1)) { + progress = true; + + /* Split 64b registers into two 32b ones. */ + OPT_V(s, ir3_nir_lower_64b_regs); + } + progress |= OPT(s, ir3_nir_lower_wide_load_store); progress |= OPT(s, ir3_nir_lower_64b_global); progress |= OPT(s, ir3_nir_lower_64b_intrinsics); diff --git a/src/freedreno/ir3/ir3_nir_opt_preamble.c b/src/freedreno/ir3/ir3_nir_opt_preamble.c index 28a95d5e3b3..cd7926ab252 100644 --- a/src/freedreno/ir3/ir3_nir_opt_preamble.c +++ b/src/freedreno/ir3/ir3_nir_opt_preamble.c @@ -440,8 +440,9 @@ ir3_nir_lower_preamble(nir_shader *nir, struct ir3_shader_variant *v) * ... */ - b->cursor = nir_before_impl(main); - + /* @decl_regs need to stay in the first block. */ + b->cursor = nir_after_reg_decls(main); + nir_if *outer_if = nir_push_if(b, nir_preamble_start_ir3(b, 1)); { nir_if *inner_if = nir_push_if(b, nir_elect(b, 1));
