Module: Mesa
Branch: main
Commit: 2d273c520c9b65f6567d8af29e387867d80a06ec
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=2d273c520c9b65f6567d8af29e387867d80a06ec

Author: Job Noorman <[email protected]>
Date:   Fri Dec 15 11:43:46 2023 +0100

ir3: lower 64b registers before creating preamble

ir3_nir_lower_preamble cannot handle 64b @load/store_preamble, so we have
to make sure ir3_nir_opt_preamble will never produce them. Up to now,
nir_lower_locals_to_regs was run after preamble lowering, so 64b locals
could still be around when lowering the preamble. This patch moves
nir_lower_locals_to_regs, as well as ir3_nir_lower_64b_regs, so that both
run before the preamble lowering.

Fixed Piglit tests:
- spec@arb_gpu_shader_fp64@execution@fs-indirect-temp-double-dst
- spec@arb_gpu_shader_fp64@execution@built-in-functions@fs-frexp-dvec4-variable-index

This patch has no impact on shader-db.

Note: a few cleanup passes used to be run after nir_lower_locals_to_regs
(nir_opt_algebraic, nir_opt_constant_folding) and after
ir3_nir_lower_64b_regs (nir_lower_alu_to_scalar, nir_copy_prop). As far
as I can tell, these are no longer necessary when the register lowering
runs earlier, so this patch removes them.

Signed-off-by: Job Noorman <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26737>

---

 src/freedreno/ir3/ir3_context.c          | 28 ----------------------------
 src/freedreno/ir3/ir3_nir.c              |  7 +++++++
 src/freedreno/ir3/ir3_nir_opt_preamble.c |  5 +++--
 3 files changed, 10 insertions(+), 30 deletions(-)

diff --git a/src/freedreno/ir3/ir3_context.c b/src/freedreno/ir3/ir3_context.c
index 4e540ee59ea..f57f8118020 100644
--- a/src/freedreno/ir3/ir3_context.c
+++ b/src/freedreno/ir3/ir3_context.c
@@ -84,42 +84,14 @@ ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader *shader,
    ctx->s = nir_shader_clone(ctx, shader->nir);
    ir3_nir_lower_variant(so, ctx->s);
 
-   /* this needs to be the last pass run, so do this here instead of
-    * in ir3_optimize_nir():
-    */
    bool progress = false;
    bool needs_late_alg = false;
-   NIR_PASS(progress, ctx->s, nir_lower_locals_to_regs, 1);
-
-   if (progress) {
-      bool regs_progress = false;
-
-      /* Split 64b registers into two 32b ones. */
-      NIR_PASS(regs_progress, ctx->s, ir3_nir_lower_64b_regs);
-
-      if (regs_progress) {
-         /* After splitting registers, we might still have some 64b vecs. Run
-          * some passes to get rid of them.
-          */
-         NIR_PASS_V(ctx->s, nir_lower_alu_to_scalar, NULL, NULL);
-         NIR_PASS_V(ctx->s, nir_copy_prop);
-      }
-   }
-
-   /* we could need cleanup after lower_locals_to_regs */
-   while (progress) {
-      progress = false;
-      NIR_PASS(progress, ctx->s, nir_opt_algebraic);
-      NIR_PASS(progress, ctx->s, nir_opt_constant_folding);
-      needs_late_alg = true;
-   }
 
    /* We want to lower nir_op_imul as late as possible, to catch also
     * those generated by earlier passes (e.g,
     * nir_lower_locals_to_regs).  However, we want a final swing of a
     * few passes to have a chance at optimizing the result.
     */
-   progress = false;
    NIR_PASS(progress, ctx->s, ir3_nir_lower_imul);
    while (progress) {
       progress = false;
diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
index 07b212f4cc9..2cdce66d998 100644
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -722,6 +722,13 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s)
    /* Lower scratch writemasks */
    progress |= OPT(s, nir_lower_wrmasks, should_split_wrmask, s);
 
+   if (OPT(s, nir_lower_locals_to_regs, 1)) {
+      progress = true;
+
+      /* Split 64b registers into two 32b ones. */
+      OPT_V(s, ir3_nir_lower_64b_regs);
+   }
+
    progress |= OPT(s, ir3_nir_lower_wide_load_store);
    progress |= OPT(s, ir3_nir_lower_64b_global);
    progress |= OPT(s, ir3_nir_lower_64b_intrinsics);
diff --git a/src/freedreno/ir3/ir3_nir_opt_preamble.c b/src/freedreno/ir3/ir3_nir_opt_preamble.c
index 28a95d5e3b3..cd7926ab252 100644
--- a/src/freedreno/ir3/ir3_nir_opt_preamble.c
+++ b/src/freedreno/ir3/ir3_nir_opt_preamble.c
@@ -440,8 +440,9 @@ ir3_nir_lower_preamble(nir_shader *nir, struct ir3_shader_variant *v)
     * ...
     */
 
-   b->cursor = nir_before_impl(main);
-   
+   /* @decl_regs need to stay in the first block. */
+   b->cursor = nir_after_reg_decls(main);
+
    nir_if *outer_if = nir_push_if(b, nir_preamble_start_ir3(b, 1));
    {
       nir_if *inner_if = nir_push_if(b, nir_elect(b, 1));

Reply via email to