Mesa (main): intel/compiler: Reuse same scheduler for all pre-RA scheduling modes

GitLab Mirror Mon, 13 Nov 2023 15:42:03 -0800

Module: Mesa
Branch: main
Commit: a9f95bf687e8655607f7a6faf87388708fe92998
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=a9f95bf687e8655607f7a6faf87388708fe92998


Author: Caio Oliveira <[email protected]>
Date:   Fri Oct 20 10:32:54 2023 -0700

intel/compiler: Reuse same scheduler for all pre-RA scheduling modes

Reviewed-by: Matt Turner <[email protected]>
Reviewed-by: Ian Romanick <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25841>

---

 src/intel/compiler/brw_fs.cpp                    |  9 ++++-
 src/intel/compiler/brw_fs.h                      |  8 +++-
 src/intel/compiler/brw_schedule_instructions.cpp | 49 ++++++++++++++++--------
 3 files changed, 47 insertions(+), 19 deletions(-)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 62407eb47a2..606026b1cae 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -6928,6 +6928,9 @@ fs_visitor::allocate_registers(bool allow_spilling)
    fs_inst **orig_order = save_instruction_order(cfg);
    fs_inst **best_pressure_order = NULL;
 
+   void *scheduler_ctx = ralloc_context(NULL);
+   fs_instruction_scheduler *sched = prepare_scheduler(scheduler_ctx);
+
    /* Try each scheduling heuristic to see if it can successfully register
     * allocate without spilling.  They should be ordered by decreasing
     * performance but increasing likelihood of allocating.
@@ -6935,7 +6938,7 @@ fs_visitor::allocate_registers(bool allow_spilling)
    for (unsigned i = 0; i < ARRAY_SIZE(pre_modes); i++) {
       enum instruction_scheduler_mode sched_mode = pre_modes[i];
 
-      schedule_instructions(sched_mode);
+      schedule_instructions_pre_ra(sched, sched_mode);
       this->shader_stats.scheduler_mode = scheduler_mode_name[sched_mode];
 
       debug_optimizer(nir, shader_stats.scheduler_mode, 95, i);
@@ -6973,6 +6976,8 @@ fs_visitor::allocate_registers(bool allow_spilling)
       invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
    }
 
+   ralloc_free(scheduler_ctx);
+
    if (!allocated) {
       if (0) {
          fprintf(stderr, "Spilling - using lowest-pressure mode \"%s\"\n",
@@ -7009,7 +7014,7 @@ fs_visitor::allocate_registers(bool allow_spilling)
 
    opt_bank_conflicts();
 
-   schedule_instructions(SCHEDULE_POST);
+   schedule_instructions_post_ra();
 
    if (last_scratch > 0) {
       ASSERTED unsigned max_scratch_size = 2 * 1024 * 1024;
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index f22af018684..02919a44ece 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -183,6 +183,8 @@ struct brw_fs_bind_info {
    unsigned binding;
 };
 
+class fs_instruction_scheduler;
+
 /**
  * The fragment shader front-end.
  *
@@ -279,7 +281,11 @@ public:
    bool remove_duplicate_mrf_writes();
    bool remove_extra_rounding_modes();
 
-   void schedule_instructions(instruction_scheduler_mode mode);
+   fs_instruction_scheduler *prepare_scheduler(void *mem_ctx);
+   void schedule_instructions_pre_ra(fs_instruction_scheduler *sched,
+                                     instruction_scheduler_mode mode);
+   void schedule_instructions_post_ra();
+
    void insert_gfx4_send_dependency_workarounds();
    void insert_gfx4_pre_send_dependency_workarounds(bblock_t *block,
                                                     fs_inst *inst);
diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp
index 739cf5b1402..9b20431fab2 100644
--- a/src/intel/compiler/brw_schedule_instructions.cpp
+++ b/src/intel/compiler/brw_schedule_instructions.cpp
@@ -31,6 +31,7 @@
 #include "brw_vec4.h"
 #include "brw_cfg.h"
 #include "brw_shader.h"
+#include <new>
 
 using namespace brw;
 
@@ -736,8 +737,7 @@ class fs_instruction_scheduler : public instruction_scheduler
 {
 public:
    fs_instruction_scheduler(void *mem_ctx, const fs_visitor *v, int grf_count, int hw_reg_count,
-                            int block_count,
-                            instruction_scheduler_mode mode);
+                            int block_count, bool post_reg_alloc);
    void calculate_deps();
    bool is_compressed(const fs_inst *inst);
    schedule_node *choose_instruction_to_schedule();
@@ -750,7 +750,7 @@ public:
    void clear_last_grf_write();
 
    void schedule_instructions();
-   void run();
+   void run(instruction_scheduler_mode mode);
 
    const fs_visitor *v;
    unsigned hw_reg_count;
@@ -803,14 +803,13 @@ public:
 
 fs_instruction_scheduler::fs_instruction_scheduler(void *mem_ctx, const fs_visitor *v,
                                                    int grf_count, int hw_reg_count,
-                                                   int block_count,
-                                                   instruction_scheduler_mode mode)
+                                                   int block_count, bool post_reg_alloc)
    : instruction_scheduler(mem_ctx, v, grf_count, /* grf_write_scale */ 16,
-                           /* post_reg_alloc */ (mode == SCHEDULE_POST)),
+                           post_reg_alloc),
      v(v)
 {
    this->hw_reg_count = hw_reg_count;
-   this->mode = mode;
+   this->mode = SCHEDULE_NONE;
    this->reg_pressure = 0;
 
    if (!post_reg_alloc) {
@@ -1959,8 +1958,10 @@ fs_instruction_scheduler::schedule_instructions()
 }
 
 void
-fs_instruction_scheduler::run()
+fs_instruction_scheduler::run(instruction_scheduler_mode mode)
 {
+   this->mode = mode;
+
    if (debug && !post_reg_alloc) {
       fprintf(stderr, "\nInstructions before scheduling (reg_alloc %d)\n",
               post_reg_alloc);
@@ -2019,23 +2020,39 @@ vec4_instruction_scheduler::run()
    }
 }
 
+fs_instruction_scheduler *
+fs_visitor::prepare_scheduler(void *mem_ctx)
+{
+   const int grf_count = alloc.count;
+
+   fs_instruction_scheduler *empty = rzalloc(mem_ctx, fs_instruction_scheduler);
+   return new (empty) fs_instruction_scheduler(mem_ctx, this, grf_count, first_non_payload_grf,
+                                               cfg->num_blocks, /* post_reg_alloc */ false);
+}
+
 void
-fs_visitor::schedule_instructions(instruction_scheduler_mode mode)
+fs_visitor::schedule_instructions_pre_ra(fs_instruction_scheduler *sched,
+                                         instruction_scheduler_mode mode)
 {
    if (mode == SCHEDULE_NONE)
       return;
 
-   int grf_count;
-   if (mode == SCHEDULE_POST)
-      grf_count = reg_unit(devinfo) * grf_used;
-   else
-      grf_count = alloc.count;
+   sched->run(mode);
+
+   invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
+}
+
+void
+fs_visitor::schedule_instructions_post_ra()
+{
+   const bool post_reg_alloc = true;
+   const int grf_count = reg_unit(devinfo) * grf_used;
 
    void *mem_ctx = ralloc_context(NULL);
 
    fs_instruction_scheduler sched(mem_ctx, this, grf_count, first_non_payload_grf,
-                                  cfg->num_blocks, mode);
-   sched.run();
+                                  cfg->num_blocks, post_reg_alloc);
+   sched.run(SCHEDULE_POST);
 
    ralloc_free(mem_ctx);

Mesa (main): intel/compiler: Reuse same scheduler for all pre-RA scheduling modes

Reply via email to