Module: Mesa Branch: main Commit: a9f95bf687e8655607f7a6faf87388708fe92998 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a9f95bf687e8655607f7a6faf87388708fe92998
Author: Caio Oliveira <[email protected]> Date: Fri Oct 20 10:32:54 2023 -0700 intel/compiler: Reuse same scheduler for all pre-RA scheduling modes Reviewed-by: Matt Turner <[email protected]> Reviewed-by: Ian Romanick <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25841> --- src/intel/compiler/brw_fs.cpp | 9 ++++- src/intel/compiler/brw_fs.h | 8 +++- src/intel/compiler/brw_schedule_instructions.cpp | 49 ++++++++++++++++-------- 3 files changed, 47 insertions(+), 19 deletions(-) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 62407eb47a2..606026b1cae 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -6928,6 +6928,9 @@ fs_visitor::allocate_registers(bool allow_spilling) fs_inst **orig_order = save_instruction_order(cfg); fs_inst **best_pressure_order = NULL; + void *scheduler_ctx = ralloc_context(NULL); + fs_instruction_scheduler *sched = prepare_scheduler(scheduler_ctx); + /* Try each scheduling heuristic to see if it can successfully register * allocate without spilling. They should be ordered by decreasing * performance but increasing likelihood of allocating. @@ -6935,7 +6938,7 @@ fs_visitor::allocate_registers(bool allow_spilling) for (unsigned i = 0; i < ARRAY_SIZE(pre_modes); i++) { enum instruction_scheduler_mode sched_mode = pre_modes[i]; - schedule_instructions(sched_mode); + schedule_instructions_pre_ra(sched, sched_mode); this->shader_stats.scheduler_mode = scheduler_mode_name[sched_mode]; debug_optimizer(nir, shader_stats.scheduler_mode, 95, i); @@ -6973,6 +6976,8 @@ fs_visitor::allocate_registers(bool allow_spilling) invalidate_analysis(DEPENDENCY_INSTRUCTIONS); } + ralloc_free(scheduler_ctx); + if (!allocated) { if (0) { fprintf(stderr, "Spilling - using lowest-pressure mode \"%s\"\n", @@ -7009,7 +7014,7 @@ fs_visitor::allocate_registers(bool allow_spilling) opt_bank_conflicts(); - schedule_instructions(SCHEDULE_POST); + schedule_instructions_post_ra(); if (last_scratch > 0) { ASSERTED unsigned max_scratch_size = 2 * 1024 * 1024; diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index f22af018684..02919a44ece 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -183,6 +183,8 @@ struct brw_fs_bind_info { unsigned binding; }; +class fs_instruction_scheduler; + /** * The fragment shader front-end. * @@ -279,7 +281,11 @@ public: bool remove_duplicate_mrf_writes(); bool remove_extra_rounding_modes(); - void schedule_instructions(instruction_scheduler_mode mode); + fs_instruction_scheduler *prepare_scheduler(void *mem_ctx); + void schedule_instructions_pre_ra(fs_instruction_scheduler *sched, + instruction_scheduler_mode mode); + void schedule_instructions_post_ra(); + void insert_gfx4_send_dependency_workarounds(); void insert_gfx4_pre_send_dependency_workarounds(bblock_t *block, fs_inst *inst); diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp index 739cf5b1402..9b20431fab2 100644 --- a/src/intel/compiler/brw_schedule_instructions.cpp +++ b/src/intel/compiler/brw_schedule_instructions.cpp @@ -31,6 +31,7 @@ #include "brw_vec4.h" #include "brw_cfg.h" #include "brw_shader.h" +#include <new> using namespace brw; @@ -736,8 +737,7 @@ class fs_instruction_scheduler : public instruction_scheduler { public: fs_instruction_scheduler(void *mem_ctx, const fs_visitor *v, int grf_count, int hw_reg_count, - int block_count, - instruction_scheduler_mode mode); + int block_count, bool post_reg_alloc); void calculate_deps(); bool is_compressed(const fs_inst *inst); schedule_node *choose_instruction_to_schedule(); @@ -750,7 +750,7 @@ public: void clear_last_grf_write(); void schedule_instructions(); - void run(); + void run(instruction_scheduler_mode mode); const fs_visitor *v; unsigned hw_reg_count; @@ -803,14 +803,13 @@ public: fs_instruction_scheduler::fs_instruction_scheduler(void *mem_ctx, const fs_visitor *v, int grf_count, int hw_reg_count, - int block_count, - instruction_scheduler_mode mode) + int block_count, bool post_reg_alloc) : instruction_scheduler(mem_ctx, v, grf_count, /* grf_write_scale */ 16, - /* post_reg_alloc */ (mode == SCHEDULE_POST)), + post_reg_alloc), v(v) { this->hw_reg_count = hw_reg_count; - this->mode = mode; + this->mode = SCHEDULE_NONE; this->reg_pressure = 0; if (!post_reg_alloc) { @@ -1959,8 +1958,10 @@ fs_instruction_scheduler::schedule_instructions() } void -fs_instruction_scheduler::run() +fs_instruction_scheduler::run(instruction_scheduler_mode mode) { + this->mode = mode; + if (debug && !post_reg_alloc) { fprintf(stderr, "\nInstructions before scheduling (reg_alloc %d)\n", post_reg_alloc); @@ -2019,23 +2020,39 @@ vec4_instruction_scheduler::run() } } +fs_instruction_scheduler * +fs_visitor::prepare_scheduler(void *mem_ctx) +{ + const int grf_count = alloc.count; + + fs_instruction_scheduler *empty = rzalloc(mem_ctx, fs_instruction_scheduler); + return new (empty) fs_instruction_scheduler(mem_ctx, this, grf_count, first_non_payload_grf, + cfg->num_blocks, /* post_reg_alloc */ false); +} + void -fs_visitor::schedule_instructions(instruction_scheduler_mode mode) +fs_visitor::schedule_instructions_pre_ra(fs_instruction_scheduler *sched, + instruction_scheduler_mode mode) { if (mode == SCHEDULE_NONE) return; - int grf_count; - if (mode == SCHEDULE_POST) - grf_count = reg_unit(devinfo) * grf_used; - else - grf_count = alloc.count; + sched->run(mode); + + invalidate_analysis(DEPENDENCY_INSTRUCTIONS); +} + +void +fs_visitor::schedule_instructions_post_ra() +{ + const bool post_reg_alloc = true; + const int grf_count = reg_unit(devinfo) * grf_used; void *mem_ctx = ralloc_context(NULL); fs_instruction_scheduler sched(mem_ctx, this, grf_count, first_non_payload_grf, - cfg->num_blocks, mode); - sched.run(); + cfg->num_blocks, post_reg_alloc); + sched.run(SCHEDULE_POST); ralloc_free(mem_ctx);
