By using the live variables code for determining interference, we can handle coalescing in the presence of control flow, which the other register coalescing path couldn't.
Total instructions: 207184 -> 206990 74/1246 programs affected (5.9%) 33993 -> 33799 instructions in affected programs (0.6% reduction) There is a newerth shader that loses out, because of some extra MOVs that now get their dead-code nature obscured by coalescing. This should be fixed by doing better at dead code elimination. --- src/mesa/drivers/dri/i965/brw_fs.cpp | 61 ++++++++++++++++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_fs.h | 1 + 2 files changed, 62 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index a723cd8..0fe97dd 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1218,6 +1218,66 @@ fs_visitor::dead_code_eliminate() return progress; } +/** + * Implements a second type of register coalescing: This one checks if + * the two regs involved in a raw move don't interfere, in which case + * they can both be stored in the same place and the MOV removed. + * + * A MOV is only considered when it is unpredicated and non-saturating, + * both source and destination are GRFs of the same type, the source has + * no negate, abs or smear modifier, the source virtual GRF has size 1, + * and virtual_grf_interferes() reports no interference between the two + * registers. + * + * For each such MOV, every instruction in the program is scanned and any + * destination or source (slots 0..2) that names the MOV's source register + * is rewritten to the MOV's destination register and reg_offset. The MOV + * is then removed and live_intervals_valid is cleared. + * + * Returns true if at least one MOV was removed. + */ +bool +fs_visitor::register_coalesce_2() +{ + bool progress = false; + + calculate_live_intervals(); + + foreach_list_safe(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; + + if (inst->opcode != BRW_OPCODE_MOV || + inst->predicated || + inst->saturate || + inst->src[0].file != GRF || + inst->src[0].negate || + inst->src[0].abs || + inst->src[0].smear != -1 || + inst->dst.file != GRF || + inst->dst.type != inst->src[0].type || + virtual_grf_sizes[inst->src[0].reg] != 1 || + virtual_grf_interferes(inst->dst.reg, inst->src[0].reg)) { + continue; + } + + int reg_from = inst->src[0].reg; + assert(inst->src[0].reg_offset == 0); + int reg_to = inst->dst.reg; + int reg_to_offset = inst->dst.reg_offset; + + foreach_list_safe(node, &this->instructions) { + fs_inst *scan_inst = (fs_inst *)node; + + if (scan_inst->dst.file == GRF && + scan_inst->dst.reg == reg_from) { + scan_inst->dst.reg = reg_to; + scan_inst->dst.reg_offset = reg_to_offset; + } + for (int i = 0; i < 3; i++) { + if
(scan_inst->src[i].file == GRF && + scan_inst->src[i].reg == reg_from) { + scan_inst->src[i].reg = reg_to; + scan_inst->src[i].reg_offset = reg_to_offset; + } + } + } + + inst->remove(); + live_intervals_valid = false; + progress = true; + continue; + } + + return progress; +} + bool fs_visitor::register_coalesce() { @@ -1683,6 +1743,7 @@ fs_visitor::run() progress = opt_algebraic() || progress; progress = opt_copy_propagate() || progress; progress = register_coalesce() || progress; + progress = register_coalesce_2() || progress; progress = compute_to_mrf() || progress; progress = dead_code_eliminate() || progress; } while (progress); diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 8ae855e..ae4656b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -513,6 +513,7 @@ public: bool opt_copy_propagate_local(void *mem_ctx, fs_bblock *block, exec_list *acp); bool register_coalesce(); + bool register_coalesce_2(); bool compute_to_mrf(); bool dead_code_eliminate(); bool remove_dead_constants(); -- 1.7.10 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev