Module: Mesa Branch: main Commit: b2420fae4b19adffe3d8edfb283e547b953f1cf3 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b2420fae4b19adffe3d8edfb283e547b953f1cf3
Author: M Henning <[email protected]> Date: Sat Nov 25 15:56:00 2023 -0500 nak: Add a jump threading pass This saves 16 instructions on the compute shader in Sascha Willems' computecloth example. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26473> --- src/nouveau/compiler/nak/api.rs | 1 + src/nouveau/compiler/nak/cfg.rs | 5 + src/nouveau/compiler/nak/ir.rs | 4 +- src/nouveau/compiler/nak/lib.rs | 1 + src/nouveau/compiler/nak/opt_jump_thread.rs | 136 ++++++++++++++++++++++++++++ 5 files changed, 145 insertions(+), 2 deletions(-) diff --git a/src/nouveau/compiler/nak/api.rs b/src/nouveau/compiler/nak/api.rs index 0c862378d81..9ca21bd8854 100644 --- a/src/nouveau/compiler/nak/api.rs +++ b/src/nouveau/compiler/nak/api.rs @@ -275,6 +275,7 @@ pub extern "C" fn nak_compile_shader( s.lower_ineg(); s.lower_par_copies(); s.lower_copy_swap(); + s.opt_jump_thread(); s.calc_instr_deps(); if DEBUG.print() { diff --git a/src/nouveau/compiler/nak/cfg.rs b/src/nouveau/compiler/nak/cfg.rs index d23e50cb25f..33303d4302a 100644 --- a/src/nouveau/compiler/nak/cfg.rs +++ b/src/nouveau/compiler/nak/cfg.rs @@ -328,6 +328,11 @@ impl<N> CFG<N> { pub fn pred_indices(&self, idx: usize) -> &[usize] { &self.nodes[idx].pred[..] } + + pub fn drain<'a>(&'a mut self) -> impl Iterator<Item = N> + 'a { + self.has_loop = false; + self.nodes.drain(..).map(|n| n.node) + } } impl<N> Index<usize> for CFG<N> { diff --git a/src/nouveau/compiler/nak/ir.rs b/src/nouveau/compiler/nak/ir.rs index 2406134fff2..34a698bb2e2 100644 --- a/src/nouveau/compiler/nak/ir.rs +++ b/src/nouveau/compiler/nak/ir.rs @@ -4024,7 +4024,7 @@ impl DisplayOp for OpBSync { impl_display_for_op!(OpBSync); #[repr(C)] -#[derive(SrcsAsSlice, DstsAsSlice)] +#[derive(Clone, SrcsAsSlice, DstsAsSlice)] pub struct OpBra { pub target: Label, } @@ -4037,7 +4037,7 @@ impl DisplayOp for OpBra { impl_display_for_op!(OpBra); #[repr(C)] -#[derive(SrcsAsSlice, DstsAsSlice)] +#[derive(Clone, SrcsAsSlice, DstsAsSlice)] pub struct OpExit {} impl DisplayOp for OpExit { diff --git a/src/nouveau/compiler/nak/lib.rs b/src/nouveau/compiler/nak/lib.rs index 85f873a5bae..d0053679bc5 100644 --- a/src/nouveau/compiler/nak/lib.rs +++ b/src/nouveau/compiler/nak/lib.rs @@ -19,6 +19,7 @@ mod nir; mod opt_bar_prop; mod opt_copy_prop; mod opt_dce; +mod opt_jump_thread; mod opt_lop; mod opt_out; mod repair_ssa; diff --git a/src/nouveau/compiler/nak/opt_jump_thread.rs b/src/nouveau/compiler/nak/opt_jump_thread.rs new file mode 100644 index 00000000000..a85eefd66ad --- /dev/null +++ b/src/nouveau/compiler/nak/opt_jump_thread.rs @@ -0,0 +1,136 @@ +// Copyright © 2023 Mel Henning +// SPDX-License-Identifier: MIT + +use crate::cfg::CFGBuilder; +use crate::ir::*; +use std::collections::HashMap; + +fn clone_branch(op: &Op) -> Op { + match op { + Op::Bra(b) => Op::Bra(b.clone()), + Op::Exit(e) => Op::Exit(e.clone()), + _ => unreachable!(), + } +} + +fn jump_thread(func: &mut Function) -> bool { + // Let's call a basic block "trivial" if its only instruction is an + // unconditional branch. If a block is trivial, we can update all of its + // predecessors to jump to its sucessor. + // + // A single reverse pass over the basic blocks is enough to update all of + // the edges we're interested in. Roughly, if we assume that all loops in + // the shader can terminate, then loop heads are never trivial and we + // never replace a backward edge. Therefore, in each step we only need to + // make sure that later control flow has been replaced in order to update + // the current block as much as possible. + // + // We additionally try to update a branch-to-empty-block to point to the + // block's successor, which along with block dce/reordering can sometimes + // enable a later optimization that converts branches to fallthrough. + let mut progress = false; + + // A branch to label can be replaced with Op + let mut replacements: HashMap<Label, Op> = HashMap::new(); + + // Invariant 1: At the end of each loop iteration, + // every trivial block with an index in [i, blocks.len()) + // is represented in replacements.keys() + // Invariant 2: replacements.values() never contains + // a branch to a trivial block + for i in (0..func.blocks.len()).rev() { + // Replace the branch if possible + if let Some(instr) = func.blocks[i].instrs.last_mut() { + if let Op::Bra(OpBra { target }) = instr.op { + if let Some(replacement) = replacements.get(&target) { + instr.op = clone_branch(replacement); + progress = true; + } + // If the branch target was previously a trivial block then the + // branch was previously a forward edge (see above) and by + // invariants 1 and 2 we just updated the branch to target + // a nontrivial block + } + } + + // Is this block trivial? + let block_label = func.blocks[i].label; + match &func.blocks[i].instrs[..] { + [instr] => { + if instr.is_branch() && instr.pred.is_true() { + // Upholds invariant 2 because we updated the branch above + replacements.insert(block_label, clone_branch(&instr.op)); + } + } + [] => { + // Empty block - falls through + // Our successor might be trivial, so we need to + // apply the rewrite map to uphold invariant 2 + let target_label = func.blocks[i + 1].label; + let replacement = replacements + .get(&target_label) + .map(clone_branch) + .unwrap_or_else(|| { + Op::Bra(OpBra { + target: target_label, + }) + }); + replacements.insert(block_label, replacement); + } + _ => (), + } + } + + if progress { + // We don't update the CFG above, so rewrite it if we made progress + rewrite_cfg(func); + } + + return progress; +} + +fn rewrite_cfg(func: &mut Function) { + // CFGBuilder takes care of removing dead blocks for us + // We use the basic block's label to identify it + let mut builder = CFGBuilder::new(); + + for i in 0..func.blocks.len() { + let block = &func.blocks[i]; + // Note: fall-though must be first edge + if block.falls_through() { + let next_block = &func.blocks[i + 1]; + builder.add_edge(block.label, next_block.label); + } + if let Some(control_flow) = block.branch() { + match &control_flow.op { + Op::Bra(bra) => { + builder.add_edge(block.label, bra.target); + } + Op::Exit(_) => (), + _ => unreachable!(), + }; + } + } + + for block in func.blocks.drain() { + builder.add_node(block.label, block); + } + let _ = std::mem::replace(&mut func.blocks, builder.as_cfg()); +} + +impl Function { + pub fn opt_jump_thread(&mut self) { + jump_thread(self); + } +} + +impl Shader { + /// A simple jump threading pass + /// + /// Note that this can introduce critical edges, so it cannot be run before RA + pub fn opt_jump_thread(&mut self) { + for f in &mut self.functions { + f.opt_jump_thread(); + } + } +}
