Module: Mesa
Branch: main
Commit: b2420fae4b19adffe3d8edfb283e547b953f1cf3
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=b2420fae4b19adffe3d8edfb283e547b953f1cf3

Author: M Henning <[email protected]>
Date:   Sat Nov 25 15:56:00 2023 -0500

nak: Add a jump threading pass

This saves 16 instructions on the compute shader in Sascha Willems'
computecloth example.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26473>

---

 src/nouveau/compiler/nak/api.rs             |   1 +
 src/nouveau/compiler/nak/cfg.rs             |   5 +
 src/nouveau/compiler/nak/ir.rs              |   4 +-
 src/nouveau/compiler/nak/lib.rs             |   1 +
 src/nouveau/compiler/nak/opt_jump_thread.rs | 136 ++++++++++++++++++++++++++++
 5 files changed, 145 insertions(+), 2 deletions(-)

diff --git a/src/nouveau/compiler/nak/api.rs b/src/nouveau/compiler/nak/api.rs
index 0c862378d81..9ca21bd8854 100644
--- a/src/nouveau/compiler/nak/api.rs
+++ b/src/nouveau/compiler/nak/api.rs
@@ -275,6 +275,7 @@ pub extern "C" fn nak_compile_shader(
     s.lower_ineg();
     s.lower_par_copies();
     s.lower_copy_swap();
+    s.opt_jump_thread();
     s.calc_instr_deps();
 
     if DEBUG.print() {
diff --git a/src/nouveau/compiler/nak/cfg.rs b/src/nouveau/compiler/nak/cfg.rs
index d23e50cb25f..33303d4302a 100644
--- a/src/nouveau/compiler/nak/cfg.rs
+++ b/src/nouveau/compiler/nak/cfg.rs
@@ -328,6 +328,11 @@ impl<N> CFG<N> {
     pub fn pred_indices(&self, idx: usize) -> &[usize] {
         &self.nodes[idx].pred[..]
     }
+
+    pub fn drain<'a>(&'a mut self) -> impl Iterator<Item = N> + 'a {
+        self.has_loop = false;
+        self.nodes.drain(..).map(|n| n.node)
+    }
 }
 
 impl<N> Index<usize> for CFG<N> {
diff --git a/src/nouveau/compiler/nak/ir.rs b/src/nouveau/compiler/nak/ir.rs
index 2406134fff2..34a698bb2e2 100644
--- a/src/nouveau/compiler/nak/ir.rs
+++ b/src/nouveau/compiler/nak/ir.rs
@@ -4024,7 +4024,7 @@ impl DisplayOp for OpBSync {
 impl_display_for_op!(OpBSync);
 
 #[repr(C)]
-#[derive(SrcsAsSlice, DstsAsSlice)]
+#[derive(Clone, SrcsAsSlice, DstsAsSlice)]
 pub struct OpBra {
     pub target: Label,
 }
@@ -4037,7 +4037,7 @@ impl DisplayOp for OpBra {
 impl_display_for_op!(OpBra);
 
 #[repr(C)]
-#[derive(SrcsAsSlice, DstsAsSlice)]
+#[derive(Clone, SrcsAsSlice, DstsAsSlice)]
 pub struct OpExit {}
 
 impl DisplayOp for OpExit {
diff --git a/src/nouveau/compiler/nak/lib.rs b/src/nouveau/compiler/nak/lib.rs
index 85f873a5bae..d0053679bc5 100644
--- a/src/nouveau/compiler/nak/lib.rs
+++ b/src/nouveau/compiler/nak/lib.rs
@@ -19,6 +19,7 @@ mod nir;
 mod opt_bar_prop;
 mod opt_copy_prop;
 mod opt_dce;
+mod opt_jump_thread;
 mod opt_lop;
 mod opt_out;
 mod repair_ssa;
diff --git a/src/nouveau/compiler/nak/opt_jump_thread.rs b/src/nouveau/compiler/nak/opt_jump_thread.rs
new file mode 100644
index 00000000000..a85eefd66ad
--- /dev/null
+++ b/src/nouveau/compiler/nak/opt_jump_thread.rs
@@ -0,0 +1,136 @@
+// Copyright © 2023 Mel Henning
+// SPDX-License-Identifier: MIT
+
+use crate::cfg::CFGBuilder;
+use crate::ir::*;
+use std::collections::HashMap;
+
+fn clone_branch(op: &Op) -> Op {
+    match op {
+        Op::Bra(b) => Op::Bra(b.clone()),
+        Op::Exit(e) => Op::Exit(e.clone()),
+        _ => unreachable!(),
+    }
+}
+
+fn jump_thread(func: &mut Function) -> bool {
+    // Let's call a basic block "trivial" if its only instruction is an
+    // unconditional branch. If a block is trivial, we can update all of its
+    // predecessors to jump to its sucessor.
+    //
+    // A single reverse pass over the basic blocks is enough to update all of
+    // the edges we're interested in. Roughly, if we assume that all loops in
+    // the shader can terminate, then loop heads are never trivial and we
+    // never replace a backward edge. Therefore, in each step we only need to
+    // make sure that later control flow has been replaced in order to update
+    // the current block as much as possible.
+    //
+    // We additionally try to update a branch-to-empty-block to point to the
+    // block's successor, which along with block dce/reordering can sometimes
+    // enable a later optimization that converts branches to fallthrough.
+    let mut progress = false;
+
+    // A branch to label can be replaced with Op
+    let mut replacements: HashMap<Label, Op> = HashMap::new();
+
+    // Invariant 1: At the end of each loop iteration,
+    //              every trivial block with an index in [i, blocks.len())
+    //              is represented in replacements.keys()
+    // Invariant 2: replacements.values() never contains
+    //              a branch to a trivial block
+    for i in (0..func.blocks.len()).rev() {
+        // Replace the branch if possible
+        if let Some(instr) = func.blocks[i].instrs.last_mut() {
+            if let Op::Bra(OpBra { target }) = instr.op {
+                if let Some(replacement) = replacements.get(&target) {
+                    instr.op = clone_branch(replacement);
+                    progress = true;
+                }
+                // If the branch target was previously a trivial block then the
+                // branch was previously a forward edge (see above) and by
+                // invariants 1 and 2 we just updated the branch to target
+                // a nontrivial block
+            }
+        }
+
+        // Is this block trivial?
+        let block_label = func.blocks[i].label;
+        match &func.blocks[i].instrs[..] {
+            [instr] => {
+                if instr.is_branch() && instr.pred.is_true() {
+                    // Upholds invariant 2 because we updated the branch above
+                    replacements.insert(block_label, clone_branch(&instr.op));
+                }
+            }
+            [] => {
+                // Empty block - falls through
+                // Our successor might be trivial, so we need to
+                // apply the rewrite map to uphold invariant 2
+                let target_label = func.blocks[i + 1].label;
+                let replacement = replacements
+                    .get(&target_label)
+                    .map(clone_branch)
+                    .unwrap_or_else(|| {
+                        Op::Bra(OpBra {
+                            target: target_label,
+                        })
+                    });
+                replacements.insert(block_label, replacement);
+            }
+            _ => (),
+        }
+    }
+
+    if progress {
+        // We don't update the CFG above, so rewrite it if we made progress
+        rewrite_cfg(func);
+    }
+
+    return progress;
+}
+
+fn rewrite_cfg(func: &mut Function) {
+    // CFGBuilder takes care of removing dead blocks for us
+    // We use the basic block's label to identify it
+    let mut builder = CFGBuilder::new();
+
+    for i in 0..func.blocks.len() {
+        let block = &func.blocks[i];
+        // Note: fall-though must be first edge
+        if block.falls_through() {
+            let next_block = &func.blocks[i + 1];
+            builder.add_edge(block.label, next_block.label);
+        }
+        if let Some(control_flow) = block.branch() {
+            match &control_flow.op {
+                Op::Bra(bra) => {
+                    builder.add_edge(block.label, bra.target);
+                }
+                Op::Exit(_) => (),
+                _ => unreachable!(),
+            };
+        }
+    }
+
+    for block in func.blocks.drain() {
+        builder.add_node(block.label, block);
+    }
+    let _ = std::mem::replace(&mut func.blocks, builder.as_cfg());
+}
+
+impl Function {
+    pub fn opt_jump_thread(&mut self) {
+        jump_thread(self);
+    }
+}
+
+impl Shader {
+    /// A simple jump threading pass
+    ///
+    /// Note that this can introduce critical edges, so it cannot be run 
before RA
+    pub fn opt_jump_thread(&mut self) {
+        for f in &mut self.functions {
+            f.opt_jump_thread();
+        }
+    }
+}

Reply via email to