Module: Mesa
Branch: main
Commit: dc5cc847ddeab1d41b00a520c19a8c1273dc967e
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=dc5cc847ddeab1d41b00a520c19a8c1273dc967e

Author: Faith Ekstrand <[email protected]>
Date:   Wed Nov 22 15:13:00 2023 -0600

nak: Implement nir_op_extract_*

This should make a lot of bit twiddling more efficient since NIR can
optimize certain shift patterns into extract ops, each of which we can
implement with a single PRMT instruction.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26348>

---

 src/nouveau/compiler/nak.rs          |  2 --
 src/nouveau/compiler/nak_from_nir.rs | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/src/nouveau/compiler/nak.rs b/src/nouveau/compiler/nak.rs
index 9cb75648590..fae3d24d987 100644
--- a/src/nouveau/compiler/nak.rs
+++ b/src/nouveau/compiler/nak.rs
@@ -120,8 +120,6 @@ fn nir_options(_dev: &nv_device_info) -> nir_shader_compiler_options {
     op.lower_unpack_snorm_2x16 = true;
     op.lower_unpack_unorm_4x8 = true;
     op.lower_unpack_snorm_4x8 = true;
-    op.lower_extract_byte = true;
-    op.lower_extract_word = true;
     op.lower_insert_byte = true;
     op.lower_insert_word = true;
     op.lower_cs_local_index_to_id = true;
diff --git a/src/nouveau/compiler/nak_from_nir.rs b/src/nouveau/compiler/nak_from_nir.rs
index 40e87c63dcf..69d82252615 100644
--- a/src/nouveau/compiler/nak_from_nir.rs
+++ b/src/nouveau/compiler/nak_from_nir.rs
@@ -371,6 +371,42 @@ impl<'a> ShaderFromNir<'a> {
                 });
                 dst
             }
+            nir_op_extract_u8
+            | nir_op_extract_i8
+            | nir_op_extract_u16
+            | nir_op_extract_i16 => {
+                let src1 = alu.get_src(1);
+                let elem = src1.src.comp_as_uint(src1.swizzle[0]).unwrap();
+                let elem = u8::try_from(elem).unwrap();
+
+                match alu.op {
+                    nir_op_extract_u8 => {
+                        assert!(elem < 4);
+                        let byte = elem;
+                        let zero = 4;
+                        b.prmt(srcs[0], 0.into(), [byte, zero, zero, zero])
+                    }
+                    nir_op_extract_i8 => {
+                        assert!(elem < 4);
+                        let byte = elem;
+                        let sign = byte | 0x8;
+                        b.prmt(srcs[0], 0.into(), [byte, sign, sign, sign])
+                    }
+                    nir_op_extract_u16 => {
+                        assert!(elem < 2);
+                        let byte = elem * 2;
+                        let zero = 4;
+                        b.prmt(srcs[0], 0.into(), [byte, byte + 1, zero, zero])
+                    }
+                    nir_op_extract_i16 => {
+                        assert!(elem < 2);
+                        let byte = elem * 2;
+                        let sign = (byte + 1) | 0x8;
+                        b.prmt(srcs[0], 0.into(), [byte, byte + 1, sign, sign])
+                    }
+                    _ => panic!("Unknown extract op: {}", alu.op),
+                }
+            }
             nir_op_find_lsb => {
                 let tmp = b.alloc_ssa(RegFile::GPR, 1);
                 b.push_op(OpBrev {

Reply via email to