Module: Mesa
Branch: main
Commit: 618bdb85718fbd54ff68b1d1581992cd7e22269f
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=618bdb85718fbd54ff68b1d1581992cd7e22269f

Author: Faith Ekstrand <[email protected]>
Date:   Fri Nov 10 11:51:11 2023 -0600

nak: Rework FS input interpolation

This gives FS I/O the same treatment as we did for vertex attributes in
that we now have a NIR intrinsic which pretty closely matches the
hardware and we lower to that before going into NAK.  This gives us a
bit more control in the NIR.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26181>

---

 src/compiler/nir/nir_divergence_analysis.c |   1 +
 src/compiler/nir/nir_intrinsics.py         |   3 +
 src/nouveau/compiler/nak_from_nir.rs       | 143 ++++++++---------------
 src/nouveau/compiler/nak_nir.c             | 181 +++++++++++++++++++++--------
 src/nouveau/compiler/nak_private.h         |  25 ++++
 5 files changed, 214 insertions(+), 139 deletions(-)

diff --git a/src/compiler/nir/nir_divergence_analysis.c b/src/compiler/nir/nir_divergence_analysis.c
index ad34c4db19d..91a5135deb2 100644
--- a/src/compiler/nir/nir_divergence_analysis.c
+++ b/src/compiler/nir/nir_divergence_analysis.c
@@ -619,6 +619,7 @@ visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr)
    case nir_intrinsic_isberd_nv:
    case nir_intrinsic_al2p_nv:
    case nir_intrinsic_ald_nv:
+   case nir_intrinsic_ipa_nv:
       is_divergent = true;
       break;
 
diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
index 286b3993919..93b8b5280cf 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -2018,6 +2018,9 @@ intrinsic("ald_nv", dest_comp=0, src_comp=[1, 1], bit_sizes=[32],
 # FLAGS is struct nak_nir_attr_io_flags
 intrinsic("ast_nv", src_comp=[0, 1, 1],
           indices=[BASE, RANGE_BASE, RANGE, FLAGS], flags=[])
+# src[] = { inv_w, offset }.
+intrinsic("ipa_nv", dest_comp=1, src_comp=[1, 1], bit_sizes=[32],
+          indices=[BASE, FLAGS], flags=[CAN_ELIMINATE, CAN_REORDER])
 
 # NVIDIA-specific Geometry Shader intrinsics.
 # These contain an additional integer source and destination with the primitive handle input/output.
diff --git a/src/nouveau/compiler/nak_from_nir.rs b/src/nouveau/compiler/nak_from_nir.rs
index 746429a9095..8f5029dcedb 100644
--- a/src/nouveau/compiler/nak_from_nir.rs
+++ b/src/nouveau/compiler/nak_from_nir.rs
@@ -1557,6 +1557,56 @@ impl<'a> ShaderFromNir<'a> {
                 });
                 self.set_dst(&intrin.def, dst);
             }
+            nir_intrinsic_ipa_nv => {
+                let addr = u16::try_from(intrin.base()).unwrap();
+
+                let flags = intrin.flags();
+                let flags: nak_nir_ipa_flags =
+                    unsafe { std::mem::transmute_copy(&flags) };
+
+                let mode = match flags.interp_mode() {
+                    NAK_INTERP_MODE_PERSPECTIVE => PixelImap::Perspective,
+                    NAK_INTERP_MODE_SCREEN_LINEAR => PixelImap::ScreenLinear,
+                    NAK_INTERP_MODE_CONSTANT => PixelImap::Constant,
+                    _ => panic!("Unsupported interp mode"),
+                };
+
+                let freq = match flags.interp_freq() {
+                    NAK_INTERP_FREQ_PASS => InterpFreq::Pass,
+                    NAK_INTERP_FREQ_CONSTANT => InterpFreq::Constant,
+                    NAK_INTERP_FREQ_STATE => InterpFreq::State,
+                    _ => panic!("Invalid interp freq"),
+                };
+
+                let loc = match flags.interp_loc() {
+                    NAK_INTERP_LOC_DEFAULT => InterpLoc::Default,
+                    NAK_INTERP_LOC_CENTROID => InterpLoc::Centroid,
+                    NAK_INTERP_LOC_OFFSET => InterpLoc::Offset,
+                    _ => panic!("Invalid interp loc"),
+                };
+
+                let offset = if loc == InterpLoc::Offset {
+                    self.get_src(&srcs[1])
+                } else {
+                    0.into()
+                };
+
+                let ShaderIoInfo::Fragment(io) = &mut self.info.io else {
+                    panic!("OpIpa is only used for fragment shaders");
+                };
+
+                io.mark_attr_read(addr, mode);
+
+                let dst = b.alloc_ssa(RegFile::GPR, 1);
+                b.push_op(OpIpa {
+                    dst: dst.into(),
+                    addr: addr,
+                    freq: freq,
+                    loc: loc,
+                    offset: offset,
+                });
+                self.set_dst(&intrin.def, dst);
+            }
             nir_intrinsic_isberd_nv => {
                 let dst = b.alloc_ssa(RegFile::GPR, 1);
                 b.push_op(OpIsberd {
@@ -1596,99 +1646,6 @@ impl<'a> ShaderFromNir<'a> {
                 });
                 self.set_dst(&intrin.def, dst);
             }
-            nir_intrinsic_load_input => {
-                let ShaderIoInfo::Fragment(io) = &mut self.info.io else {
-                    panic!("load_input is only used for fragment shaders");
-                };
-
-                assert!(intrin.def.bit_size() == 32);
-                let comps = intrin.def.num_components;
-
-                let addr = u16::try_from(intrin.base()).unwrap()
-                    + u16::try_from(srcs[0].as_uint().unwrap()).unwrap()
-                    + 4 * u16::try_from(intrin.component()).unwrap();
-
-                let dst = b.alloc_ssa(RegFile::GPR, comps);
-                for c in 0..comps {
-                    let c_addr = addr + 4 * u16::from(c);
-
-                    io.mark_attr_read(c_addr, PixelImap::Constant);
-
-                    b.push_op(OpIpa {
-                        dst: dst[usize::from(c)].into(),
-                        addr: c_addr,
-                        freq: InterpFreq::Constant,
-                        loc: InterpLoc::Default,
-                        offset: SrcRef::Zero.into(),
-                    });
-                }
-                self.set_dst(&intrin.def, dst);
-            }
-            nir_intrinsic_load_interpolated_input => {
-                let bary =
-                    srcs[0].as_def().parent_instr().as_intrinsic().unwrap();
-                let addr = u16::try_from(intrin.base()).unwrap()
-                    + u16::try_from(srcs[1].as_uint().unwrap()).unwrap()
-                    + u16::try_from(intrin.component()).unwrap() * 4;
-                let (freq, loc) = match bary.intrinsic {
-                    nir_intrinsic_load_barycentric_at_offset_nv => {
-                        (InterpFreq::Pass, InterpLoc::Offset)
-                    }
-                    nir_intrinsic_load_barycentric_centroid => {
-                        (InterpFreq::Pass, InterpLoc::Centroid)
-                    }
-                    nir_intrinsic_load_barycentric_pixel => {
-                        (InterpFreq::Pass, InterpLoc::Default)
-                    }
-                    nir_intrinsic_load_barycentric_sample => {
-                        (InterpFreq::Pass, InterpLoc::Centroid)
-                    }
-                    _ => panic!("Unsupported barycentric"),
-                };
-
-                let interp_mode = match bary.interp_mode() {
-                    INTERP_MODE_NONE | INTERP_MODE_SMOOTH => {
-                        PixelImap::Perspective
-                    }
-                    INTERP_MODE_FLAT => PixelImap::Constant,
-                    INTERP_MODE_NOPERSPECTIVE => PixelImap::ScreenLinear,
-                    INTERP_MODE_EXPLICIT => PixelImap::Unused,
-                    _ => panic!("Unsupported interp mode"),
-                };
-
-                let offset = match bary.intrinsic {
-                    nir_intrinsic_load_barycentric_at_offset_nv => {
-                        self.get_src(&bary.get_src(0))
-                    }
-                    nir_intrinsic_load_barycentric_centroid
-                    | nir_intrinsic_load_barycentric_pixel
-                    | nir_intrinsic_load_barycentric_sample => 0.into(),
-                    _ => panic!("Unsupported interp mode"),
-                };
-
-                assert!(intrin.def.bit_size() == 32);
-                let comps = intrin.def.num_components();
-                let dst = b.alloc_ssa(RegFile::GPR, comps);
-
-                let ShaderIoInfo::Fragment(io) = &mut self.info.io else {
-                    panic!("input interpolation is only allowed in fragment shaders");
-                };
-
-                for c in 0..comps {
-                    let c_addr = addr + 4 * u16::from(c);
-
-                    io.mark_attr_read(c_addr, interp_mode);
-
-                    b.push_op(OpIpa {
-                        dst: dst[usize::from(c)].into(),
-                        addr: c_addr,
-                        freq: freq,
-                        loc: loc,
-                        offset: offset,
-                    });
-                }
-                self.set_dst(&intrin.def, dst);
-            }
             nir_intrinsic_load_sample_id => {
                 let dst = b.alloc_ssa(RegFile::GPR, 1);
                 b.push_op(OpPixLd {
diff --git a/src/nouveau/compiler/nak_nir.c b/src/nouveau/compiler/nak_nir.c
index 27a467cf50a..d3c93c7eddd 100644
--- a/src/nouveau/compiler/nak_nir.c
+++ b/src/nouveau/compiler/nak_nir.c
@@ -557,14 +557,54 @@ nak_xfb_from_nir(const struct nir_xfb_info *nir_xfb)
 }
 
 static nir_def *
-load_frag_w(nir_builder *b, nir_def *bary)
+load_frag_w(nir_builder *b, enum nak_interp_loc interp_loc)
 {
    const uint16_t w_addr =
       nak_sysval_attr_addr(SYSTEM_VALUE_FRAG_COORD) + 12;
 
-   return nir_load_interpolated_input(b, 1, 32, bary,
-                                      nir_imm_int(b, 0), .base = w_addr,
-                                      .dest_type = nir_type_float32);
+   const struct nak_nir_ipa_flags flags = {
+      .interp_mode = NAK_INTERP_MODE_PERSPECTIVE,
+      .interp_freq = NAK_INTERP_FREQ_PASS,
+      .interp_loc = interp_loc,
+   };
+   uint32_t flags_u32;
+   memcpy(&flags_u32, &flags, sizeof(flags_u32));
+
+   return nir_ipa_nv(b, nir_imm_float(b, 0), nir_undef(b, 1, 32),
+                     .base = w_addr, .flags = flags_u32);
+}
+
+static nir_def *
+load_interpolated_input(nir_builder *b, unsigned num_components, uint32_t addr,
+                        enum nak_interp_mode interp_mode,
+                        enum nak_interp_loc interp_loc,
+                        nir_def *inv_w, nir_def *offset,
+                        const struct nak_compiler *nak)
+{
+   if (offset == NULL)
+      offset = nir_undef(b, 1, 32);
+
+   if (nak->sm >= 70) {
+      const struct nak_nir_ipa_flags flags = {
+         .interp_mode = interp_mode,
+         .interp_freq = NAK_INTERP_FREQ_PASS,
+         .interp_loc = interp_loc,
+      };
+      uint32_t flags_u32;
+      memcpy(&flags_u32, &flags, sizeof(flags_u32));
+
+      nir_def *comps[NIR_MAX_VEC_COMPONENTS];
+      for (unsigned c = 0; c < num_components; c++) {
+         comps[c] = nir_ipa_nv(b, nir_undef(b, 1, 32), offset,
+                               .base = addr + c * 4,
+                               .flags = flags_u32);
+         if (interp_mode == NAK_INTERP_MODE_PERSPECTIVE)
+            comps[c] = nir_fmul(b, comps[c], inv_w);
+      }
+      return nir_vec(b, comps, num_components);
+   } else {
+      unreachable("Figure out input interpolation on Maxwell");
+   }
 }
 
 struct lower_fs_input_ctx {
@@ -586,51 +626,25 @@ lower_fs_input_intrin(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
       return true;
    }
 
-   case nir_intrinsic_load_barycentric_at_offset: {
-      b->cursor = nir_before_instr(&intrin->instr);
-
-      nir_def *offset_f = intrin->src[0].ssa;
-      offset_f = nir_fclamp(b, offset_f, nir_imm_float(b, -0.5),
-                            nir_imm_float(b, 0.437500));
-      nir_def *offset_fixed =
-         nir_f2i32(b, nir_fmul_imm(b, offset_f, 4096.0));
-      nir_def *offset_packed =
-         nir_ior(b, nir_ishl_imm(b, nir_channel(b, offset_fixed, 1), 16),
-                    nir_iand_imm(b, nir_channel(b, offset_fixed, 0), 0xffff));
-
-      intrin->intrinsic = nir_intrinsic_load_barycentric_at_offset_nv;
-      nir_src_rewrite(&intrin->src[0], offset_packed);
-
-      return true;
-   }
-
    case nir_intrinsic_load_frag_coord:
    case nir_intrinsic_load_point_coord:
    case nir_intrinsic_load_sample_pos: {
       b->cursor = nir_before_instr(&intrin->instr);
 
-      nir_def *bary;
-      if (b->shader->info.fs.uses_sample_shading) {
-         bary = nir_load_barycentric_sample(b, 32,
-            .interp_mode = INTERP_MODE_SMOOTH);
-      } else {
-         bary = nir_load_barycentric_pixel(b, 32,
-            .interp_mode = INTERP_MODE_SMOOTH);
-      }
-
+      const enum nak_interp_loc interp_loc =
+         b->shader->info.fs.uses_sample_shading ? NAK_INTERP_LOC_CENTROID
+                                                : NAK_INTERP_LOC_DEFAULT;
       const uint32_t addr =
          intrin->intrinsic == nir_intrinsic_load_point_coord ?
          nak_sysval_attr_addr(SYSTEM_VALUE_POINT_COORD) :
          nak_sysval_attr_addr(SYSTEM_VALUE_FRAG_COORD);
 
-      nir_def *coord =
-         nir_load_interpolated_input(b, intrin->def.num_components, 32,
-                                     bary, nir_imm_int(b, 0),
-                                     .base = addr,
-                                     .dest_type = nir_type_float32);
-
-      nir_def *w = load_frag_w(b, bary);
-      coord = nir_fdiv(b, coord, w);
+      nir_def *w = load_frag_w(b, interp_loc);
+      nir_def *coord = load_interpolated_input(b, intrin->def.num_components,
+                                               addr,
+                                               NAK_INTERP_MODE_PERSPECTIVE,
+                                               interp_loc, nir_frcp(b, w),
+                                               NULL, ctx->nak);
 
       switch (intrin->intrinsic) {
       case nir_intrinsic_load_frag_coord:
@@ -651,17 +665,91 @@ lower_fs_input_intrin(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
       return true;
    }
 
+   case nir_intrinsic_load_input: {
+      b->cursor = nir_before_instr(&intrin->instr);
+
+      uint16_t addr = nir_intrinsic_base(intrin) +
+                      nir_src_as_uint(intrin->src[0]) +
+                      nir_intrinsic_component(intrin) * 4;
+
+      const struct nak_nir_ipa_flags flags = {
+         .interp_mode = NAK_INTERP_MODE_CONSTANT,
+         .interp_freq = NAK_INTERP_FREQ_CONSTANT,
+         .interp_loc = NAK_INTERP_LOC_DEFAULT,
+      };
+      uint32_t flags_u32;
+      memcpy(&flags_u32, &flags, sizeof(flags_u32));
+
+      nir_def *comps[NIR_MAX_VEC_COMPONENTS];
+      for (unsigned c = 0; c < intrin->def.num_components; c++) {
+         comps[c] = nir_ipa_nv(b, nir_imm_float(b, 0), nir_undef(b, 1, 32),
+                               .base = addr + c * 4, .flags = flags_u32);
+      }
+      nir_def *res = nir_vec(b, comps, intrin->def.num_components);
+
+      nir_def_rewrite_uses(&intrin->def, res);
+      nir_instr_remove(&intrin->instr);
+
+      return true;
+   }
+
    case nir_intrinsic_load_interpolated_input: {
+      b->cursor = nir_before_instr(&intrin->instr);
+
+      const uint16_t addr = nir_intrinsic_base(intrin) +
+                            nir_src_as_uint(intrin->src[1]) +
+                            nir_intrinsic_component(intrin) * 4;
+
       nir_intrinsic_instr *bary = nir_src_as_intrinsic(intrin->src[0]);
-      if (nir_intrinsic_interp_mode(bary) != INTERP_MODE_SMOOTH &&
-          nir_intrinsic_interp_mode(bary) != INTERP_MODE_NONE)
-         return false;
 
-      b->cursor = nir_after_instr(&intrin->instr);
+      enum nak_interp_mode interp_mode;
+      if (nir_intrinsic_interp_mode(bary) == INTERP_MODE_SMOOTH ||
+          nir_intrinsic_interp_mode(bary) == INTERP_MODE_NONE)
+         interp_mode = NAK_INTERP_MODE_PERSPECTIVE;
+      else
+         interp_mode = NAK_INTERP_MODE_SCREEN_LINEAR;
+
+      nir_def *offset = NULL;
+      enum nak_interp_loc interp_loc;
+      switch (bary->intrinsic) {
+      case nir_intrinsic_load_barycentric_at_offset: {
+         interp_loc = NAK_INTERP_LOC_OFFSET;
+
+         nir_def *offset_f = bary->src[0].ssa;
+         offset_f = nir_fclamp(b, offset_f, nir_imm_float(b, -0.5),
+                               nir_imm_float(b, 0.437500));
+         nir_def *offset_fixed =
+            nir_f2i32(b, nir_fmul_imm(b, offset_f, 4096.0));
+         offset = nir_ior(b, nir_ishl_imm(b, nir_channel(b, offset_fixed, 1),
+                                             16),
+                             nir_iand_imm(b, nir_channel(b, offset_fixed, 0),
+                                             0xffff));
+         break;
+      }
+
+      case nir_intrinsic_load_barycentric_centroid:
+      case nir_intrinsic_load_barycentric_sample:
+         interp_loc = NAK_INTERP_LOC_CENTROID;
+         break;
+
+      case nir_intrinsic_load_barycentric_pixel:
+         interp_loc = NAK_INTERP_LOC_DEFAULT;
+         break;
 
-      /* Perspective-correct interpolated inputs need to be divided by .w */
-      nir_def *res = nir_fdiv(b, &intrin->def, load_frag_w(b, &bary->def));
-      nir_def_rewrite_uses_after(&intrin->def, res, res->parent_instr);
+      default:
+         unreachable("Unsupported barycentric");
+      }
+
+      nir_def *inv_w = NULL;
+      if (interp_mode == NAK_INTERP_MODE_PERSPECTIVE)
+         inv_w = nir_frcp(b, load_frag_w(b, interp_loc));
+
+      nir_def *res = load_interpolated_input(b, intrin->def.num_components,
+                                             addr, interp_mode, interp_loc,
+                                             inv_w, offset, ctx->nak);
+
+      nir_def_rewrite_uses(&intrin->def, res);
+      nir_instr_remove(&intrin->instr);
 
       return true;
    }
@@ -693,6 +781,7 @@ nak_nir_lower_fs_inputs(nir_shader *nir,
                         const struct nak_fs_key *fs_key)
 {
    NIR_PASS_V(nir, nak_nir_lower_varyings, nir_var_shader_in);
+   NIR_PASS_V(nir, nir_opt_constant_folding);
 
    const struct lower_fs_input_ctx fs_in_ctx = {
       .nak = nak,
diff --git a/src/nouveau/compiler/nak_private.h b/src/nouveau/compiler/nak_private.h
index 2c70692d949..7da483964f4 100644
--- a/src/nouveau/compiler/nak_private.h
+++ b/src/nouveau/compiler/nak_private.h
@@ -156,6 +156,31 @@ struct nak_nir_attr_io_flags {
 
 bool nak_nir_lower_vtg_io(nir_shader *nir, const struct nak_compiler *nak);
 
+enum nak_interp_mode {
+   NAK_INTERP_MODE_PERSPECTIVE,
+   NAK_INTERP_MODE_SCREEN_LINEAR,
+   NAK_INTERP_MODE_CONSTANT,
+};
+
+enum nak_interp_freq {
+    NAK_INTERP_FREQ_PASS,
+    NAK_INTERP_FREQ_CONSTANT,
+    NAK_INTERP_FREQ_STATE,
+};
+
+enum nak_interp_loc {
+   NAK_INTERP_LOC_DEFAULT,
+   NAK_INTERP_LOC_CENTROID,
+   NAK_INTERP_LOC_OFFSET,
+};
+
+struct nak_nir_ipa_flags {
+   enum nak_interp_mode interp_mode:2;
+   enum nak_interp_freq interp_freq:2;
+   enum nak_interp_loc interp_loc:2;
+   uint32_t pad:26;
+};
+
 enum nak_fs_out {
    NAK_FS_OUT_COLOR0 = 0x00,
    NAK_FS_OUT_COLOR1 = 0x10,

Reply via email to