Module: Mesa
Branch: main
Commit: 4a3cebaffc167c07d5a5a34e349048aa3d1057fb
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=4a3cebaffc167c07d5a5a34e349048aa3d1057fb

Author: Ganesh Belgur Ramachandra <[email protected]>
Date:   Tue Oct 17 04:41:14 2023 -0500

radeonsi: "create_fmask_expand_cs" shader in nir

Reviewed-by: Marek Olšák <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25782>

---

 src/gallium/drivers/radeonsi/si_compute_blit.c   |  2 +-
 src/gallium/drivers/radeonsi/si_pipe.h           |  2 +-
 src/gallium/drivers/radeonsi/si_shaderlib_nir.c  | 61 ++++++++++++++++++++++
 src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c | 64 ------------------------
 4 files changed, 63 insertions(+), 66 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c 
b/src/gallium/drivers/radeonsi/si_compute_blit.c
index cdb25ea53da..cddca6e6fc5 100644
--- a/src/gallium/drivers/radeonsi/si_compute_blit.c
+++ b/src/gallium/drivers/radeonsi/si_compute_blit.c
@@ -902,7 +902,7 @@ void si_compute_expand_fmask(struct pipe_context *ctx, 
struct pipe_resource *tex
    /* Bind the shader. */
    void **shader = &sctx->cs_fmask_expand[log_samples - 1][is_array];
    if (!*shader)
-      *shader = si_create_fmask_expand_cs(ctx, tex->nr_samples, is_array);
+      *shader = si_create_fmask_expand_cs(sctx, tex->nr_samples, is_array);
 
    /* Dispatch compute. */
    struct pipe_grid_info info = {0};
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index ec40792ae37..dcd0d8713d6 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1655,7 +1655,7 @@ void *si_create_dma_compute_shader(struct si_context 
*sctx, unsigned num_dwords_
 void *si_create_clear_buffer_rmw_cs(struct si_context *sctx);
 void *si_clear_render_target_shader(struct si_context *sctx, enum 
pipe_texture_target type);
 void *si_clear_12bytes_buffer_shader(struct si_context *sctx);
-void *si_create_fmask_expand_cs(struct pipe_context *ctx, unsigned 
num_samples, bool is_array);
+void *si_create_fmask_expand_cs(struct si_context *sctx, unsigned num_samples, 
bool is_array);
 void *si_create_query_result_cs(struct si_context *sctx);
 void *gfx11_create_sh_query_result_cs(struct si_context *sctx);
 
diff --git a/src/gallium/drivers/radeonsi/si_shaderlib_nir.c 
b/src/gallium/drivers/radeonsi/si_shaderlib_nir.c
index 554b3bcfd0b..85f9ea03d11 100644
--- a/src/gallium/drivers/radeonsi/si_shaderlib_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shaderlib_nir.c
@@ -727,3 +727,64 @@ void *si_create_dma_compute_shader(struct si_context 
*sctx, unsigned num_dwords_
 
    return create_shader_state(sctx, b.shader);
 }
+
+/* Load samples from the image, and copy them to the same image. This looks 
like
+ * a no-op, but it's not. Loads use FMASK, while stores don't, so samples are
+ * reordered to match expanded FMASK.
+ *
+ * After the shader finishes, FMASK should be cleared to identity.
+ */
+void *si_create_fmask_expand_cs(struct si_context *sctx, unsigned num_samples, 
bool is_array)
+{
+   const nir_shader_compiler_options *options =
+      sctx->b.screen->get_compiler_options(sctx->b.screen, PIPE_SHADER_IR_NIR, 
PIPE_SHADER_COMPUTE);
+
+   nir_builder b =
+      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options, 
"create_fmask_expand_cs");
+   b.shader->info.workgroup_size[0] = 8;
+   b.shader->info.workgroup_size[1] = 8;
+   b.shader->info.workgroup_size[2] = 1;
+
+   /* Return an empty compute shader */
+   if (num_samples == 0)
+      return create_shader_state(sctx, b.shader);
+
+   b.shader->info.num_images = 1;
+
+   const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_MS, 
is_array, GLSL_TYPE_FLOAT);
+   nir_variable *img = nir_variable_create(b.shader, nir_var_image, img_type, 
"image");
+   img->data.access = ACCESS_RESTRICT;
+
+   nir_def *z = nir_undef(&b, 1, 32);
+   if (is_array) {
+      z = nir_channel(&b, nir_load_workgroup_id(&b), 2);
+   }
+
+   nir_def *zero = nir_imm_int(&b, 0);
+   nir_def *address = get_global_ids(&b, 2);
+
+   nir_def *sample[8], *addresses[8];
+   assert(num_samples <= ARRAY_SIZE(sample));
+
+   nir_def *img_def = &nir_build_deref_var(&b, img)->def;
+
+   /* Load samples, resolving FMASK. */
+   for (unsigned i = 0; i < num_samples; i++) {
+      nir_def *it = nir_imm_int(&b, i);
+      sample[i] = nir_vec4(&b, nir_channel(&b, address, 0), nir_channel(&b, 
address, 1), z, it);
+      addresses[i] = nir_image_deref_load(&b, 4, 32, img_def, sample[i], it, 
zero,
+                                          .access = ACCESS_RESTRICT,
+                                          .image_dim = GLSL_SAMPLER_DIM_2D,
+                                          .image_array = is_array);
+   }
+
+   /* Store samples, ignoring FMASK. */
+   for (unsigned i = 0; i < num_samples; i++) {
+      nir_image_deref_store(&b, img_def, sample[i], nir_imm_int(&b, i), 
addresses[i], zero,
+                            .access = ACCESS_RESTRICT,
+                            .image_dim = GLSL_SAMPLER_DIM_2D,
+                            .image_array = is_array);
+   }
+
+   return create_shader_state(sctx, b.shader);
+}
diff --git a/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c 
b/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c
index 8d9b4256fb5..73392715874 100644
--- a/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c
+++ b/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c
@@ -282,70 +282,6 @@ void *si_create_query_result_cs(struct si_context *sctx)
    return sctx->b.create_compute_state(&sctx->b, &state);
 }
 
-/* Load samples from the image, and copy them to the same image. This looks 
like
- * a no-op, but it's not. Loads use FMASK, while stores don't, so samples are
- * reordered to match expanded FMASK.
- *
- * After the shader finishes, FMASK should be cleared to identity.
- */
-void *si_create_fmask_expand_cs(struct pipe_context *ctx, unsigned 
num_samples, bool is_array)
-{
-   enum tgsi_texture_type target = is_array ? TGSI_TEXTURE_2D_ARRAY_MSAA : 
TGSI_TEXTURE_2D_MSAA;
-   struct ureg_program *ureg = ureg_create(PIPE_SHADER_COMPUTE);
-   if (!ureg)
-      return NULL;
-
-   ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH, 8);
-   ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT, 8);
-   ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH, 1);
-
-   /* Compute the image coordinates. */
-   struct ureg_src image = ureg_DECL_image(ureg, 0, target, 0, true, false);
-   struct ureg_src tid = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_THREAD_ID, 
0);
-   struct ureg_src blk = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_BLOCK_ID, 
0);
-   struct ureg_dst coord = ureg_writemask(ureg_DECL_temporary(ureg), 
TGSI_WRITEMASK_XYZW);
-   ureg_UMAD(ureg, ureg_writemask(coord, TGSI_WRITEMASK_XY), ureg_swizzle(blk, 
0, 1, 1, 1),
-             ureg_imm2u(ureg, 8, 8), ureg_swizzle(tid, 0, 1, 1, 1));
-   if (is_array) {
-      ureg_MOV(ureg, ureg_writemask(coord, TGSI_WRITEMASK_Z), ureg_scalar(blk, 
TGSI_SWIZZLE_Z));
-   }
-
-   /* Load samples, resolving FMASK. */
-   struct ureg_dst sample[8];
-   assert(num_samples <= ARRAY_SIZE(sample));
-
-   for (unsigned i = 0; i < num_samples; i++) {
-      sample[i] = ureg_DECL_temporary(ureg);
-
-      ureg_MOV(ureg, ureg_writemask(coord, TGSI_WRITEMASK_W), ureg_imm1u(ureg, 
i));
-
-      struct ureg_src srcs[] = {image, ureg_src(coord)};
-      ureg_memory_insn(ureg, TGSI_OPCODE_LOAD, &sample[i], 1, srcs, 2, 
TGSI_MEMORY_RESTRICT, target,
-                       0);
-   }
-
-   /* Store samples, ignoring FMASK. */
-   for (unsigned i = 0; i < num_samples; i++) {
-      ureg_MOV(ureg, ureg_writemask(coord, TGSI_WRITEMASK_W), ureg_imm1u(ureg, 
i));
-
-      struct ureg_dst dst_image = ureg_dst(image);
-      struct ureg_src srcs[] = {ureg_src(coord), ureg_src(sample[i])};
-      ureg_memory_insn(ureg, TGSI_OPCODE_STORE, &dst_image, 1, srcs, 2, 
TGSI_MEMORY_RESTRICT,
-                       target, 0);
-   }
-   ureg_END(ureg);
-
-   struct pipe_compute_state state = {};
-   state.ir_type = PIPE_SHADER_IR_TGSI;
-   state.prog = ureg_get_tokens(ureg, NULL);
-
-   void *cs = ctx->create_compute_state(ctx, &state);
-   ureg_destroy(ureg);
-   ureg_free_tokens(state.prog);
-
-   return cs;
-}
-
 /* Create the compute shader that is used to collect the results of gfx10+
  * shader queries.
  *

Reply via email to