Module: Mesa Branch: main Commit: 4a3cebaffc167c07d5a5a34e349048aa3d1057fb URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4a3cebaffc167c07d5a5a34e349048aa3d1057fb
Author: Ganesh Belgur Ramachandra <[email protected]> Date: Tue Oct 17 04:41:14 2023 -0500 radeonsi: "create_fmask_expand_cs" shader in nir Reviewed-by: Marek Olšák <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25782> --- src/gallium/drivers/radeonsi/si_compute_blit.c | 2 +- src/gallium/drivers/radeonsi/si_pipe.h | 2 +- src/gallium/drivers/radeonsi/si_shaderlib_nir.c | 61 ++++++++++++++++++++++ src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c | 64 ------------------------ 4 files changed, 63 insertions(+), 66 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c b/src/gallium/drivers/radeonsi/si_compute_blit.c index cdb25ea53da..cddca6e6fc5 100644 --- a/src/gallium/drivers/radeonsi/si_compute_blit.c +++ b/src/gallium/drivers/radeonsi/si_compute_blit.c @@ -902,7 +902,7 @@ void si_compute_expand_fmask(struct pipe_context *ctx, struct pipe_resource *tex /* Bind the shader. */ void **shader = &sctx->cs_fmask_expand[log_samples - 1][is_array]; if (!*shader) - *shader = si_create_fmask_expand_cs(ctx, tex->nr_samples, is_array); + *shader = si_create_fmask_expand_cs(sctx, tex->nr_samples, is_array); /* Dispatch compute. */ struct pipe_grid_info info = {0}; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index ec40792ae37..dcd0d8713d6 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1655,7 +1655,7 @@ void *si_create_dma_compute_shader(struct si_context *sctx, unsigned num_dwords_ void *si_create_clear_buffer_rmw_cs(struct si_context *sctx); void *si_clear_render_target_shader(struct si_context *sctx, enum pipe_texture_target type); void *si_clear_12bytes_buffer_shader(struct si_context *sctx); -void *si_create_fmask_expand_cs(struct pipe_context *ctx, unsigned num_samples, bool is_array); +void *si_create_fmask_expand_cs(struct si_context *sctx, unsigned num_samples, bool is_array); void *si_create_query_result_cs(struct si_context *sctx); void *gfx11_create_sh_query_result_cs(struct si_context *sctx); diff --git a/src/gallium/drivers/radeonsi/si_shaderlib_nir.c b/src/gallium/drivers/radeonsi/si_shaderlib_nir.c index 554b3bcfd0b..85f9ea03d11 100644 --- a/src/gallium/drivers/radeonsi/si_shaderlib_nir.c +++ b/src/gallium/drivers/radeonsi/si_shaderlib_nir.c @@ -727,3 +727,64 @@ void *si_create_dma_compute_shader(struct si_context *sctx, unsigned num_dwords_ return create_shader_state(sctx, b.shader); } + +/* Load samples from the image, and copy them to the same image. This looks like + * a no-op, but it's not. Loads use FMASK, while stores don't, so samples are + * reordered to match expanded FMASK. + * + * After the shader finishes, FMASK should be cleared to identity. + */ +void *si_create_fmask_expand_cs(struct si_context *sctx, unsigned num_samples, bool is_array) +{ + const nir_shader_compiler_options *options = + sctx->b.screen->get_compiler_options(sctx->b.screen, PIPE_SHADER_IR_NIR, PIPE_SHADER_COMPUTE); + + nir_builder b = + nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options, "create_fmask_expand_cs"); + b.shader->info.workgroup_size[0] = 8; + b.shader->info.workgroup_size[1] = 8; + b.shader->info.workgroup_size[2] = 1; + + /* Return an empty compute shader */ + if (num_samples == 0) + return create_shader_state(sctx, b.shader); + + b.shader->info.num_images = 1; + + const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_MS, is_array, GLSL_TYPE_FLOAT); + nir_variable *img = nir_variable_create(b.shader, nir_var_image, img_type, "image"); + img->data.access = ACCESS_RESTRICT; + + nir_def *z = nir_undef(&b, 1, 32); + if (is_array) { + z = nir_channel(&b, nir_load_workgroup_id(&b), 2); + } + + nir_def *zero = nir_imm_int(&b, 0); + nir_def *address = get_global_ids(&b, 2); + + nir_def *sample[8], *addresses[8]; + assert(num_samples <= ARRAY_SIZE(sample)); + + nir_def *img_def = &nir_build_deref_var(&b, img)->def; + + /* Load samples, resolving FMASK. */ + for (unsigned i = 0; i < num_samples; i++) { + nir_def *it = nir_imm_int(&b, i); + sample[i] = nir_vec4(&b, nir_channel(&b, address, 0), nir_channel(&b, address, 1), z, it); + addresses[i] = nir_image_deref_load(&b, 4, 32, img_def, sample[i], it, zero, + .access = ACCESS_RESTRICT, + .image_dim = GLSL_SAMPLER_DIM_2D, + .image_array = is_array); + } + + /* Store samples, ignoring FMASK. */ + for (unsigned i = 0; i < num_samples; i++) { + nir_image_deref_store(&b, img_def, sample[i], nir_imm_int(&b, i), addresses[i], zero, + .access = ACCESS_RESTRICT, + .image_dim = GLSL_SAMPLER_DIM_2D, + .image_array = is_array); + } + + return create_shader_state(sctx, b.shader); +} diff --git a/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c b/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c index 8d9b4256fb5..73392715874 100644 --- a/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c +++ b/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c @@ -282,70 +282,6 @@ void *si_create_query_result_cs(struct si_context *sctx) return sctx->b.create_compute_state(&sctx->b, &state); } -/* Load samples from the image, and copy them to the same image. This looks like - * a no-op, but it's not. Loads use FMASK, while stores don't, so samples are - * reordered to match expanded FMASK. - * - * After the shader finishes, FMASK should be cleared to identity. - */ -void *si_create_fmask_expand_cs(struct pipe_context *ctx, unsigned num_samples, bool is_array) -{ - enum tgsi_texture_type target = is_array ? TGSI_TEXTURE_2D_ARRAY_MSAA : TGSI_TEXTURE_2D_MSAA; - struct ureg_program *ureg = ureg_create(PIPE_SHADER_COMPUTE); - if (!ureg) - return NULL; - - ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH, 8); - ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT, 8); - ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH, 1); - - /* Compute the image coordinates. */ - struct ureg_src image = ureg_DECL_image(ureg, 0, target, 0, true, false); - struct ureg_src tid = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_THREAD_ID, 0); - struct ureg_src blk = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_BLOCK_ID, 0); - struct ureg_dst coord = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZW); - ureg_UMAD(ureg, ureg_writemask(coord, TGSI_WRITEMASK_XY), ureg_swizzle(blk, 0, 1, 1, 1), - ureg_imm2u(ureg, 8, 8), ureg_swizzle(tid, 0, 1, 1, 1)); - if (is_array) { - ureg_MOV(ureg, ureg_writemask(coord, TGSI_WRITEMASK_Z), ureg_scalar(blk, TGSI_SWIZZLE_Z)); - } - - /* Load samples, resolving FMASK. */ - struct ureg_dst sample[8]; - assert(num_samples <= ARRAY_SIZE(sample)); - - for (unsigned i = 0; i < num_samples; i++) { - sample[i] = ureg_DECL_temporary(ureg); - - ureg_MOV(ureg, ureg_writemask(coord, TGSI_WRITEMASK_W), ureg_imm1u(ureg, i)); - - struct ureg_src srcs[] = {image, ureg_src(coord)}; - ureg_memory_insn(ureg, TGSI_OPCODE_LOAD, &sample[i], 1, srcs, 2, TGSI_MEMORY_RESTRICT, target, - 0); - } - - /* Store samples, ignoring FMASK. */ - for (unsigned i = 0; i < num_samples; i++) { - ureg_MOV(ureg, ureg_writemask(coord, TGSI_WRITEMASK_W), ureg_imm1u(ureg, i)); - - struct ureg_dst dst_image = ureg_dst(image); - struct ureg_src srcs[] = {ureg_src(coord), ureg_src(sample[i])}; - ureg_memory_insn(ureg, TGSI_OPCODE_STORE, &dst_image, 1, srcs, 2, TGSI_MEMORY_RESTRICT, - target, 0); - } - ureg_END(ureg); - - struct pipe_compute_state state = {}; - state.ir_type = PIPE_SHADER_IR_TGSI; - state.prog = ureg_get_tokens(ureg, NULL); - - void *cs = ctx->create_compute_state(ctx, &state); - ureg_destroy(ureg); - ureg_free_tokens(state.prog); - - return cs; -} - /* Create the compute shader that is used to collect the results of gfx10+ * shader queries. *
