Just noticed that we can reduce the number of input VGPRs when the
y and/or z dimensions are unused, similar to what is done for workgroup IDs.

Signed-off-by: Samuel Pitoiset <[email protected]>
---
 src/amd/common/ac_nir_to_llvm.c | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 391a4c2a60..5ba0d937c8 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -95,7 +95,7 @@ struct nir_to_llvm_context {
        LLVMValueRef view_index;
        LLVMValueRef num_work_groups;
        LLVMValueRef workgroup_ids[3];
-       LLVMValueRef local_invocation_ids;
+       LLVMValueRef local_invocation_ids[3];
        LLVMValueRef tg_size;
 
        LLVMValueRef vertex_buffers;
@@ -829,8 +829,14 @@ static void create_function(struct nir_to_llvm_context *ctx,
 
                if (ctx->shader_info->info.cs.uses_local_invocation_idx)
                        add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->tg_size);
-               add_arg(&args, ARG_VGPR, ctx->ac.v3i32,
-                       &ctx->local_invocation_ids);
+
+               for (int i = 0; i < 3; i++) {
+                       ctx->local_invocation_ids[i] = NULL;
+                       if (ctx->shader_info->info.cs.uses_thread_id[i]) {
+                               add_arg(&args, ARG_VGPR, ctx->ac.i32,
+                                       &ctx->local_invocation_ids[i]);
+                       }
+               }
                break;
        case MESA_SHADER_VERTEX:
                declare_global_input_sgprs(ctx, stage, has_previous_stage,
@@ -4370,7 +4376,14 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
                break;
        }
        case nir_intrinsic_load_local_invocation_id: {
-               result = ctx->nctx->local_invocation_ids;
+               LLVMValueRef values[3];
+
+               for (int i = 0; i < 3; i++) {
+                       values[i] = ctx->nctx->local_invocation_ids[i] ?
+                                   ctx->nctx->local_invocation_ids[i] : ctx->ac.i32_0;
+               }
+
+               result = ac_build_gather_values(&ctx->ac, values, 3);
                break;
        }
        case nir_intrinsic_load_base_instance:
-- 
2.16.1

_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to