From: Marek Olšák <[email protected]>

---
 src/amd/common/ac_llvm_util.c            | 10 ++++++++++
 src/amd/common/ac_llvm_util.h            |  1 +
 src/amd/vulkan/radv_nir_to_llvm.c        |  7 ++-----
 src/gallium/drivers/radeonsi/si_shader.c |  7 ++-----
 4 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
index 5b701603ebb..c8a8bf146fe 100644
--- a/src/amd/common/ac_llvm_util.c
+++ b/src/amd/common/ac_llvm_util.c
@@ -262,20 +262,30 @@ ac_dump_module(LLVMModuleRef module)
 void
 ac_llvm_add_target_dep_function_attr(LLVMValueRef F,
                                     const char *name, unsigned value)
 {
        char str[16];
 
        snprintf(str, sizeof(str), "0x%x", value);
        LLVMAddTargetDependentFunctionAttr(F, name, str);
 }
 
+void ac_llvm_set_workgroup_size(LLVMValueRef F, unsigned size)
+{
+       if (!size)
+               return;
+
+       char str[32];
+       snprintf(str, sizeof(str), "%u,%u", size, size);
+       LLVMAddTargetDependentFunctionAttr(F, "amdgpu-flat-work-group-size", 
str);
+}
+
 unsigned
 ac_count_scratch_private_memory(LLVMValueRef function)
 {
        unsigned private_mem_vgprs = 0;
 
        /* Process all LLVM instructions. */
        LLVMBasicBlockRef bb = LLVMGetFirstBasicBlock(function);
        while (bb) {
                LLVMValueRef next = LLVMGetFirstInstruction(bb);
 
diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h
index ca00540da80..18102be5207 100644
--- a/src/amd/common/ac_llvm_util.h
+++ b/src/amd/common/ac_llvm_util.h
@@ -102,20 +102,21 @@ void ac_dump_module(LLVMModuleRef module);
 LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call);
 bool ac_llvm_is_function(LLVMValueRef v);
 LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx);
 
 LLVMBuilderRef ac_create_builder(LLVMContextRef ctx,
                                 enum ac_float_mode float_mode);
 
 void
 ac_llvm_add_target_dep_function_attr(LLVMValueRef F,
                                     const char *name, unsigned value);
+void ac_llvm_set_workgroup_size(LLVMValueRef F, unsigned size);
 
 static inline unsigned
 ac_get_load_intr_attribs(bool can_speculate)
 {
        /* READNONE means writes can't affect it, while READONLY means that
         * writes can affect it. */
        return can_speculate ? AC_FUNC_ATTR_READNONE :
                               AC_FUNC_ATTR_READONLY;
 }
 
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c 
b/src/amd/vulkan/radv_nir_to_llvm.c
index 341f6388f32..6f102647ba8 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -511,25 +511,22 @@ create_llvm_function(LLVMContextRef ctx, LLVMModuleRef 
module,
                        ac_add_attr_dereferenceable(P, UINT64_MAX);
                }
        }
 
        if (options->address32_hi) {
                ac_llvm_add_target_dep_function_attr(main_function,
                                                     
"amdgpu-32bit-address-high-bits",
                                                     options->address32_hi);
        }
 
-       if (max_workgroup_size) {
-               ac_llvm_add_target_dep_function_attr(main_function,
-                                                    
"amdgpu-max-work-group-size",
-                                                    max_workgroup_size);
-       }
+       ac_llvm_set_workgroup_size(main_function, max_workgroup_size);
+
        if (options->unsafe_math) {
                /* These were copied from some LLVM test. */
                LLVMAddTargetDependentFunctionAttr(main_function,
                                                   "less-precise-fpmad",
                                                   "true");
                LLVMAddTargetDependentFunctionAttr(main_function,
                                                   "no-infs-fp-math",
                                                   "true");
                LLVMAddTargetDependentFunctionAttr(main_function,
                                                   "no-nans-fp-math",
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index d2927d0254b..1ba6b8b6033 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -4276,25 +4276,22 @@ void si_create_function(struct si_shader_context *ctx,
                if (fninfo->assign[i])
                        *fninfo->assign[i] = LLVMGetParam(ctx->main_fn, i);
        }
 
        if (ctx->screen->info.address32_hi) {
                ac_llvm_add_target_dep_function_attr(ctx->main_fn,
                                                     
"amdgpu-32bit-address-high-bits",
                                                     
ctx->screen->info.address32_hi);
        }
 
-       if (max_workgroup_size) {
-               ac_llvm_add_target_dep_function_attr(ctx->main_fn,
-                                                    
"amdgpu-max-work-group-size",
-                                                    max_workgroup_size);
-       }
+       ac_llvm_set_workgroup_size(ctx->main_fn, max_workgroup_size);
+
        LLVMAddTargetDependentFunctionAttr(ctx->main_fn,
                                           "no-signed-zeros-fp-math",
                                           "true");
 
        if (ctx->screen->debug_flags & DBG(UNSAFE_MATH)) {
                /* These were copied from some LLVM test. */
                LLVMAddTargetDependentFunctionAttr(ctx->main_fn,
                                                   "less-precise-fpmad",
                                                   "true");
                LLVMAddTargetDependentFunctionAttr(ctx->main_fn,
-- 
2.17.1

_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to