From: Marek Olšák <marek.ol...@amd.com> shader-db doesn't show any regression and 32-bit pointers with byval are declared as VGPRs for some reason. --- src/amd/common/ac_llvm_helper.cpp | 3 +-- src/amd/common/ac_llvm_util.c | 2 -- src/amd/common/ac_llvm_util.h | 1 - src/amd/common/ac_nir_to_llvm.c | 6 ++---- src/gallium/auxiliary/gallivm/lp_bld_intr.c | 2 -- src/gallium/auxiliary/gallivm/lp_bld_intr.h | 1 - src/gallium/drivers/radeonsi/si_shader.c | 17 +++++------------ 7 files changed, 8 insertions(+), 24 deletions(-)
diff --git a/src/amd/common/ac_llvm_helper.cpp b/src/amd/common/ac_llvm_helper.cpp index 4db7036..54562cc 100644 --- a/src/amd/common/ac_llvm_helper.cpp +++ b/src/amd/common/ac_llvm_helper.cpp @@ -52,22 +52,21 @@ void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes) #else A->addAttr(llvm::Attribute::getWithDereferenceableBytes(A->getContext(), bytes)); #endif } bool ac_is_sgpr_param(LLVMValueRef arg) { llvm::Argument *A = llvm::unwrap<llvm::Argument>(arg); llvm::AttributeList AS = A->getParent()->getAttributes(); unsigned ArgNo = A->getArgNo(); - return AS.hasAttribute(ArgNo + 1, llvm::Attribute::ByVal) || - AS.hasAttribute(ArgNo + 1, llvm::Attribute::InReg); + return AS.hasAttribute(ArgNo + 1, llvm::Attribute::InReg); } LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call) { #if HAVE_LLVM >= 0x0309 return LLVMGetCalledValue(call); #else return llvm::wrap(llvm::CallSite(llvm::unwrap<llvm::Instruction>(call)).getCalledValue()); #endif } diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c index 429904c..5fd785a 100644 --- a/src/amd/common/ac_llvm_util.c +++ b/src/amd/common/ac_llvm_util.c @@ -145,39 +145,37 @@ LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, enum ac return tm; } #if HAVE_LLVM < 0x0400 static LLVMAttribute ac_attr_to_llvm_attr(enum ac_func_attr attr) { switch (attr) { case AC_FUNC_ATTR_ALWAYSINLINE: return LLVMAlwaysInlineAttribute; - case AC_FUNC_ATTR_BYVAL: return LLVMByValAttribute; case AC_FUNC_ATTR_INREG: return LLVMInRegAttribute; case AC_FUNC_ATTR_NOALIAS: return LLVMNoAliasAttribute; case AC_FUNC_ATTR_NOUNWIND: return LLVMNoUnwindAttribute; case AC_FUNC_ATTR_READNONE: return LLVMReadNoneAttribute; case AC_FUNC_ATTR_READONLY: return LLVMReadOnlyAttribute; default: fprintf(stderr, "Unhandled function attribute: %x\n", attr); return 0; } } #else static const char *attr_to_str(enum ac_func_attr attr) { switch (attr) { case AC_FUNC_ATTR_ALWAYSINLINE: return "alwaysinline"; - case AC_FUNC_ATTR_BYVAL: return "byval"; case AC_FUNC_ATTR_INREG: return "inreg"; case AC_FUNC_ATTR_NOALIAS: return "noalias"; case AC_FUNC_ATTR_NOUNWIND: return "nounwind"; case AC_FUNC_ATTR_READNONE: return "readnone"; case AC_FUNC_ATTR_READONLY: return "readonly"; case AC_FUNC_ATTR_WRITEONLY: return "writeonly"; case AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY: return "inaccessiblememonly"; case AC_FUNC_ATTR_CONVERGENT: return "convergent"; default: fprintf(stderr, "Unhandled function attribute: %x\n", attr); diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h index 7c8b6b0..26b0959 100644 --- a/src/amd/common/ac_llvm_util.h +++ b/src/amd/common/ac_llvm_util.h @@ -30,21 +30,20 @@ #include <llvm-c/TargetMachine.h> #include "amd_family.h" #ifdef __cplusplus extern "C" { #endif enum ac_func_attr { AC_FUNC_ATTR_ALWAYSINLINE = (1 << 0), - AC_FUNC_ATTR_BYVAL = (1 << 1), AC_FUNC_ATTR_INREG = (1 << 2), AC_FUNC_ATTR_NOALIAS = (1 << 3), AC_FUNC_ATTR_NOUNWIND = (1 << 4), AC_FUNC_ATTR_READNONE = (1 << 5), AC_FUNC_ATTR_READONLY = (1 << 6), AC_FUNC_ATTR_WRITEONLY = HAVE_LLVM >= 0x0400 ? (1 << 7) : 0, AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY = HAVE_LLVM >= 0x0400 ? (1 << 8) : 0, AC_FUNC_ATTR_CONVERGENT = HAVE_LLVM >= 0x0400 ? (1 << 9) : 0, /* Legacy intrinsic that needs attributes on function declarations diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 48e2920..187fdfb 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -316,28 +316,26 @@ create_llvm_function(LLVMContextRef ctx, LLVMModuleRef module, main_function_type = LLVMFunctionType(ret_type, args->types, args->count, 0); LLVMValueRef main_function = LLVMAddFunction(module, "main", main_function_type); main_function_body = LLVMAppendBasicBlockInContext(ctx, main_function, "main_body"); LLVMPositionBuilderAtEnd(builder, main_function_body); LLVMSetFunctionCallConv(main_function, RADEON_LLVM_AMDGPU_CS); for (unsigned i = 0; i < args->sgpr_count; ++i) { + ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_INREG); + if (args->array_params_mask & (1 << i)) { LLVMValueRef P = LLVMGetParam(main_function, i); - ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_BYVAL); ac_add_attr_dereferenceable(P, UINT64_MAX); } - else { - ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_INREG); - } } if (max_workgroup_size) { ac_llvm_add_target_dep_function_attr(main_function, "amdgpu-max-work-group-size", max_workgroup_size); } if (unsafe_math) { /* These were copied from some LLVM test. */ LLVMAddTargetDependentFunctionAttr(main_function, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.c b/src/gallium/auxiliary/gallivm/lp_bld_intr.c index b924555..74ed16f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_intr.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.c @@ -119,39 +119,37 @@ lp_declare_intrinsic(LLVMModuleRef module, return function; } #if HAVE_LLVM < 0x0400 static LLVMAttribute lp_attr_to_llvm_attr(enum lp_func_attr attr) { switch (attr) { case LP_FUNC_ATTR_ALWAYSINLINE: return LLVMAlwaysInlineAttribute; - case LP_FUNC_ATTR_BYVAL: return LLVMByValAttribute; case LP_FUNC_ATTR_INREG: return LLVMInRegAttribute; case LP_FUNC_ATTR_NOALIAS: return LLVMNoAliasAttribute; case LP_FUNC_ATTR_NOUNWIND: return LLVMNoUnwindAttribute; case LP_FUNC_ATTR_READNONE: return LLVMReadNoneAttribute; case LP_FUNC_ATTR_READONLY: return LLVMReadOnlyAttribute; default: _debug_printf("Unhandled function attribute: %x\n", attr); return 0; } } #else static const char *attr_to_str(enum lp_func_attr attr) { switch (attr) { case LP_FUNC_ATTR_ALWAYSINLINE: return "alwaysinline"; - case LP_FUNC_ATTR_BYVAL: return "byval"; case LP_FUNC_ATTR_INREG: return "inreg"; case LP_FUNC_ATTR_NOALIAS: return "noalias"; case LP_FUNC_ATTR_NOUNWIND: return "nounwind"; case LP_FUNC_ATTR_READNONE: return "readnone"; case LP_FUNC_ATTR_READONLY: return "readonly"; case LP_FUNC_ATTR_WRITEONLY: return "writeonly"; case LP_FUNC_ATTR_INACCESSIBLE_MEM_ONLY: return "inaccessiblememonly"; case LP_FUNC_ATTR_CONVERGENT: return "convergent"; default: _debug_printf("Unhandled function attribute: %x\n", attr); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.h b/src/gallium/auxiliary/gallivm/lp_bld_intr.h index 0a929c5..bf8143d 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_intr.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.h @@ -41,21 +41,20 @@ #include "gallivm/lp_bld_init.h" /** * Max number of arguments in an intrinsic. */ #define LP_MAX_FUNC_ARGS 32 enum lp_func_attr { LP_FUNC_ATTR_ALWAYSINLINE = (1 << 0), - LP_FUNC_ATTR_BYVAL = (1 << 1), LP_FUNC_ATTR_INREG = (1 << 2), LP_FUNC_ATTR_NOALIAS = (1 << 3), LP_FUNC_ATTR_NOUNWIND = (1 << 4), LP_FUNC_ATTR_READNONE = (1 << 5), LP_FUNC_ATTR_READONLY = (1 << 6), LP_FUNC_ATTR_WRITEONLY = HAVE_LLVM >= 0x0400 ? (1 << 7) : 0, LP_FUNC_ATTR_INACCESSIBLE_MEM_ONLY = HAVE_LLVM >= 0x0400 ? (1 << 8) : 0, LP_FUNC_ATTR_CONVERGENT = HAVE_LLVM >= 0x0400 ? (1 << 9) : 0, /* Legacy intrinsic that needs attributes on function declarations diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 84a26a2..708da13 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -4320,32 +4320,32 @@ static void si_create_function(struct si_shader_context *ctx, int i; si_llvm_create_func(ctx, name, returns, num_returns, fninfo->types, fninfo->num_params); ctx->return_value = LLVMGetUndef(ctx->return_type); for (i = 0; i < fninfo->num_sgpr_params; ++i) { LLVMValueRef P = LLVMGetParam(ctx->main_fn, i); /* The combination of: - * - ByVal + * - noalias * - dereferenceable * - invariant.load * allows the optimization passes to move loads and reduces * SGPR spilling significantly. */ + lp_add_function_attr(ctx->main_fn, i + 1, LP_FUNC_ATTR_INREG); + if (LLVMGetTypeKind(LLVMTypeOf(P)) == LLVMPointerTypeKind) { - lp_add_function_attr(ctx->main_fn, i + 1, LP_FUNC_ATTR_BYVAL); lp_add_function_attr(ctx->main_fn, i + 1, LP_FUNC_ATTR_NOALIAS); ac_add_attr_dereferenceable(P, UINT64_MAX); - } else - lp_add_function_attr(ctx->main_fn, i + 1, LP_FUNC_ATTR_INREG); + } } for (i = 0; i < fninfo->num_params; ++i) { if (fninfo->assign[i]) *fninfo->assign[i] = LLVMGetParam(ctx->main_fn, i); } if (max_workgroup_size) { si_llvm_add_attribute(ctx->main_fn, "amdgpu-max-work-group-size", max_workgroup_size); @@ -6459,29 +6459,22 @@ static void si_build_wrapper_function(struct si_shader_context *ctx, LLVMTypeRef param_type; bool is_sgpr; unsigned param_size; LLVMValueRef arg = NULL; param = LLVMGetParam(parts[part], param_idx); param_type = LLVMTypeOf(param); param_size = ac_get_type_size(param_type) / 4; is_sgpr = ac_is_sgpr_param(param); - if (is_sgpr) { -#if HAVE_LLVM < 0x0400 - LLVMRemoveAttribute(param, LLVMByValAttribute); -#else - unsigned kind_id = LLVMGetEnumAttributeKindForName("byval", 5); - LLVMRemoveEnumAttributeAtIndex(parts[part], param_idx + 1, kind_id); -#endif + if (is_sgpr) lp_add_function_attr(parts[part], param_idx + 1, LP_FUNC_ATTR_INREG); - } assert(out_idx + param_size <= (is_sgpr ? num_out_sgpr : num_out)); assert(is_sgpr || out_idx >= num_out_sgpr); if (param_size == 1) arg = out[out_idx]; else arg = lp_build_gather_values(&ctx->gallivm, &out[out_idx], param_size); if (LLVMTypeOf(arg) != param_type) { -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev