Hi, I think this patch is causing build problems for me:
/var/tmp/portage/media-libs/mesa-9999/work/mesa-9999/src/amd/common/ac_nir_to_llvm.c: In function ‘create_llvm_function’: /var/tmp/portage/media-libs/mesa-9999/work/mesa-9999/src/amd/common/ac_nir_to_llvm.c:265:4: error: implicit declaration of function ‘ac_add_function_attr’ [-Werror=implicit-function-declaration] ac_add_function_attr(main_function, i + 1, AC_FUNC_ATTR_BYVAL); ^~~~~~~~~~~~~~~~~~~~ /var/tmp/portage/media-libs/mesa-9999/work/mesa-9999/src/amd/common/ac_nir_to_llvm.c: In function ‘visit_interp’: /var/tmp/portage/media-libs/mesa-9999/work/mesa-9999/src/amd/common/ac_nir_to_llvm.c:3022:11: warning: ‘location’ may be used uninitialized in this function [-Wmaybe-uninitialized] unsigned location; ^~~~~~~~ /var/tmp/portage/media-libs/mesa-9999/work/mesa-9999/src/amd/common/ac_nir_to_llvm.c:3089:10: warning: ‘src_c1’ may be used uninitialized in this function [-Wmaybe-uninitialized] temp2 = LLVMBuildFMul(ctx->builder, ddy_el, src_c1, ""); ~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /var/tmp/portage/media-libs/mesa-9999/work/mesa-9999/src/amd/common/ac_nir_to_llvm.c:3086:10: warning: ‘src_c0’ may be used uninitialized in this function [-Wmaybe-uninitialized] temp1 = LLVMBuildFMul(ctx->builder, ddx_el, src_c0, ""); ~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /var/tmp/portage/media-libs/mesa-9999/work/mesa-9999/src/amd/common/ac_nir_to_llvm.c:3044:12: warning: ‘src0’ may be used uninitialized in this function [-Wmaybe-uninitialized] src_c1 = to_float(ctx, LLVMBuildExtractElement(ctx->builder, src0, ctx->i32one, "")); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cc1: some warnings being treated as errors Was it tested? 
Mike On Wed, 1 Mar 2017 at 17:22 Marek Olšák <mar...@gmail.com> wrote: > On Wed, Mar 1, 2017 at 5:29 PM, Jose Fonseca <jfons...@vmware.com> wrote: > > On 23/02/17 00:01, Marek Olšák wrote: > >> > >> From: Marek Olšák <marek.ol...@amd.com> > >> > >> They can vary at call sites if the intrinsic is NOT a legacy SI > intrinsic. > >> We need this to force readnone or inaccessiblememonly on some amdgcn > >> intrinsics. > >> > >> This is only used with LLVM 4.0 and later. Intrinsics only used with > >> LLVM <= 3.9 don't need the LEGACY flag. > >> > >> gallivm and ac code is in the same patch, because splitting would be > >> more complicated with all the LEGACY uses all over the place. > >> --- > >> src/amd/common/ac_llvm_build.c | 23 ++++---- > >> src/amd/common/ac_llvm_util.c | 31 +++++++---- > >> src/amd/common/ac_llvm_util.h | 17 +++--- > >> src/amd/common/ac_nir_to_llvm.c | 63 > >> ++++++++++++++-------- > >> src/gallium/auxiliary/draw/draw_llvm.c | 6 ++- > >> src/gallium/auxiliary/gallivm/lp_bld_intr.c | 51 > ++++++++++++------ > >> src/gallium/auxiliary/gallivm/lp_bld_intr.h | 11 ++-- > >> src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 3 +- > >> src/gallium/drivers/llvmpipe/lp_state_fs.c | 3 +- > >> src/gallium/drivers/llvmpipe/lp_state_setup.c | 7 +-- > >> src/gallium/drivers/radeonsi/si_shader.c | 64 > >> ++++++++++++++--------- > >> src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c | 14 +++-- > >> 12 files changed, 184 insertions(+), 109 deletions(-) > >> > >> diff --git a/src/amd/common/ac_llvm_build.c > >> b/src/amd/common/ac_llvm_build.c > >> index 2f25b14..5c8b7f7 100644 > >> --- a/src/amd/common/ac_llvm_build.c > >> +++ b/src/amd/common/ac_llvm_build.c > >> @@ -75,47 +75,50 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, > >> LLVMContextRef context) > >> > "amdgpu.uniform", > >> 14); > >> > >> ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0); > >> } > >> > >> LLVMValueRef > >> ac_emit_llvm_intrinsic(struct ac_llvm_context *ctx, const char 
*name, > >> LLVMTypeRef return_type, LLVMValueRef *params, > >> unsigned param_count, unsigned attrib_mask) > >> { > >> - LLVMValueRef function; > >> + LLVMValueRef function, call; > >> + bool set_callsite_attrs = HAVE_LLVM >= 0x0400 && > >> + !(attrib_mask & AC_FUNC_ATTR_LEGACY); > >> > >> function = LLVMGetNamedFunction(ctx->module, name); > >> if (!function) { > >> LLVMTypeRef param_types[32], function_type; > >> unsigned i; > >> > >> assert(param_count <= 32); > >> > >> for (i = 0; i < param_count; ++i) { > >> assert(params[i]); > >> param_types[i] = LLVMTypeOf(params[i]); > >> } > >> function_type = > >> LLVMFunctionType(return_type, param_types, > >> param_count, 0); > >> function = LLVMAddFunction(ctx->module, name, > >> function_type); > >> > >> LLVMSetFunctionCallConv(function, LLVMCCallConv); > >> LLVMSetLinkage(function, LLVMExternalLinkage); > >> > >> - attrib_mask |= AC_FUNC_ATTR_NOUNWIND; > >> - while (attrib_mask) { > >> - enum ac_func_attr attr = 1u << > >> u_bit_scan(&attrib_mask); > >> - ac_add_function_attr(function, -1, attr); > >> - } > >> + if (!set_callsite_attrs) > >> + ac_add_func_attributes(ctx->context, function, > >> attrib_mask); > >> } > >> - return LLVMBuildCall(ctx->builder, function, params, > param_count, > >> ""); > >> + > >> + call = LLVMBuildCall(ctx->builder, function, params, > param_count, > >> ""); > >> + if (set_callsite_attrs) > >> + ac_add_func_attributes(ctx->context, call, attrib_mask); > >> + return call; > >> } > >> > >> LLVMValueRef > >> ac_build_gather_values_extended(struct ac_llvm_context *ctx, > >> LLVMValueRef *values, > >> unsigned value_count, > >> unsigned value_stride, > >> bool load) > >> { > >> LLVMBuilderRef builder = ctx->builder; > >> @@ -524,21 +527,22 @@ ac_build_tbuffer_store(struct ac_llvm_context > *ctx, > >> /* The instruction offset field has 12 bits */ > >> assert(offen || inst_offset < (1 << 12)); > >> > >> /* The intrinsic is overloaded, we need to add a type suffix for > >> overloading to 
work. */ > >> unsigned func = CLAMP(num_channels, 1, 3) - 1; > >> const char *types[] = {"i32", "v2i32", "v4i32"}; > >> char name[256]; > >> snprintf(name, sizeof(name), "llvm.SI.tbuffer.store.%s", > >> types[func]); > >> > >> ac_emit_llvm_intrinsic(ctx, name, ctx->voidt, > >> - args, ARRAY_SIZE(args), 0); > >> + args, ARRAY_SIZE(args), > >> + AC_FUNC_ATTR_LEGACY); > >> } > >> > >> void > >> ac_build_tbuffer_store_dwords(struct ac_llvm_context *ctx, > >> LLVMValueRef rsrc, > >> LLVMValueRef vdata, > >> unsigned num_channels, > >> LLVMValueRef vaddr, > >> LLVMValueRef soffset, > >> unsigned inst_offset) > >> @@ -836,12 +840,13 @@ LLVMValueRef ac_emit_clamp(struct ac_llvm_context > >> *ctx, LLVMValueRef value) > >> > >> const char *intr = HAVE_LLVM >= 0x0308 ? "llvm.AMDGPU.clamp." : > >> "llvm.AMDIL.clamp."; > >> LLVMValueRef args[3] = { > >> value, > >> LLVMConstReal(ctx->f32, 0), > >> LLVMConstReal(ctx->f32, 1), > >> }; > >> > >> return ac_emit_llvm_intrinsic(ctx, intr, ctx->f32, args, 3, > >> - AC_FUNC_ATTR_READNONE); > >> + AC_FUNC_ATTR_READNONE | > >> + AC_FUNC_ATTR_LEGACY); > >> } > >> diff --git a/src/amd/common/ac_llvm_util.c > b/src/amd/common/ac_llvm_util.c > >> index be127c5..fb525dd 100644 > >> --- a/src/amd/common/ac_llvm_util.c > >> +++ b/src/amd/common/ac_llvm_util.c > >> @@ -17,21 +17,21 @@ > >> * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE > OR > >> THE > >> * USE OR OTHER DEALINGS IN THE SOFTWARE. > >> * > >> * The above copyright notice and this permission notice (including the > >> * next paragraph) shall be included in all copies or substantial > >> portions > >> * of the Software. 
> >> * > >> */ > >> /* based on pieces from si_pipe.c and radeon_llvm_emit.c */ > >> #include "ac_llvm_util.h" > >> - > >> +#include "util/bitscan.h" > >> #include <llvm-c/Core.h> > >> > >> #include "c11/threads.h" > >> > >> #include <assert.h> > >> #include <stdio.h> > >> #include <string.h> > >> > >> static void ac_init_llvm_target() > >> { > >> @@ -173,40 +173,53 @@ static const char *attr_to_str(enum ac_func_attr > >> attr) > >> case AC_FUNC_ATTR_READNONE: return "readnone"; > >> case AC_FUNC_ATTR_READONLY: return "readonly"; > >> default: > >> fprintf(stderr, "Unhandled function attribute: %x\n", attr); > >> return 0; > >> } > >> } > >> > >> #endif > >> > >> -void > >> -ac_add_function_attr(LLVMValueRef function, > >> - int attr_idx, > >> - enum ac_func_attr attr) > >> +static void > >> +ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function, > >> + int attr_idx, enum ac_func_attr attr) > >> { > >> - > >> #if HAVE_LLVM < 0x0400 > >> LLVMAttribute llvm_attr = ac_attr_to_llvm_attr(attr); > >> if (attr_idx == -1) { > >> LLVMAddFunctionAttr(function, llvm_attr); > >> } else { > >> LLVMAddAttribute(LLVMGetParam(function, attr_idx - 1), > llvm_attr); > >> } > >> #else > >> - LLVMContextRef context = > >> LLVMGetModuleContext(LLVMGetGlobalParent(function)); > >> const char *attr_name = attr_to_str(attr); > >> unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name, > >> > strlen(attr_name)); > >> - LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(context, > kind_id, > >> 0); > >> - LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr); > >> + LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(ctx, kind_id, > 0); > >> + > >> + if (LLVMIsAFunction(function)) > >> + LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr); > >> + else > >> + LLVMAddCallSiteAttribute(function, attr_idx, llvm_attr); > >> #endif > >> } > >> > >> +void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function, > >> + unsigned attrib_mask) > >> +{ > >> + attrib_mask 
|= AC_FUNC_ATTR_NOUNWIND; > >> + attrib_mask &= ~AC_FUNC_ATTR_LEGACY; > >> + > >> + while (attrib_mask) { > >> + enum ac_func_attr attr = 1u << u_bit_scan(&attrib_mask); > >> + ac_add_function_attr(ctx, function, -1, attr); > >> + } > >> +} > >> + > >> void > >> ac_dump_module(LLVMModuleRef module) > >> { > >> char *str = LLVMPrintModuleToString(module); > >> fprintf(stderr, "%s", str); > >> LLVMDisposeMessage(str); > >> } > >> diff --git a/src/amd/common/ac_llvm_util.h > b/src/amd/common/ac_llvm_util.h > >> index 93d3d27..4fe4ab4 100644 > >> --- a/src/amd/common/ac_llvm_util.h > >> +++ b/src/amd/common/ac_llvm_util.h > >> @@ -34,28 +34,29 @@ extern "C" { > >> #endif > >> > >> enum ac_func_attr { > >> AC_FUNC_ATTR_ALWAYSINLINE = (1 << 0), > >> AC_FUNC_ATTR_BYVAL = (1 << 1), > >> AC_FUNC_ATTR_INREG = (1 << 2), > >> AC_FUNC_ATTR_NOALIAS = (1 << 3), > >> AC_FUNC_ATTR_NOUNWIND = (1 << 4), > >> AC_FUNC_ATTR_READNONE = (1 << 5), > >> AC_FUNC_ATTR_READONLY = (1 << 6), > >> + > >> + /* Legacy intrinsic that needs attributes on function > declarations > >> + * and they must match the internal LLVM definition exactly, > >> otherwise > >> + * intrinsic selection fails. 
> >> + */ > >> + AC_FUNC_ATTR_LEGACY = (1u << 31), > >> }; > >> > >> LLVMTargetMachineRef ac_create_target_machine(enum radeon_family > family, > >> bool supports_spill); > >> > >> void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes); > >> bool ac_is_sgpr_param(LLVMValueRef param); > >> - > >> -void > >> -ac_add_function_attr(LLVMValueRef function, > >> - int attr_idx, > >> - enum ac_func_attr attr); > >> - > >> -void > >> -ac_dump_module(LLVMModuleRef module); > >> +void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function, > >> + unsigned attrib_mask); > >> +void ac_dump_module(LLVMModuleRef module); > >> > >> #ifdef __cplusplus > >> } > >> #endif > >> diff --git a/src/amd/common/ac_nir_to_llvm.c > >> b/src/amd/common/ac_nir_to_llvm.c > >> index b3dc63c..4ec19d5 100644 > >> --- a/src/amd/common/ac_nir_to_llvm.c > >> +++ b/src/amd/common/ac_nir_to_llvm.c > >> @@ -1039,26 +1039,27 @@ static LLVMValueRef emit_imul_high(struct > >> nir_to_llvm_context *ctx, > >> src0 = LLVMBuildSExt(ctx->builder, src0, ctx->i64, ""); > >> src1 = LLVMBuildSExt(ctx->builder, src1, ctx->i64, ""); > >> > >> dst64 = LLVMBuildMul(ctx->builder, src0, src1, ""); > >> dst64 = LLVMBuildAShr(ctx->builder, dst64, > LLVMConstInt(ctx->i64, > >> 32, false), ""); > >> result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, ""); > >> return result; > >> } > >> > >> static LLVMValueRef emit_bitfield_extract(struct nir_to_llvm_context > >> *ctx, > >> - const char *intrin, > >> + const char *intrin, unsigned > >> attr_mask, > >> LLVMValueRef srcs[3]) > >> { > >> LLVMValueRef result; > >> LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, > >> srcs[2], LLVMConstInt(ctx->i32, 32, false), ""); > >> - result = ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->i32, > srcs, > >> 3, AC_FUNC_ATTR_READNONE); > >> + result = ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->i32, > srcs, > >> 3, > >> + AC_FUNC_ATTR_READNONE | > >> attr_mask); > >> > >> result = 
LLVMBuildSelect(ctx->builder, icond, srcs[0], result, > >> ""); > >> return result; > >> } > >> > >> static LLVMValueRef emit_bitfield_insert(struct nir_to_llvm_context > *ctx, > >> LLVMValueRef src0, LLVMValueRef > >> src1, > >> LLVMValueRef src2, LLVMValueRef > >> src3) > >> { > >> LLVMValueRef bfi_args[3], result; > >> @@ -1418,24 +1419,26 @@ static void visit_alu(struct nir_to_llvm_context > >> *ctx, nir_alu_instr *instr) > >> break; > >> case nir_op_fmin: > >> result = emit_intrin_2f_param(ctx, "llvm.minnum", > >> to_float_type(ctx, > >> def_type), src[0], src[1]); > >> break; > >> case nir_op_ffma: > >> result = emit_intrin_3f_param(ctx, "llvm.fma", > >> to_float_type(ctx, > >> def_type), src[0], src[1], src[2]); > >> break; > >> case nir_op_ibitfield_extract: > >> - result = emit_bitfield_extract(ctx, > "llvm.AMDGPU.bfe.i32", > >> src); > >> + result = emit_bitfield_extract(ctx, > "llvm.AMDGPU.bfe.i32", > >> + AC_FUNC_ATTR_LEGACY, > src); > >> break; > >> case nir_op_ubitfield_extract: > >> - result = emit_bitfield_extract(ctx, > "llvm.AMDGPU.bfe.u32", > >> src); > >> + result = emit_bitfield_extract(ctx, > "llvm.AMDGPU.bfe.u32", > >> + AC_FUNC_ATTR_LEGACY, > src); > >> break; > >> case nir_op_bitfield_insert: > >> result = emit_bitfield_insert(ctx, src[0], src[1], > src[2], > >> src[3]); > >> break; > >> case nir_op_bitfield_reverse: > >> result = ac_emit_llvm_intrinsic(&ctx->ac, > >> "llvm.bitreverse.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE); > >> break; > >> case nir_op_bit_count: > >> result = ac_emit_llvm_intrinsic(&ctx->ac, > >> "llvm.ctpop.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE); > >> break; > >> @@ -1635,22 +1638,23 @@ static LLVMValueRef > >> radv_lower_gather4_integer(struct nir_to_llvm_context *ctx, > >> txq_args[txq_arg_count++] = tinfo->args[1]; > >> txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0xf, > >> 0); /* dmask */ > >> txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, > 0); > >> /* unorm */ > >> 
txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, > 0); > >> /* r128 */ > >> txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, da ? > 1 > >> : 0, 0); > >> txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, > 0); > >> /* glc */ > >> txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, > 0); > >> /* slc */ > >> txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, > 0); > >> /* tfe */ > >> txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, > 0); > >> /* lwe */ > >> size = ac_emit_llvm_intrinsic(&ctx->ac, > >> "llvm.SI.getresinfo.i32", ctx->v4i32, > >> - txq_args, txq_arg_count, > >> - AC_FUNC_ATTR_READNONE); > >> + txq_args, txq_arg_count, > >> + AC_FUNC_ATTR_READNONE | > >> + AC_FUNC_ATTR_LEGACY); > >> > >> for (c = 0; c < 2; c++) { > >> half_texel[c] = > >> LLVMBuildExtractElement(ctx->builder, size, > >> > >> LLVMConstInt(ctx->i32, c, false), ""); > >> half_texel[c] = LLVMBuildUIToFP(ctx->builder, > >> half_texel[c], ctx->f32, ""); > >> half_texel[c] = ac_emit_fdiv(&ctx->ac, > >> ctx->f32one, half_texel[c]); > >> half_texel[c] = LLVMBuildFMul(ctx->builder, > >> half_texel[c], > >> > >> LLVMConstReal(ctx->f32, -0.5), ""); > >> } > >> } > >> @@ -1660,21 +1664,22 @@ static LLVMValueRef > >> radv_lower_gather4_integer(struct nir_to_llvm_context *ctx, > >> LLVMValueRef index = LLVMConstInt(ctx->i32, > >> coord_vgpr_index + c, 0); > >> tmp = LLVMBuildExtractElement(ctx->builder, coord, > index, > >> ""); > >> tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, ""); > >> tmp = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], > ""); > >> tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, ""); > >> coord = LLVMBuildInsertElement(ctx->builder, coord, tmp, > >> index, ""); > >> } > >> > >> tinfo->args[0] = coord; > >> return ac_emit_llvm_intrinsic(&ctx->ac, intr_name, > >> tinfo->dst_type, tinfo->args, tinfo->arg_count, > >> - AC_FUNC_ATTR_READNONE | > >> AC_FUNC_ATTR_NOUNWIND); > >> + AC_FUNC_ATTR_READNONE | > >> AC_FUNC_ATTR_NOUNWIND | > >> + 
AC_FUNC_ATTR_LEGACY); > >> > >> } > >> > >> static LLVMValueRef build_tex_intrinsic(struct nir_to_llvm_context > *ctx, > >> nir_tex_instr *instr, > >> struct ac_tex_info *tinfo) > >> { > >> const char *name = "llvm.SI.image.sample"; > >> const char *infix = ""; > >> char intr_name[127]; > >> @@ -1728,21 +1733,22 @@ static LLVMValueRef build_tex_intrinsic(struct > >> nir_to_llvm_context *ctx, > >> has_offset ? ".o" : "", type); > >> > >> if (instr->op == nir_texop_tg4) { > >> enum glsl_base_type stype = > >> glsl_get_sampler_result_type(instr->texture->var->type); > >> if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) { > >> return radv_lower_gather4_integer(ctx, tinfo, > >> instr, intr_name, > >> > (int)has_offset > >> + (int)is_shadow); > >> } > >> } > >> return ac_emit_llvm_intrinsic(&ctx->ac, intr_name, > >> tinfo->dst_type, tinfo->args, tinfo->arg_count, > >> - AC_FUNC_ATTR_READNONE | > >> AC_FUNC_ATTR_NOUNWIND); > >> + AC_FUNC_ATTR_READNONE | > >> AC_FUNC_ATTR_NOUNWIND | > >> + AC_FUNC_ATTR_LEGACY); > >> > >> } > >> > >> static LLVMValueRef visit_vulkan_resource_index(struct > >> nir_to_llvm_context *ctx, > >> nir_intrinsic_instr > >> *instr) > >> { > >> LLVMValueRef index = get_src(ctx, instr->src[0]); > >> unsigned desc_set = nir_intrinsic_desc_set(instr); > >> unsigned binding = nir_intrinsic_binding(instr); > >> LLVMValueRef desc_ptr = ctx->descriptor_sets[desc_set]; > >> @@ -2006,21 +2012,23 @@ static LLVMValueRef visit_load_ubo_buffer(struct > >> nir_to_llvm_context *ctx, > >> if (instr->dest.ssa.bit_size == 64) > >> num_components *= 2; > >> > >> for (unsigned i = 0; i < num_components; ++i) { > >> LLVMValueRef params[] = { > >> rsrc, > >> LLVMBuildAdd(ctx->builder, > LLVMConstInt(ctx->i32, > >> 4 * i, 0), > >> offset, "") > >> }; > >> results[i] = ac_emit_llvm_intrinsic(&ctx->ac, > >> "llvm.SI.load.const", ctx->f32, > >> - params, 2, > >> AC_FUNC_ATTR_READNONE); > >> + params, 2, > >> + > AC_FUNC_ATTR_READNONE > >> | > >> + > AC_FUNC_ATTR_LEGACY); 
> >> } > >> > >> > >> ret = ac_build_gather_values(&ctx->ac, results, > >> instr->num_components); > >> return LLVMBuildBitCast(ctx->builder, ret, > >> get_def_type(ctx, &instr->dest.ssa), > ""); > >> } > >> > >> static void > >> radv_get_deref_offset(struct nir_to_llvm_context *ctx, nir_deref *tail, > >> @@ -2103,21 +2111,23 @@ load_gs_input(struct nir_to_llvm_context *ctx, > >> args[1] = vtx_offset; > >> args[2] = LLVMConstInt(ctx->i32, (param * 4 + i + > >> const_index) * 256, false); > >> args[3] = ctx->i32zero; > >> args[4] = ctx->i32one; /* OFFEN */ > >> args[5] = ctx->i32zero; /* IDXEN */ > >> args[6] = ctx->i32one; /* GLC */ > >> args[7] = ctx->i32zero; /* SLC */ > >> args[8] = ctx->i32zero; /* TFE */ > >> > >> value[i] = ac_emit_llvm_intrinsic(&ctx->ac, > >> "llvm.SI.buffer.load.dword.i32.i32", > >> - ctx->i32, args, 9, > >> AC_FUNC_ATTR_READONLY); > >> + ctx->i32, args, 9, > >> + AC_FUNC_ATTR_READONLY > | > >> + AC_FUNC_ATTR_LEGACY); > >> } > >> result = ac_build_gather_values(&ctx->ac, value, > >> instr->num_components); > >> > >> return result; > >> } > >> > >> static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx, > >> nir_intrinsic_instr *instr) > >> { > >> LLVMValueRef values[8]; > >> @@ -2685,21 +2695,23 @@ static LLVMValueRef visit_image_size(struct > >> nir_to_llvm_context *ctx, > >> params[2] = LLVMConstInt(ctx->i32, 15, false); > >> params[3] = ctx->i32zero; > >> params[4] = ctx->i32zero; > >> params[5] = da ? 
ctx->i32one : ctx->i32zero; > >> params[6] = ctx->i32zero; > >> params[7] = ctx->i32zero; > >> params[8] = ctx->i32zero; > >> params[9] = ctx->i32zero; > >> > >> res = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.getresinfo.i32", > >> ctx->v4i32, > >> - params, 10, AC_FUNC_ATTR_READNONE); > >> + params, 10, > >> + AC_FUNC_ATTR_READNONE | > >> + AC_FUNC_ATTR_LEGACY); > >> > >> if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE && > >> glsl_sampler_type_is_array(type)) { > >> LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false); > >> LLVMValueRef six = LLVMConstInt(ctx->i32, 6, false); > >> LLVMValueRef z = LLVMBuildExtractElement(ctx->builder, > >> res, two, ""); > >> z = LLVMBuildSDiv(ctx->builder, z, six, ""); > >> res = LLVMBuildInsertElement(ctx->builder, res, z, two, > >> ""); > >> } > >> return res; > >> @@ -2729,21 +2741,21 @@ static void emit_discard_if(struct > >> nir_to_llvm_context *ctx, > >> > >> cond = LLVMBuildICmp(ctx->builder, LLVMIntNE, > >> get_src(ctx, instr->src[0]), > >> ctx->i32zero, ""); > >> > >> cond = LLVMBuildSelect(ctx->builder, cond, > >> LLVMConstReal(ctx->f32, -1.0f), > >> ctx->f32zero, ""); > >> ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kill", > >> ctx->voidt, > >> - &cond, 1, 0); > >> + &cond, 1, AC_FUNC_ATTR_LEGACY); > >> } > >> > >> static LLVMValueRef > >> visit_load_local_invocation_index(struct nir_to_llvm_context *ctx) > >> { > >> LLVMValueRef result; > >> LLVMValueRef thread_id = ac_get_thread_id(&ctx->ac); > >> result = LLVMBuildAnd(ctx->builder, ctx->tg_size, > >> LLVMConstInt(ctx->i32, 0xfc0, false), ""); > >> > >> @@ -2984,21 +2996,21 @@ visit_emit_vertex(struct nir_to_llvm_context > *ctx, > >> * have any effect, and GS threads have no externally observable > >> * effects other than emitting vertices. 
> >> */ > >> can_emit = LLVMBuildICmp(ctx->builder, LLVMIntULT, > gs_next_vertex, > >> LLVMConstInt(ctx->i32, > >> ctx->gs_max_out_vertices, false), ""); > >> > >> kill = LLVMBuildSelect(ctx->builder, can_emit, > >> LLVMConstReal(ctx->f32, 1.0f), > >> LLVMConstReal(ctx->f32, -1.0f), ""); > >> ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kill", > >> - ctx->voidt, &kill, 1, 0); > >> + ctx->voidt, &kill, 1, AC_FUNC_ATTR_LEGACY); > >> > >> /* loop num outputs */ > >> idx = 0; > >> for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) { > >> LLVMValueRef *out_ptr = &ctx->outputs[i * 4]; > >> if (!(ctx->output_mask & (1ull << i))) > >> continue; > >> > >> for (unsigned j = 0; j < 4; j++) { > >> LLVMValueRef out_val = > LLVMBuildLoad(ctx->builder, > >> @@ -3144,21 +3156,21 @@ static void visit_intrinsic(struct > >> nir_to_llvm_context *ctx, > >> case nir_intrinsic_image_atomic_comp_swap: > >> result = visit_image_atomic(ctx, instr); > >> break; > >> case nir_intrinsic_image_size: > >> result = visit_image_size(ctx, instr); > >> break; > >> case nir_intrinsic_discard: > >> ctx->shader_info->fs.can_discard = true; > >> ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kilp", > >> ctx->voidt, > >> - NULL, 0, 0); > >> + NULL, 0, AC_FUNC_ATTR_LEGACY); > >> break; > >> case nir_intrinsic_discard_if: > >> emit_discard_if(ctx, instr); > >> break; > >> case nir_intrinsic_memory_barrier: > >> emit_waitcnt(ctx); > >> break; > >> case nir_intrinsic_barrier: > >> emit_barrier(ctx); > >> break; > >> @@ -3924,21 +3936,22 @@ handle_vs_input_decl(struct nir_to_llvm_context > >> *ctx, > >> > >> for (unsigned i = 0; i < attrib_count; ++i, ++idx) { > >> t_offset = LLVMConstInt(ctx->i32, index + i, false); > >> > >> t_list = ac_build_indexed_load_const(&ctx->ac, > t_list_ptr, > >> t_offset); > >> args[0] = t_list; > >> args[1] = LLVMConstInt(ctx->i32, 0, false); > >> args[2] = buffer_index; > >> input = ac_emit_llvm_intrinsic(&ctx->ac, > >> "llvm.SI.vs.load.input", ctx->v4f32, args, 3, > >> - 
AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND); > >> + AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND | > >> + AC_FUNC_ATTR_LEGACY); > >> > >> for (unsigned chan = 0; chan < 4; chan++) { > >> LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, > >> chan, false); > >> ctx->inputs[radeon_llvm_reg_index_soa(idx, > chan)] > >> = > >> to_integer(ctx, > >> LLVMBuildExtractElement(ctx->builder, > >> input, > llvm_chan, > >> "")); > >> } > >> } > >> } > >> > >> @@ -4257,22 +4270,23 @@ si_llvm_init_export_args(struct > >> nir_to_llvm_context *ctx, > >> args[4] = ctx->i32one; > >> > >> for (unsigned chan = 0; chan < 2; chan++) { > >> LLVMValueRef pack_args[2] = { > >> values[2 * chan], > >> values[2 * chan + 1] > >> }; > >> LLVMValueRef packed; > >> > >> packed = > ac_emit_llvm_intrinsic(&ctx->ac, > >> "llvm.SI.packf16", > >> - ctx->i32, > >> pack_args, 2, > >> - > >> AC_FUNC_ATTR_READNONE); > >> + > ctx->i32, > >> pack_args, 2, > >> + > >> AC_FUNC_ATTR_READNONE | > >> + > >> AC_FUNC_ATTR_LEGACY); > >> args[chan + 5] = packed; > >> } > >> break; > >> > >> case V_028714_SPI_SHADER_UNORM16_ABGR: > >> for (unsigned chan = 0; chan < 4; chan++) { > >> val[chan] = emit_float_saturate(ctx, > >> values[chan], 0, 1); > >> val[chan] = LLVMBuildFMul(ctx->builder, > >> val[chan], > >> > >> LLVMConstReal(ctx->f32, 65535), ""); > >> val[chan] = LLVMBuildFAdd(ctx->builder, > >> val[chan], > >> @@ -4443,21 +4457,22 @@ handle_vs_outputs_post(struct > nir_to_llvm_context > >> *ctx) > >> si_llvm_init_export_args(ctx, values, target, args); > >> > >> if (target >= V_008DFC_SQ_EXP_POS && > >> target <= (V_008DFC_SQ_EXP_POS + 3)) { > >> memcpy(pos_args[target - V_008DFC_SQ_EXP_POS], > >> args, sizeof(args)); > >> } else { > >> ac_emit_llvm_intrinsic(&ctx->ac, > >> "llvm.SI.export", > >> ctx->voidt, > >> - args, 9, 0); > >> + args, 9, > >> + AC_FUNC_ATTR_LEGACY); > >> } > >> } > >> > >> /* We need to add the position output manually if it's missing. 
> */ > >> if (!pos_args[0][0]) { > >> pos_args[0][0] = LLVMConstInt(ctx->i32, 0xf, false); > >> pos_args[0][1] = ctx->i32zero; /* EXEC mask */ > >> pos_args[0][2] = ctx->i32zero; /* last export? */ > >> pos_args[0][3] = LLVMConstInt(ctx->i32, > >> V_008DFC_SQ_EXP_POS, false); > >> pos_args[0][4] = ctx->i32zero; /* COMPR flag */ > >> @@ -4498,21 +4513,22 @@ handle_vs_outputs_post(struct > nir_to_llvm_context > >> *ctx) > >> if (!pos_args[i][0]) > >> continue; > >> > >> /* Specify the target we are exporting */ > >> pos_args[i][3] = LLVMConstInt(ctx->i32, > >> V_008DFC_SQ_EXP_POS + pos_idx++, false); > >> if (pos_idx == num_pos_exports) > >> pos_args[i][2] = ctx->i32one; > >> ac_emit_llvm_intrinsic(&ctx->ac, > >> "llvm.SI.export", > >> ctx->voidt, > >> - pos_args[i], 9, 0); > >> + pos_args[i], 9, > >> + AC_FUNC_ATTR_LEGACY); > >> } > >> > >> ctx->shader_info->vs.pos_exports = num_pos_exports; > >> ctx->shader_info->vs.param_exports = param_count; > >> } > >> > >> static void > >> handle_es_outputs_post(struct nir_to_llvm_context *ctx) > >> { > >> int j; > >> @@ -4554,21 +4570,22 @@ si_export_mrt_color(struct nir_to_llvm_context > >> *ctx, > >> si_llvm_init_export_args(ctx, color, param, > >> args); > >> > >> if (is_last) { > >> args[1] = ctx->i32one; /* whether the EXEC mask is valid > >> */ > >> args[2] = ctx->i32one; /* DONE bit */ > >> } else if (args[0] == ctx->i32zero) > >> return; /* unnecessary NULL export */ > >> > >> ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.export", > >> - ctx->voidt, args, 9, 0); > >> + ctx->voidt, args, 9, > >> + AC_FUNC_ATTR_LEGACY); > >> } > >> > >> static void > >> si_export_mrt_z(struct nir_to_llvm_context *ctx, > >> LLVMValueRef depth, LLVMValueRef stencil, > >> LLVMValueRef samplemask) > >> { > >> LLVMValueRef args[9]; > >> unsigned mask = 0; > >> args[1] = ctx->i32one; /* whether the EXEC mask is valid */ > >> @@ -4598,21 +4615,22 @@ si_export_mrt_z(struct nir_to_llvm_context *ctx, > >> } > >> > >> /* SI (except OLAND) has a bug 
that it only looks > >> * at the X writemask component. */ > >> if (ctx->options->chip_class == SI && > >> ctx->options->family != CHIP_OLAND) > >> mask |= 0x01; > >> > >> args[0] = LLVMConstInt(ctx->i32, mask, false); > >> ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.export", > >> - ctx->voidt, args, 9, 0); > >> + ctx->voidt, args, 9, > >> + AC_FUNC_ATTR_LEGACY); > >> } > >> > >> static void > >> handle_fs_outputs_post(struct nir_to_llvm_context *ctx) > >> { > >> unsigned index = 0; > >> LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL; > >> > >> for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) { > >> LLVMValueRef values[4]; > >> @@ -5021,21 +5039,22 @@ ac_gs_copy_shader_emit(struct > nir_to_llvm_context > >> *ctx) > >> > >> for (unsigned j = 0; j < 4; j++) { > >> LLVMValueRef value; > >> args[2] = LLVMConstInt(ctx->i32, > >> (idx * 4 + j) * > >> ctx->gs_max_out_vertices > * > >> 16 * 4, false); > >> > >> value = ac_emit_llvm_intrinsic(&ctx->ac, > >> > >> "llvm.SI.buffer.load.dword.i32.i32", > >> ctx->i32, args, > 9, > >> - > >> AC_FUNC_ATTR_READONLY); > >> + > >> AC_FUNC_ATTR_READONLY | > >> + > >> AC_FUNC_ATTR_LEGACY); > >> > >> LLVMBuildStore(ctx->builder, > >> to_float(ctx, value), > >> ctx->outputs[radeon_llvm_reg_index_soa(i, j)]); > >> } > >> idx++; > >> } > >> handle_vs_outputs_post(ctx); > >> } > >> > >> void ac_create_gs_copy_shader(LLVMTargetMachineRef tm, > >> diff --git a/src/gallium/auxiliary/draw/draw_llvm.c > >> b/src/gallium/auxiliary/draw/draw_llvm.c > >> index 8952dc8..586a9be 100644 > >> --- a/src/gallium/auxiliary/draw/draw_llvm.c > >> +++ b/src/gallium/auxiliary/draw/draw_llvm.c > >> @@ -1588,21 +1588,22 @@ draw_llvm_generate(struct draw_llvm *llvm, > struct > >> draw_llvm_variant *variant) > >> > >> func_type = LLVMFunctionType(LLVMInt8TypeInContext(context), > >> arg_types, num_arg_types, 0); > >> > >> variant_func = LLVMAddFunction(gallivm->module, func_name, > func_type); > >> variant->function = variant_func; > >> > >> 
LLVMSetFunctionCallConv(variant_func, LLVMCCallConv); > >> for (i = 0; i < num_arg_types; ++i) > >> if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) > >> - lp_add_function_attr(variant_func, i + 1, > LP_FUNC_ATTR_NOALIAS); > >> + lp_add_function_attr(context, variant_func, i + 1, > >> + LP_FUNC_ATTR_NOALIAS); > >> > >> context_ptr = LLVMGetParam(variant_func, 0); > >> io_ptr = LLVMGetParam(variant_func, 1); > >> vbuffers_ptr = LLVMGetParam(variant_func, 2); > >> count = LLVMGetParam(variant_func, 3); > >> /* > >> * XXX: the maxelt part is unused. Not really useful, since we > cannot > >> * get index buffer overflows due to vsplit (which provides its own > >> * elts buffer, with a different size than what's passed in here). > >> */ > >> @@ -2262,21 +2263,22 @@ draw_gs_llvm_generate(struct draw_llvm *llvm, > >> func_type = LLVMFunctionType(int32_type, arg_types, > >> ARRAY_SIZE(arg_types), 0); > >> > >> variant_func = LLVMAddFunction(gallivm->module, func_name, > func_type); > >> > >> variant->function = variant_func; > >> > >> LLVMSetFunctionCallConv(variant_func, LLVMCCallConv); > >> > >> for (i = 0; i < ARRAY_SIZE(arg_types); ++i) > >> if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) > >> - lp_add_function_attr(variant_func, i + 1, > LP_FUNC_ATTR_NOALIAS); > >> + lp_add_function_attr(context, variant_func, i + 1, > >> + LP_FUNC_ATTR_NOALIAS); > >> > >> context_ptr = LLVMGetParam(variant_func, 0); > >> input_array = LLVMGetParam(variant_func, 1); > >> io_ptr = LLVMGetParam(variant_func, 2); > >> num_prims = LLVMGetParam(variant_func, 3); > >> system_values.instance_id = LLVMGetParam(variant_func, 4); > >> prim_id_ptr = LLVMGetParam(variant_func, 5); > >> system_values.invocation_id = LLVMGetParam(variant_func, 6); > >> > >> lp_build_name(context_ptr, "context"); > >> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.c > >> b/src/gallium/auxiliary/gallivm/lp_bld_intr.c > >> index 049671a..1b50e68 100644 > >> --- 
a/src/gallium/auxiliary/gallivm/lp_bld_intr.c > >> +++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.c > >> @@ -152,83 +152,100 @@ static const char *attr_to_str(enum lp_func_attr > >> attr) > >> case LP_FUNC_ATTR_READONLY: return "readonly"; > >> default: > >> _debug_printf("Unhandled function attribute: %x\n", attr); > >> return 0; > >> } > >> } > >> > >> #endif > >> > >> void > >> -lp_add_function_attr(LLVMValueRef function, > >> - int attr_idx, > >> - enum lp_func_attr attr) > >> +lp_add_function_attr(LLVMContextRef ctx, LLVMValueRef function, > > > > > > If function is not always a function, then it's better the parameter is > > renamed to functionOrCall or something like that. > > > >> + int attr_idx, enum lp_func_attr attr) > >> { > >> > >> #if HAVE_LLVM < 0x0400 > >> LLVMAttribute llvm_attr = lp_attr_to_llvm_attr(attr); > >> if (attr_idx == -1) { > >> LLVMAddFunctionAttr(function, llvm_attr); > >> } else { > >> LLVMAddAttribute(LLVMGetParam(function, attr_idx - 1), > llvm_attr); > >> } > >> #else > >> - LLVMContextRef context = > >> LLVMGetModuleContext(LLVMGetGlobalParent(function)); > > > > > > Even when LLVMIsAFunction(function) is false, we could still get the > > LLVMContextRef: > > > > LLVMModuleRef module; > > if (LLVMIsAFunction(functionOrCall)) { > > module = LLVMGetGlobalParent(functionOrCall); > > } else { > > LLVMBasicBlockRef bb = LLVMValueAsBasicBlock(functionOrCall); > > The correct function is LLVMGetInstructionParent. > > > LLVMValueRef function = LLVMGetBasicBlockParent(bb) > > module = LLVMGetGlobalParent(function); > > > > } > > LLVMContextRef context = LLVMGetModuleContext(module); > > > > This would enable to keep lp_add_function_attr prototype alone. > > > > Otherwise looks good to me. > > > > Reviewed-by: Jose Fonseca <jfons...@vmware.com> > > Thanks. > > Marek > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev