From: Marek Olšák <marek.ol...@amd.com> Function attributes can vary at call sites if the intrinsic is NOT a legacy SI intrinsic. We need this to force readnone or inaccessiblememonly on some amdgcn intrinsics.
This is only used with LLVM 4.0 and later. Intrinsics only used with LLVM <= 3.9 don't need the LEGACY flag. gallivm and ac code is in the same patch, because splitting would be more complicated with all the LEGACY uses all over the place. --- src/amd/common/ac_llvm_build.c | 23 ++++---- src/amd/common/ac_llvm_util.c | 31 +++++++---- src/amd/common/ac_llvm_util.h | 17 +++--- src/amd/common/ac_nir_to_llvm.c | 63 ++++++++++++++-------- src/gallium/auxiliary/draw/draw_llvm.c | 6 ++- src/gallium/auxiliary/gallivm/lp_bld_intr.c | 51 ++++++++++++------ src/gallium/auxiliary/gallivm/lp_bld_intr.h | 11 ++-- src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 3 +- src/gallium/drivers/llvmpipe/lp_state_fs.c | 3 +- src/gallium/drivers/llvmpipe/lp_state_setup.c | 7 +-- src/gallium/drivers/radeonsi/si_shader.c | 64 ++++++++++++++--------- src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c | 14 +++-- 12 files changed, 184 insertions(+), 109 deletions(-) diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 2f25b14..5c8b7f7 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -75,47 +75,50 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context) "amdgpu.uniform", 14); ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0); } LLVMValueRef ac_emit_llvm_intrinsic(struct ac_llvm_context *ctx, const char *name, LLVMTypeRef return_type, LLVMValueRef *params, unsigned param_count, unsigned attrib_mask) { - LLVMValueRef function; + LLVMValueRef function, call; + bool set_callsite_attrs = HAVE_LLVM >= 0x0400 && + !(attrib_mask & AC_FUNC_ATTR_LEGACY); function = LLVMGetNamedFunction(ctx->module, name); if (!function) { LLVMTypeRef param_types[32], function_type; unsigned i; assert(param_count <= 32); for (i = 0; i < param_count; ++i) { assert(params[i]); param_types[i] = LLVMTypeOf(params[i]); } function_type = LLVMFunctionType(return_type, param_types, param_count, 0); function = 
LLVMAddFunction(ctx->module, name, function_type); LLVMSetFunctionCallConv(function, LLVMCCallConv); LLVMSetLinkage(function, LLVMExternalLinkage); - attrib_mask |= AC_FUNC_ATTR_NOUNWIND; - while (attrib_mask) { - enum ac_func_attr attr = 1u << u_bit_scan(&attrib_mask); - ac_add_function_attr(function, -1, attr); - } + if (!set_callsite_attrs) + ac_add_func_attributes(ctx->context, function, attrib_mask); } - return LLVMBuildCall(ctx->builder, function, params, param_count, ""); + + call = LLVMBuildCall(ctx->builder, function, params, param_count, ""); + if (set_callsite_attrs) + ac_add_func_attributes(ctx->context, call, attrib_mask); + return call; } LLVMValueRef ac_build_gather_values_extended(struct ac_llvm_context *ctx, LLVMValueRef *values, unsigned value_count, unsigned value_stride, bool load) { LLVMBuilderRef builder = ctx->builder; @@ -524,21 +527,22 @@ ac_build_tbuffer_store(struct ac_llvm_context *ctx, /* The instruction offset field has 12 bits */ assert(offen || inst_offset < (1 << 12)); /* The intrinsic is overloaded, we need to add a type suffix for overloading to work. */ unsigned func = CLAMP(num_channels, 1, 3) - 1; const char *types[] = {"i32", "v2i32", "v4i32"}; char name[256]; snprintf(name, sizeof(name), "llvm.SI.tbuffer.store.%s", types[func]); ac_emit_llvm_intrinsic(ctx, name, ctx->voidt, - args, ARRAY_SIZE(args), 0); + args, ARRAY_SIZE(args), + AC_FUNC_ATTR_LEGACY); } void ac_build_tbuffer_store_dwords(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef vdata, unsigned num_channels, LLVMValueRef vaddr, LLVMValueRef soffset, unsigned inst_offset) @@ -836,12 +840,13 @@ LLVMValueRef ac_emit_clamp(struct ac_llvm_context *ctx, LLVMValueRef value) const char *intr = HAVE_LLVM >= 0x0308 ? "llvm.AMDGPU.clamp." 
: "llvm.AMDIL.clamp."; LLVMValueRef args[3] = { value, LLVMConstReal(ctx->f32, 0), LLVMConstReal(ctx->f32, 1), }; return ac_emit_llvm_intrinsic(ctx, intr, ctx->f32, args, 3, - AC_FUNC_ATTR_READNONE); + AC_FUNC_ATTR_READNONE | + AC_FUNC_ATTR_LEGACY); } diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c index be127c5..fb525dd 100644 --- a/src/amd/common/ac_llvm_util.c +++ b/src/amd/common/ac_llvm_util.c @@ -17,21 +17,21 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. * */ /* based on pieces from si_pipe.c and radeon_llvm_emit.c */ #include "ac_llvm_util.h" - +#include "util/bitscan.h" #include <llvm-c/Core.h> #include "c11/threads.h" #include <assert.h> #include <stdio.h> #include <string.h> static void ac_init_llvm_target() { @@ -173,40 +173,53 @@ static const char *attr_to_str(enum ac_func_attr attr) case AC_FUNC_ATTR_READNONE: return "readnone"; case AC_FUNC_ATTR_READONLY: return "readonly"; default: fprintf(stderr, "Unhandled function attribute: %x\n", attr); return 0; } } #endif -void -ac_add_function_attr(LLVMValueRef function, - int attr_idx, - enum ac_func_attr attr) +static void +ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function, + int attr_idx, enum ac_func_attr attr) { - #if HAVE_LLVM < 0x0400 LLVMAttribute llvm_attr = ac_attr_to_llvm_attr(attr); if (attr_idx == -1) { LLVMAddFunctionAttr(function, llvm_attr); } else { LLVMAddAttribute(LLVMGetParam(function, attr_idx - 1), llvm_attr); } #else - LLVMContextRef context = LLVMGetModuleContext(LLVMGetGlobalParent(function)); const char *attr_name = attr_to_str(attr); unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name, strlen(attr_name)); - LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(context, kind_id, 0); - 
LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr); + LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(ctx, kind_id, 0); + + if (LLVMIsAFunction(function)) + LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr); + else + LLVMAddCallSiteAttribute(function, attr_idx, llvm_attr); #endif } +void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function, + unsigned attrib_mask) +{ + attrib_mask |= AC_FUNC_ATTR_NOUNWIND; + attrib_mask &= ~AC_FUNC_ATTR_LEGACY; + + while (attrib_mask) { + enum ac_func_attr attr = 1u << u_bit_scan(&attrib_mask); + ac_add_function_attr(ctx, function, -1, attr); + } +} + void ac_dump_module(LLVMModuleRef module) { char *str = LLVMPrintModuleToString(module); fprintf(stderr, "%s", str); LLVMDisposeMessage(str); } diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h index 93d3d27..4fe4ab4 100644 --- a/src/amd/common/ac_llvm_util.h +++ b/src/amd/common/ac_llvm_util.h @@ -34,28 +34,29 @@ extern "C" { #endif enum ac_func_attr { AC_FUNC_ATTR_ALWAYSINLINE = (1 << 0), AC_FUNC_ATTR_BYVAL = (1 << 1), AC_FUNC_ATTR_INREG = (1 << 2), AC_FUNC_ATTR_NOALIAS = (1 << 3), AC_FUNC_ATTR_NOUNWIND = (1 << 4), AC_FUNC_ATTR_READNONE = (1 << 5), AC_FUNC_ATTR_READONLY = (1 << 6), + + /* Legacy intrinsic that needs attributes on function declarations + * and they must match the internal LLVM definition exactly, otherwise + * intrinsic selection fails. 
+ */ + AC_FUNC_ATTR_LEGACY = (1u << 31), }; LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, bool supports_spill); void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes); bool ac_is_sgpr_param(LLVMValueRef param); - -void -ac_add_function_attr(LLVMValueRef function, - int attr_idx, - enum ac_func_attr attr); - -void -ac_dump_module(LLVMModuleRef module); +void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function, + unsigned attrib_mask); +void ac_dump_module(LLVMModuleRef module); #ifdef __cplusplus } #endif diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index b3dc63c..4ec19d5 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -1039,26 +1039,27 @@ static LLVMValueRef emit_imul_high(struct nir_to_llvm_context *ctx, src0 = LLVMBuildSExt(ctx->builder, src0, ctx->i64, ""); src1 = LLVMBuildSExt(ctx->builder, src1, ctx->i64, ""); dst64 = LLVMBuildMul(ctx->builder, src0, src1, ""); dst64 = LLVMBuildAShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), ""); result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, ""); return result; } static LLVMValueRef emit_bitfield_extract(struct nir_to_llvm_context *ctx, - const char *intrin, + const char *intrin, unsigned attr_mask, LLVMValueRef srcs[3]) { LLVMValueRef result; LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], LLVMConstInt(ctx->i32, 32, false), ""); - result = ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->i32, srcs, 3, AC_FUNC_ATTR_READNONE); + result = ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->i32, srcs, 3, + AC_FUNC_ATTR_READNONE | attr_mask); result = LLVMBuildSelect(ctx->builder, icond, srcs[0], result, ""); return result; } static LLVMValueRef emit_bitfield_insert(struct nir_to_llvm_context *ctx, LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2, LLVMValueRef src3) { LLVMValueRef bfi_args[3], result; @@ -1418,24 +1419,26 @@ static void visit_alu(struct 
nir_to_llvm_context *ctx, nir_alu_instr *instr) break; case nir_op_fmin: result = emit_intrin_2f_param(ctx, "llvm.minnum", to_float_type(ctx, def_type), src[0], src[1]); break; case nir_op_ffma: result = emit_intrin_3f_param(ctx, "llvm.fma", to_float_type(ctx, def_type), src[0], src[1], src[2]); break; case nir_op_ibitfield_extract: - result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.i32", src); + result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.i32", + AC_FUNC_ATTR_LEGACY, src); break; case nir_op_ubitfield_extract: - result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.u32", src); + result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.u32", + AC_FUNC_ATTR_LEGACY, src); break; case nir_op_bitfield_insert: result = emit_bitfield_insert(ctx, src[0], src[1], src[2], src[3]); break; case nir_op_bitfield_reverse: result = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.bitreverse.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE); break; case nir_op_bit_count: result = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.ctpop.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE); break; @@ -1635,22 +1638,23 @@ static LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx, txq_args[txq_arg_count++] = tinfo->args[1]; txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0xf, 0); /* dmask */ txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* unorm */ txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* r128 */ txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, da ? 
1 : 0, 0); txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* glc */ txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* slc */ txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* tfe */ txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* lwe */ size = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.getresinfo.i32", ctx->v4i32, - txq_args, txq_arg_count, - AC_FUNC_ATTR_READNONE); + txq_args, txq_arg_count, + AC_FUNC_ATTR_READNONE | + AC_FUNC_ATTR_LEGACY); for (c = 0; c < 2; c++) { half_texel[c] = LLVMBuildExtractElement(ctx->builder, size, LLVMConstInt(ctx->i32, c, false), ""); half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, ""); half_texel[c] = ac_emit_fdiv(&ctx->ac, ctx->f32one, half_texel[c]); half_texel[c] = LLVMBuildFMul(ctx->builder, half_texel[c], LLVMConstReal(ctx->f32, -0.5), ""); } } @@ -1660,21 +1664,22 @@ static LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx, LLVMValueRef index = LLVMConstInt(ctx->i32, coord_vgpr_index + c, 0); tmp = LLVMBuildExtractElement(ctx->builder, coord, index, ""); tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, ""); tmp = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], ""); tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, ""); coord = LLVMBuildInsertElement(ctx->builder, coord, tmp, index, ""); } tinfo->args[0] = coord; return ac_emit_llvm_intrinsic(&ctx->ac, intr_name, tinfo->dst_type, tinfo->args, tinfo->arg_count, - AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND); + AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND | + AC_FUNC_ATTR_LEGACY); } static LLVMValueRef build_tex_intrinsic(struct nir_to_llvm_context *ctx, nir_tex_instr *instr, struct ac_tex_info *tinfo) { const char *name = "llvm.SI.image.sample"; const char *infix = ""; char intr_name[127]; @@ -1728,21 +1733,22 @@ static LLVMValueRef build_tex_intrinsic(struct nir_to_llvm_context *ctx, has_offset ? 
".o" : "", type); if (instr->op == nir_texop_tg4) { enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type); if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) { return radv_lower_gather4_integer(ctx, tinfo, instr, intr_name, (int)has_offset + (int)is_shadow); } } return ac_emit_llvm_intrinsic(&ctx->ac, intr_name, tinfo->dst_type, tinfo->args, tinfo->arg_count, - AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND); + AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND | + AC_FUNC_ATTR_LEGACY); } static LLVMValueRef visit_vulkan_resource_index(struct nir_to_llvm_context *ctx, nir_intrinsic_instr *instr) { LLVMValueRef index = get_src(ctx, instr->src[0]); unsigned desc_set = nir_intrinsic_desc_set(instr); unsigned binding = nir_intrinsic_binding(instr); LLVMValueRef desc_ptr = ctx->descriptor_sets[desc_set]; @@ -2006,21 +2012,23 @@ static LLVMValueRef visit_load_ubo_buffer(struct nir_to_llvm_context *ctx, if (instr->dest.ssa.bit_size == 64) num_components *= 2; for (unsigned i = 0; i < num_components; ++i) { LLVMValueRef params[] = { rsrc, LLVMBuildAdd(ctx->builder, LLVMConstInt(ctx->i32, 4 * i, 0), offset, "") }; results[i] = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.load.const", ctx->f32, - params, 2, AC_FUNC_ATTR_READNONE); + params, 2, + AC_FUNC_ATTR_READNONE | + AC_FUNC_ATTR_LEGACY); } ret = ac_build_gather_values(&ctx->ac, results, instr->num_components); return LLVMBuildBitCast(ctx->builder, ret, get_def_type(ctx, &instr->dest.ssa), ""); } static void radv_get_deref_offset(struct nir_to_llvm_context *ctx, nir_deref *tail, @@ -2103,21 +2111,23 @@ load_gs_input(struct nir_to_llvm_context *ctx, args[1] = vtx_offset; args[2] = LLVMConstInt(ctx->i32, (param * 4 + i + const_index) * 256, false); args[3] = ctx->i32zero; args[4] = ctx->i32one; /* OFFEN */ args[5] = ctx->i32zero; /* IDXEN */ args[6] = ctx->i32one; /* GLC */ args[7] = ctx->i32zero; /* SLC */ args[8] = ctx->i32zero; /* TFE */ value[i] = ac_emit_llvm_intrinsic(&ctx->ac, 
"llvm.SI.buffer.load.dword.i32.i32", - ctx->i32, args, 9, AC_FUNC_ATTR_READONLY); + ctx->i32, args, 9, + AC_FUNC_ATTR_READONLY | + AC_FUNC_ATTR_LEGACY); } result = ac_build_gather_values(&ctx->ac, value, instr->num_components); return result; } static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx, nir_intrinsic_instr *instr) { LLVMValueRef values[8]; @@ -2685,21 +2695,23 @@ static LLVMValueRef visit_image_size(struct nir_to_llvm_context *ctx, params[2] = LLVMConstInt(ctx->i32, 15, false); params[3] = ctx->i32zero; params[4] = ctx->i32zero; params[5] = da ? ctx->i32one : ctx->i32zero; params[6] = ctx->i32zero; params[7] = ctx->i32zero; params[8] = ctx->i32zero; params[9] = ctx->i32zero; res = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.getresinfo.i32", ctx->v4i32, - params, 10, AC_FUNC_ATTR_READNONE); + params, 10, + AC_FUNC_ATTR_READNONE | + AC_FUNC_ATTR_LEGACY); if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE && glsl_sampler_type_is_array(type)) { LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false); LLVMValueRef six = LLVMConstInt(ctx->i32, 6, false); LLVMValueRef z = LLVMBuildExtractElement(ctx->builder, res, two, ""); z = LLVMBuildSDiv(ctx->builder, z, six, ""); res = LLVMBuildInsertElement(ctx->builder, res, z, two, ""); } return res; @@ -2729,21 +2741,21 @@ static void emit_discard_if(struct nir_to_llvm_context *ctx, cond = LLVMBuildICmp(ctx->builder, LLVMIntNE, get_src(ctx, instr->src[0]), ctx->i32zero, ""); cond = LLVMBuildSelect(ctx->builder, cond, LLVMConstReal(ctx->f32, -1.0f), ctx->f32zero, ""); ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kill", ctx->voidt, - &cond, 1, 0); + &cond, 1, AC_FUNC_ATTR_LEGACY); } static LLVMValueRef visit_load_local_invocation_index(struct nir_to_llvm_context *ctx) { LLVMValueRef result; LLVMValueRef thread_id = ac_get_thread_id(&ctx->ac); result = LLVMBuildAnd(ctx->builder, ctx->tg_size, LLVMConstInt(ctx->i32, 0xfc0, false), ""); @@ -2984,21 +2996,21 @@ visit_emit_vertex(struct nir_to_llvm_context *ctx, 
* have any effect, and GS threads have no externally observable * effects other than emitting vertices. */ can_emit = LLVMBuildICmp(ctx->builder, LLVMIntULT, gs_next_vertex, LLVMConstInt(ctx->i32, ctx->gs_max_out_vertices, false), ""); kill = LLVMBuildSelect(ctx->builder, can_emit, LLVMConstReal(ctx->f32, 1.0f), LLVMConstReal(ctx->f32, -1.0f), ""); ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kill", - ctx->voidt, &kill, 1, 0); + ctx->voidt, &kill, 1, AC_FUNC_ATTR_LEGACY); /* loop num outputs */ idx = 0; for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) { LLVMValueRef *out_ptr = &ctx->outputs[i * 4]; if (!(ctx->output_mask & (1ull << i))) continue; for (unsigned j = 0; j < 4; j++) { LLVMValueRef out_val = LLVMBuildLoad(ctx->builder, @@ -3144,21 +3156,21 @@ static void visit_intrinsic(struct nir_to_llvm_context *ctx, case nir_intrinsic_image_atomic_comp_swap: result = visit_image_atomic(ctx, instr); break; case nir_intrinsic_image_size: result = visit_image_size(ctx, instr); break; case nir_intrinsic_discard: ctx->shader_info->fs.can_discard = true; ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kilp", ctx->voidt, - NULL, 0, 0); + NULL, 0, AC_FUNC_ATTR_LEGACY); break; case nir_intrinsic_discard_if: emit_discard_if(ctx, instr); break; case nir_intrinsic_memory_barrier: emit_waitcnt(ctx); break; case nir_intrinsic_barrier: emit_barrier(ctx); break; @@ -3924,21 +3936,22 @@ handle_vs_input_decl(struct nir_to_llvm_context *ctx, for (unsigned i = 0; i < attrib_count; ++i, ++idx) { t_offset = LLVMConstInt(ctx->i32, index + i, false); t_list = ac_build_indexed_load_const(&ctx->ac, t_list_ptr, t_offset); args[0] = t_list; args[1] = LLVMConstInt(ctx->i32, 0, false); args[2] = buffer_index; input = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.vs.load.input", ctx->v4f32, args, 3, - AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND); + AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND | + AC_FUNC_ATTR_LEGACY); for (unsigned chan = 0; chan < 4; chan++) { LLVMValueRef llvm_chan = 
LLVMConstInt(ctx->i32, chan, false); ctx->inputs[radeon_llvm_reg_index_soa(idx, chan)] = to_integer(ctx, LLVMBuildExtractElement(ctx->builder, input, llvm_chan, "")); } } } @@ -4257,22 +4270,23 @@ si_llvm_init_export_args(struct nir_to_llvm_context *ctx, args[4] = ctx->i32one; for (unsigned chan = 0; chan < 2; chan++) { LLVMValueRef pack_args[2] = { values[2 * chan], values[2 * chan + 1] }; LLVMValueRef packed; packed = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.packf16", - ctx->i32, pack_args, 2, - AC_FUNC_ATTR_READNONE); + ctx->i32, pack_args, 2, + AC_FUNC_ATTR_READNONE | + AC_FUNC_ATTR_LEGACY); args[chan + 5] = packed; } break; case V_028714_SPI_SHADER_UNORM16_ABGR: for (unsigned chan = 0; chan < 4; chan++) { val[chan] = emit_float_saturate(ctx, values[chan], 0, 1); val[chan] = LLVMBuildFMul(ctx->builder, val[chan], LLVMConstReal(ctx->f32, 65535), ""); val[chan] = LLVMBuildFAdd(ctx->builder, val[chan], @@ -4443,21 +4457,22 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx) si_llvm_init_export_args(ctx, values, target, args); if (target >= V_008DFC_SQ_EXP_POS && target <= (V_008DFC_SQ_EXP_POS + 3)) { memcpy(pos_args[target - V_008DFC_SQ_EXP_POS], args, sizeof(args)); } else { ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.export", ctx->voidt, - args, 9, 0); + args, 9, + AC_FUNC_ATTR_LEGACY); } } /* We need to add the position output manually if it's missing. */ if (!pos_args[0][0]) { pos_args[0][0] = LLVMConstInt(ctx->i32, 0xf, false); pos_args[0][1] = ctx->i32zero; /* EXEC mask */ pos_args[0][2] = ctx->i32zero; /* last export? 
*/ pos_args[0][3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_POS, false); pos_args[0][4] = ctx->i32zero; /* COMPR flag */ @@ -4498,21 +4513,22 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx) if (!pos_args[i][0]) continue; /* Specify the target we are exporting */ pos_args[i][3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_POS + pos_idx++, false); if (pos_idx == num_pos_exports) pos_args[i][2] = ctx->i32one; ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.export", ctx->voidt, - pos_args[i], 9, 0); + pos_args[i], 9, + AC_FUNC_ATTR_LEGACY); } ctx->shader_info->vs.pos_exports = num_pos_exports; ctx->shader_info->vs.param_exports = param_count; } static void handle_es_outputs_post(struct nir_to_llvm_context *ctx) { int j; @@ -4554,21 +4570,22 @@ si_export_mrt_color(struct nir_to_llvm_context *ctx, si_llvm_init_export_args(ctx, color, param, args); if (is_last) { args[1] = ctx->i32one; /* whether the EXEC mask is valid */ args[2] = ctx->i32one; /* DONE bit */ } else if (args[0] == ctx->i32zero) return; /* unnecessary NULL export */ ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.export", - ctx->voidt, args, 9, 0); + ctx->voidt, args, 9, + AC_FUNC_ATTR_LEGACY); } static void si_export_mrt_z(struct nir_to_llvm_context *ctx, LLVMValueRef depth, LLVMValueRef stencil, LLVMValueRef samplemask) { LLVMValueRef args[9]; unsigned mask = 0; args[1] = ctx->i32one; /* whether the EXEC mask is valid */ @@ -4598,21 +4615,22 @@ si_export_mrt_z(struct nir_to_llvm_context *ctx, } /* SI (except OLAND) has a bug that it only looks * at the X writemask component. 
*/ if (ctx->options->chip_class == SI && ctx->options->family != CHIP_OLAND) mask |= 0x01; args[0] = LLVMConstInt(ctx->i32, mask, false); ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.export", - ctx->voidt, args, 9, 0); + ctx->voidt, args, 9, + AC_FUNC_ATTR_LEGACY); } static void handle_fs_outputs_post(struct nir_to_llvm_context *ctx) { unsigned index = 0; LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL; for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) { LLVMValueRef values[4]; @@ -5021,21 +5039,22 @@ ac_gs_copy_shader_emit(struct nir_to_llvm_context *ctx) for (unsigned j = 0; j < 4; j++) { LLVMValueRef value; args[2] = LLVMConstInt(ctx->i32, (idx * 4 + j) * ctx->gs_max_out_vertices * 16 * 4, false); value = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.buffer.load.dword.i32.i32", ctx->i32, args, 9, - AC_FUNC_ATTR_READONLY); + AC_FUNC_ATTR_READONLY | + AC_FUNC_ATTR_LEGACY); LLVMBuildStore(ctx->builder, to_float(ctx, value), ctx->outputs[radeon_llvm_reg_index_soa(i, j)]); } idx++; } handle_vs_outputs_post(ctx); } void ac_create_gs_copy_shader(LLVMTargetMachineRef tm, diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 8952dc8..586a9be 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -1588,21 +1588,22 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) func_type = LLVMFunctionType(LLVMInt8TypeInContext(context), arg_types, num_arg_types, 0); variant_func = LLVMAddFunction(gallivm->module, func_name, func_type); variant->function = variant_func; LLVMSetFunctionCallConv(variant_func, LLVMCCallConv); for (i = 0; i < num_arg_types; ++i) if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) - lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS); + lp_add_function_attr(context, variant_func, i + 1, + LP_FUNC_ATTR_NOALIAS); context_ptr = LLVMGetParam(variant_func, 0); io_ptr = LLVMGetParam(variant_func, 1); vbuffers_ptr = 
LLVMGetParam(variant_func, 2); count = LLVMGetParam(variant_func, 3); /* * XXX: the maxelt part is unused. Not really useful, since we cannot * get index buffer overflows due to vsplit (which provides its own * elts buffer, with a different size than what's passed in here). */ @@ -2262,21 +2263,22 @@ draw_gs_llvm_generate(struct draw_llvm *llvm, func_type = LLVMFunctionType(int32_type, arg_types, ARRAY_SIZE(arg_types), 0); variant_func = LLVMAddFunction(gallivm->module, func_name, func_type); variant->function = variant_func; LLVMSetFunctionCallConv(variant_func, LLVMCCallConv); for (i = 0; i < ARRAY_SIZE(arg_types); ++i) if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) - lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS); + lp_add_function_attr(context, variant_func, i + 1, + LP_FUNC_ATTR_NOALIAS); context_ptr = LLVMGetParam(variant_func, 0); input_array = LLVMGetParam(variant_func, 1); io_ptr = LLVMGetParam(variant_func, 2); num_prims = LLVMGetParam(variant_func, 3); system_values.instance_id = LLVMGetParam(variant_func, 4); prim_id_ptr = LLVMGetParam(variant_func, 5); system_values.invocation_id = LLVMGetParam(variant_func, 6); lp_build_name(context_ptr, "context"); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.c b/src/gallium/auxiliary/gallivm/lp_bld_intr.c index 049671a..1b50e68 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_intr.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.c @@ -152,83 +152,100 @@ static const char *attr_to_str(enum lp_func_attr attr) case LP_FUNC_ATTR_READONLY: return "readonly"; default: _debug_printf("Unhandled function attribute: %x\n", attr); return 0; } } #endif void -lp_add_function_attr(LLVMValueRef function, - int attr_idx, - enum lp_func_attr attr) +lp_add_function_attr(LLVMContextRef ctx, LLVMValueRef function, + int attr_idx, enum lp_func_attr attr) { #if HAVE_LLVM < 0x0400 LLVMAttribute llvm_attr = lp_attr_to_llvm_attr(attr); if (attr_idx == -1) { LLVMAddFunctionAttr(function, llvm_attr); } 
else { LLVMAddAttribute(LLVMGetParam(function, attr_idx - 1), llvm_attr); } #else - LLVMContextRef context = LLVMGetModuleContext(LLVMGetGlobalParent(function)); const char *attr_name = attr_to_str(attr); unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name, strlen(attr_name)); - LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(context, kind_id, 0); - LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr); + LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(ctx, kind_id, 0); + + if (LLVMIsAFunction(function)) + LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr); + else + LLVMAddCallSiteAttribute(function, attr_idx, llvm_attr); #endif } +static void +lp_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function, + unsigned attrib_mask) +{ + /* NoUnwind indicates that the intrinsic never raises a C++ exception. + * Set it for all intrinsics. + */ + attrib_mask |= LP_FUNC_ATTR_NOUNWIND; + attrib_mask &= ~LP_FUNC_ATTR_LEGACY; + + while (attrib_mask) { + enum lp_func_attr attr = 1u << u_bit_scan(&attrib_mask); + lp_add_function_attr(ctx, function, -1, attr); + } +} + LLVMValueRef lp_build_intrinsic(LLVMBuilderRef builder, const char *name, LLVMTypeRef ret_type, LLVMValueRef *args, unsigned num_args, unsigned attr_mask) { LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder))); - LLVMValueRef function; + LLVMContextRef ctx = LLVMGetModuleContext(module); + LLVMValueRef function, call; + bool set_callsite_attrs = HAVE_LLVM >= 0x0400 && + !(attr_mask & LP_FUNC_ATTR_LEGACY); function = LLVMGetNamedFunction(module, name); if(!function) { LLVMTypeRef arg_types[LP_MAX_FUNC_ARGS]; unsigned i; assert(num_args <= LP_MAX_FUNC_ARGS); for(i = 0; i < num_args; ++i) { assert(args[i]); arg_types[i] = LLVMTypeOf(args[i]); } function = lp_declare_intrinsic(module, name, ret_type, arg_types, num_args); - /* NoUnwind indicates that the intrinsic never raises a C++ exception. - * Set it for all intrinsics. 
- */ - attr_mask |= LP_FUNC_ATTR_NOUNWIND; - - while (attr_mask) { - enum lp_func_attr attr = 1 << u_bit_scan(&attr_mask); - lp_add_function_attr(function, -1, attr); - } + if (!set_callsite_attrs) + lp_add_func_attributes(ctx, function, attr_mask); if (gallivm_debug & GALLIVM_DEBUG_IR) { lp_debug_dump_value(function); } } - return LLVMBuildCall(builder, function, args, num_args, ""); + call = LLVMBuildCall(builder, function, args, num_args, ""); + if (set_callsite_attrs) + lp_add_func_attributes(ctx, call, attr_mask); + return call; } LLVMValueRef lp_build_intrinsic_unary(LLVMBuilderRef builder, const char *name, LLVMTypeRef ret_type, LLVMValueRef a) { return lp_build_intrinsic(builder, name, ret_type, &a, 1, 0); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.h b/src/gallium/auxiliary/gallivm/lp_bld_intr.h index 039e9ab..d279911 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_intr.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.h @@ -47,39 +47,44 @@ #define LP_MAX_FUNC_ARGS 32 enum lp_func_attr { LP_FUNC_ATTR_ALWAYSINLINE = (1 << 0), LP_FUNC_ATTR_BYVAL = (1 << 1), LP_FUNC_ATTR_INREG = (1 << 2), LP_FUNC_ATTR_NOALIAS = (1 << 3), LP_FUNC_ATTR_NOUNWIND = (1 << 4), LP_FUNC_ATTR_READNONE = (1 << 5), LP_FUNC_ATTR_READONLY = (1 << 6), + + /* Legacy intrinsic that needs attributes on function declarations + * and they must match the internal LLVM definition exactly, otherwise + * intrinsic selection fails. 
+ */ + LP_FUNC_ATTR_LEGACY = (1u << 31), }; void lp_format_intrinsic(char *name, size_t size, const char *name_root, LLVMTypeRef type); LLVMValueRef lp_declare_intrinsic(LLVMModuleRef module, const char *name, LLVMTypeRef ret_type, LLVMTypeRef *arg_types, unsigned num_args); void -lp_add_function_attr(LLVMValueRef function, - int attr_idx, - enum lp_func_attr attr); +lp_add_function_attr(LLVMContextRef ctx, LLVMValueRef function, + int attr_idx, enum lp_func_attr attr); LLVMValueRef lp_build_intrinsic(LLVMBuilderRef builder, const char *name, LLVMTypeRef ret_type, LLVMValueRef *args, unsigned num_args, unsigned attr_mask); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 161a03f..a1e2601 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -3311,21 +3311,22 @@ lp_build_sample_soa_func(struct gallivm_state *gallivm, val_type[0] = val_type[1] = val_type[2] = val_type[3] = lp_build_vec_type(gallivm, params->type); ret_type = LLVMStructTypeInContext(gallivm->context, val_type, 4, 0); function_type = LLVMFunctionType(ret_type, arg_types, num_param, 0); function = LLVMAddFunction(module, func_name, function_type); for (i = 0; i < num_param; ++i) { if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) { - lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS); + lp_add_function_attr(gallivm->context, function, i + 1, + LP_FUNC_ATTR_NOALIAS); } } LLVMSetFunctionCallConv(function, LLVMFastCallConv); LLVMSetLinkage(function, LLVMInternalLinkage); lp_build_sample_gen_func(gallivm, static_texture_state, static_sampler_state, dynamic_state, diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index af47b52..70b0a67 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -2488,21 +2488,22 @@ generate_fragment(struct llvmpipe_context 
*lp, function = LLVMAddFunction(gallivm->module, func_name, func_type); LLVMSetFunctionCallConv(function, LLVMCCallConv); variant->function[partial_mask] = function; /* XXX: need to propagate noalias down into color param now we are * passing a pointer-to-pointer? */ for(i = 0; i < ARRAY_SIZE(arg_types); ++i) if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) - lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS); + lp_add_function_attr(gallivm->context, function, i + 1, + LP_FUNC_ATTR_NOALIAS); context_ptr = LLVMGetParam(function, 0); x = LLVMGetParam(function, 1); y = LLVMGetParam(function, 2); facing = LLVMGetParam(function, 3); a0_ptr = LLVMGetParam(function, 4); dadx_ptr = LLVMGetParam(function, 5); dady_ptr = LLVMGetParam(function, 6); color_ptr_ptr = LLVMGetParam(function, 7); depth_ptr = LLVMGetParam(function, 8); diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup.c b/src/gallium/drivers/llvmpipe/lp_state_setup.c index 6b0df21..66bc42c 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_state_setup.c @@ -609,29 +609,29 @@ emit_tri_coef( struct gallivm_state *gallivm, default: assert(0); } } } /* XXX: generic code: */ static void -set_noalias(LLVMBuilderRef builder, +set_noalias(LLVMContextRef ctx, LLVMValueRef function, const LLVMTypeRef *arg_types, int nr_args) { int i; for(i = 0; i < nr_args; ++i) if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) - lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS); + lp_add_function_attr(ctx, function, i + 1, LP_FUNC_ATTR_NOALIAS); } static void init_args(struct gallivm_state *gallivm, const struct lp_setup_variant_key *key, struct lp_setup_args *args) { LLVMBuilderRef b = gallivm->builder; LLVMTypeRef shuf_type = LLVMInt32TypeInContext(gallivm->context); LLVMValueRef onef = lp_build_const_float(gallivm, 1.0); @@ -783,21 +783,22 @@ generate_setup_variant(struct lp_setup_variant_key *key, lp_build_name(args.dadx, "out_dadx"); 
lp_build_name(args.dady, "out_dady"); /* * Function body */ block = LLVMAppendBasicBlockInContext(gallivm->context, variant->function, "entry"); LLVMPositionBuilderAtEnd(builder, block); - set_noalias(builder, variant->function, arg_types, ARRAY_SIZE(arg_types)); + set_noalias(gallivm->context, variant->function, arg_types, + ARRAY_SIZE(arg_types)); init_args(gallivm, &variant->key, &args); emit_tri_coef(gallivm, &variant->key, &args); LLVMBuildRetVoid(builder); gallivm_verify_function(gallivm, variant->function); gallivm_compile_module(gallivm); variant->jit_function = (lp_jit_setup_triangle) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index f9eaea2..ea3f4fd 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -394,21 +394,22 @@ static void declare_input_vs( } args[0] = t_list; args[2] = vertex_index; for (unsigned i = 0; i < num_fetches; i++) { args[1] = LLVMConstInt(ctx->i32, fetch_stride * i, 0); input[i] = lp_build_intrinsic(gallivm->builder, "llvm.SI.vs.load.input", ctx->v4f32, args, 3, - LP_FUNC_ATTR_READNONE); + LP_FUNC_ATTR_READNONE | + LP_FUNC_ATTR_LEGACY); } /* Break up the vec4 into individual components */ for (chan = 0; chan < 4; chan++) { LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan); out[chan] = LLVMBuildExtractElement(gallivm->builder, input[0], llvm_chan, ""); } switch (fix_fetch) { @@ -1116,28 +1117,30 @@ static LLVMValueRef fetch_input_gs( args[3] = uint->zero; args[4] = uint->one; /* OFFEN */ args[5] = uint->zero; /* IDXEN */ args[6] = uint->one; /* GLC */ args[7] = uint->zero; /* SLC */ args[8] = uint->zero; /* TFE */ value = lp_build_intrinsic(gallivm->builder, "llvm.SI.buffer.load.dword.i32.i32", ctx->i32, args, 9, - LP_FUNC_ATTR_READONLY); + LP_FUNC_ATTR_READONLY | + LP_FUNC_ATTR_LEGACY); if (tgsi_type_is_64bit(type)) { LLVMValueRef value2; args[2] = lp_build_const_int32(gallivm, (param * 4 + swizzle + 1) * 256); value2 = 
lp_build_intrinsic(gallivm->builder, "llvm.SI.buffer.load.dword.i32.i32", ctx->i32, args, 9, - LP_FUNC_ATTR_READONLY); + LP_FUNC_ATTR_READONLY | + LP_FUNC_ATTR_LEGACY); return si_llvm_emit_fetch_64bit(bld_base, type, value, value2); } return LLVMBuildBitCast(gallivm->builder, value, tgsi2llvmtype(bld_base, type), ""); } static int lookup_interp_param_index(unsigned interpolate, unsigned location) { @@ -1361,21 +1364,22 @@ static LLVMValueRef get_sample_id(struct si_shader_context *radeon_bld) * Load a dword from a constant buffer. */ static LLVMValueRef buffer_load_const(struct si_shader_context *ctx, LLVMValueRef resource, LLVMValueRef offset) { LLVMBuilderRef builder = ctx->gallivm.builder; LLVMValueRef args[2] = {resource, offset}; return lp_build_intrinsic(builder, "llvm.SI.load.const", ctx->f32, args, 2, - LP_FUNC_ATTR_READNONE); + LP_FUNC_ATTR_READNONE | + LP_FUNC_ATTR_LEGACY); } static LLVMValueRef load_sample_position(struct si_shader_context *radeon_bld, LLVMValueRef sample_id) { struct si_shader_context *ctx = si_shader_context(&radeon_bld->bld_base); struct lp_build_context *uint_bld = &radeon_bld->bld_base.uint_bld; struct gallivm_state *gallivm = &radeon_bld->gallivm; LLVMBuilderRef builder = gallivm->builder; LLVMValueRef desc = LLVMGetParam(ctx->main_fn, SI_PARAM_RW_BUFFERS); @@ -1815,21 +1819,22 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base, for (chan = 0; chan < 2; chan++) { LLVMValueRef pack_args[2] = { values[2 * chan], values[2 * chan + 1] }; LLVMValueRef packed; packed = lp_build_intrinsic(base->gallivm->builder, "llvm.SI.packf16", ctx->i32, pack_args, 2, - LP_FUNC_ATTR_READNONE); + LP_FUNC_ATTR_READNONE | + LP_FUNC_ATTR_LEGACY); args[chan + 5] = LLVMBuildBitCast(base->gallivm->builder, packed, ctx->f32, ""); } break; case V_028714_SPI_SHADER_UNORM16_ABGR: for (chan = 0; chan < 4; chan++) { val[chan] = ac_emit_clamp(&ctx->ac, values[chan]); val[chan] = LLVMBuildFMul(builder, val[chan], @@ -1947,24 +1952,24 @@ 
static void si_alpha_test(struct lp_build_tgsi_context *bld_base, lp_build_cmp(&bld_base->base, ctx->shader->key.part.ps.epilog.alpha_func, alpha, alpha_ref); LLVMValueRef arg = lp_build_select(&bld_base->base, alpha_pass, lp_build_const_float(gallivm, 1.0f), lp_build_const_float(gallivm, -1.0f)); lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill", - ctx->voidt, &arg, 1, 0); + ctx->voidt, &arg, 1, LP_FUNC_ATTR_LEGACY); } else { lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kilp", - ctx->voidt, NULL, 0, 0); + ctx->voidt, NULL, 0, LP_FUNC_ATTR_LEGACY); } } static LLVMValueRef si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context *bld_base, LLVMValueRef alpha, unsigned samplemask_param) { struct si_shader_context *ctx = si_shader_context(bld_base); struct gallivm_state *gallivm = bld_base->base.gallivm; LLVMValueRef coverage; @@ -2295,21 +2300,21 @@ handle_semantic: si_llvm_init_export_args(bld_base, outputs[i].values, target, args); if (target >= V_008DFC_SQ_EXP_POS && target <= (V_008DFC_SQ_EXP_POS + 3)) { memcpy(pos_args[target - V_008DFC_SQ_EXP_POS], args, sizeof(args)); } else { lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export", ctx->voidt, - args, 9, 0); + args, 9, LP_FUNC_ATTR_LEGACY); } if (semantic_name == TGSI_SEMANTIC_CLIPDIST) { semantic_name = TGSI_SEMANTIC_GENERIC; goto handle_semantic; } } shader->info.nr_param_exports = param_count; @@ -2381,21 +2386,22 @@ handle_semantic: continue; /* Specify the target we are exporting */ pos_args[i][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS + pos_idx++); if (pos_idx == shader->info.nr_pos_exports) /* Specify that this is the last export */ pos_args[i][2] = uint->one; lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export", - ctx->voidt, pos_args[i], 9, 0); + ctx->voidt, pos_args[i], 9, + LP_FUNC_ATTR_LEGACY); } } /** * Forward all outputs from the vertex shader to the TES. This is only used * for the fixed function TCS. 
*/ static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base) { struct si_shader_context *ctx = si_shader_context(bld_base); @@ -2972,42 +2978,42 @@ static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base, memcpy(exp->args[exp->num++], args, sizeof(args)); } } static void si_emit_ps_exports(struct si_shader_context *ctx, struct si_ps_exports *exp) { for (unsigned i = 0; i < exp->num; i++) lp_build_intrinsic(ctx->gallivm.builder, "llvm.SI.export", ctx->voidt, - exp->args[i], 9, 0); + exp->args[i], 9, LP_FUNC_ATTR_LEGACY); } static void si_export_null(struct lp_build_tgsi_context *bld_base) { struct si_shader_context *ctx = si_shader_context(bld_base); struct lp_build_context *base = &bld_base->base; struct lp_build_context *uint = &bld_base->uint_bld; LLVMValueRef args[9]; args[0] = lp_build_const_int32(base->gallivm, 0x0); /* enabled channels */ args[1] = uint->one; /* whether the EXEC mask is valid */ args[2] = uint->one; /* DONE bit */ args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_NULL); args[4] = uint->zero; /* COMPR flag (0 = 32-bit export) */ args[5] = base->undef; /* R */ args[6] = base->undef; /* G */ args[7] = base->undef; /* B */ args[8] = base->undef; /* A */ lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export", - ctx->voidt, args, 9, 0); + ctx->voidt, args, 9, LP_FUNC_ATTR_LEGACY); } /** * Return PS outputs in this order: * * v[0:3] = color0.xyzw * v[4:7] = color1.xyzw * ... 
* vN+0 = Depth * vN+1 = Stencil @@ -4082,21 +4088,21 @@ static void resq_emit( if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) { out = LLVMBuildExtractElement(builder, emit_data->args[0], lp_build_const_int32(gallivm, 2), ""); } else if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) { out = get_buffer_size(bld_base, emit_data->args[0]); } else { out = lp_build_intrinsic( builder, "llvm.SI.getresinfo.i32", emit_data->dst_type, emit_data->args, emit_data->arg_count, - LP_FUNC_ATTR_READNONE); + LP_FUNC_ATTR_READNONE | LP_FUNC_ATTR_LEGACY); /* Divide the number of layers by 6 to get the number of cubes. */ if (inst->Memory.Texture == TGSI_TEXTURE_CUBE_ARRAY) { LLVMValueRef imm2 = lp_build_const_int32(gallivm, 2); LLVMValueRef imm6 = lp_build_const_int32(gallivm, 6); LLVMValueRef z = LLVMBuildExtractElement(builder, out, imm2, ""); z = LLVMBuildSDiv(builder, z, imm6, ""); out = LLVMBuildInsertElement(builder, out, z, imm2, ""); } @@ -4319,21 +4325,21 @@ static void txq_emit(const struct lp_build_tgsi_action *action, if (target == TGSI_TEXTURE_BUFFER) { /* Just return the buffer size. */ emit_data->output[emit_data->chan] = emit_data->args[0]; return; } emit_data->output[emit_data->chan] = lp_build_intrinsic( base->gallivm->builder, "llvm.SI.getresinfo.i32", emit_data->dst_type, emit_data->args, emit_data->arg_count, - LP_FUNC_ATTR_READNONE); + LP_FUNC_ATTR_READNONE | LP_FUNC_ATTR_LEGACY); /* Divide the number of layers by 6 to get the number of cubes. 
*/ if (target == TGSI_TEXTURE_CUBE_ARRAY || target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { LLVMBuilderRef builder = bld_base->base.gallivm->builder; LLVMValueRef two = lp_build_const_int32(bld_base->base.gallivm, 2); LLVMValueRef six = lp_build_const_int32(bld_base->base.gallivm, 6); LLVMValueRef v4 = emit_data->output[emit_data->chan]; LLVMValueRef z = LLVMBuildExtractElement(builder, v4, two, ""); @@ -4728,21 +4734,21 @@ static void si_lower_gather4_integer(struct si_shader_context *ctx, tmp = LLVMBuildBitCast(builder, tmp, ctx->f32, ""); tmp = LLVMBuildFAdd(builder, tmp, half_texel[c], ""); tmp = LLVMBuildBitCast(builder, tmp, ctx->i32, ""); coord = LLVMBuildInsertElement(builder, coord, tmp, index, ""); } emit_data->args[0] = coord; emit_data->output[emit_data->chan] = lp_build_intrinsic(builder, intr_name, emit_data->dst_type, emit_data->args, emit_data->arg_count, - LP_FUNC_ATTR_READNONE); + LP_FUNC_ATTR_READNONE | LP_FUNC_ATTR_LEGACY); } static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data) { struct si_shader_context *ctx = si_shader_context(bld_base); struct lp_build_context *base = &bld_base->base; const struct tgsi_full_instruction *inst = emit_data->inst; unsigned opcode = inst->Instruction.Opcode; @@ -4752,21 +4758,21 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, bool is_shadow = tgsi_is_shadow_target(target); char type[64]; const char *name = "llvm.SI.image.sample"; const char *infix = ""; if (target == TGSI_TEXTURE_BUFFER) { emit_data->output[emit_data->chan] = lp_build_intrinsic( base->gallivm->builder, "llvm.SI.vs.load.input", emit_data->dst_type, emit_data->args, emit_data->arg_count, - LP_FUNC_ATTR_READNONE); + LP_FUNC_ATTR_READNONE | LP_FUNC_ATTR_LEGACY); return; } switch (opcode) { case TGSI_OPCODE_TXF: name = target == TGSI_TEXTURE_2D_MSAA || target == TGSI_TEXTURE_2D_ARRAY_MSAA ? 
"llvm.SI.image.load" : "llvm.SI.image.load.mip"; is_shadow = false; @@ -4829,21 +4835,21 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, */ si_lower_gather4_integer(ctx, emit_data, intr_name, (int)has_offset + (int)is_shadow); return; } } emit_data->output[emit_data->chan] = lp_build_intrinsic( base->gallivm->builder, intr_name, emit_data->dst_type, emit_data->args, emit_data->arg_count, - LP_FUNC_ATTR_READNONE); + LP_FUNC_ATTR_READNONE | LP_FUNC_ATTR_LEGACY); } static void si_llvm_emit_txqs( const struct lp_build_tgsi_action *action, struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data) { struct si_shader_context *ctx = si_shader_context(bld_base); struct gallivm_state *gallivm = bld_base->base.gallivm; LLVMBuilderRef builder = gallivm->builder; @@ -5118,21 +5124,21 @@ static void si_llvm_emit_vertex( lp_build_const_int32(gallivm, shader->selector->gs_max_out_vertices), ""); bool use_kill = !info->writes_memory; if (use_kill) { kill = lp_build_select(&bld_base->base, can_emit, lp_build_const_float(gallivm, 1.0f), lp_build_const_float(gallivm, -1.0f)); lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill", - ctx->voidt, &kill, 1, 0); + ctx->voidt, &kill, 1, LP_FUNC_ATTR_LEGACY); } else { lp_build_if(&if_state, gallivm, can_emit); } offset = 0; for (i = 0; i < info->num_outputs; i++) { LLVMValueRef *out_ptr = ctx->outputs[i]; for (chan = 0; chan < 4; chan++) { if (!(info->output_usagemask[i] & (1 << chan)) || @@ -5238,24 +5244,26 @@ static void si_create_function(struct si_shader_context *ctx, LLVMValueRef P = LLVMGetParam(ctx->main_fn, i); /* The combination of: * - ByVal * - dereferenceable * - invariant.load * allows the optimization passes to move loads and reduces * SGPR spilling significantly. 
*/ if (LLVMGetTypeKind(LLVMTypeOf(P)) == LLVMPointerTypeKind) { - lp_add_function_attr(ctx->main_fn, i + 1, LP_FUNC_ATTR_BYVAL); + lp_add_function_attr(ctx->gallivm.context, ctx->main_fn, + i + 1, LP_FUNC_ATTR_BYVAL); lp_add_attr_dereferenceable(P, UINT64_MAX); } else - lp_add_function_attr(ctx->main_fn, i + 1, LP_FUNC_ATTR_INREG); + lp_add_function_attr(ctx->gallivm.context, ctx->main_fn, + i + 1, LP_FUNC_ATTR_INREG); } LLVMAddTargetDependentFunctionAttr(ctx->main_fn, "no-signed-zeros-fp-math", "true"); if (ctx->screen->b.debug_flags & DBG_UNSAFE_MATH) { /* These were copied from some LLVM test. */ LLVMAddTargetDependentFunctionAttr(ctx->main_fn, "less-precise-fpmad", @@ -5740,21 +5748,22 @@ static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx, offset = LLVMBuildMul(builder, address[1], LLVMConstInt(ctx->i32, 4, 0), ""); row = buffer_load_const(ctx, desc, offset); row = LLVMBuildBitCast(builder, row, ctx->i32, ""); bit = LLVMBuildLShr(builder, row, address[0], ""); bit = LLVMBuildTrunc(builder, bit, ctx->i1, ""); /* The intrinsic kills the thread if arg < 0. 
*/ bit = LLVMBuildSelect(builder, bit, LLVMConstReal(ctx->f32, 0), LLVMConstReal(ctx->f32, -1), ""); - lp_build_intrinsic(builder, "llvm.AMDGPU.kill", ctx->voidt, &bit, 1, 0); + lp_build_intrinsic(builder, "llvm.AMDGPU.kill", ctx->voidt, &bit, 1, + LP_FUNC_ATTR_LEGACY); } void si_shader_binary_read_config(struct radeon_shader_binary *binary, struct si_shader_config *conf, unsigned symbol_offset) { unsigned i; const unsigned char *config = radeon_shader_binary_config_start(binary, symbol_offset); bool really_needs_scratch = false; @@ -6308,23 +6317,24 @@ si_generate_gs_copy_shader(struct si_screen *sscreen, } args[2] = lp_build_const_int32( gallivm, offset * gs_selector->gs_max_out_vertices * 16 * 4); offset++; outputs[i].values[chan] = LLVMBuildBitCast(gallivm->builder, lp_build_intrinsic(gallivm->builder, - "llvm.SI.buffer.load.dword.i32.i32", - ctx.i32, args, 9, - LP_FUNC_ATTR_READONLY), + "llvm.SI.buffer.load.dword.i32.i32", + ctx.i32, args, 9, + LP_FUNC_ATTR_READONLY | + LP_FUNC_ATTR_LEGACY), ctx.f32, ""); } } /* Streamout and exports. */ if (gs_selector->so.num_outputs) { si_llvm_emit_streamout(&ctx, outputs, gsinfo->num_outputs, stream); } @@ -7047,21 +7057,22 @@ static void si_build_wrapper_function(struct si_shader_context *ctx, LLVMValueRef out[48]; LLVMTypeRef function_type; unsigned num_params; unsigned num_out; MAYBE_UNUSED unsigned num_out_sgpr; /* used in debug checks */ unsigned num_sgprs, num_vgprs; unsigned last_sgpr_param; unsigned gprs; for (unsigned i = 0; i < num_parts; ++i) { - lp_add_function_attr(parts[i], -1, LP_FUNC_ATTR_ALWAYSINLINE); + lp_add_function_attr(gallivm->context, parts[i], -1, + LP_FUNC_ATTR_ALWAYSINLINE); LLVMSetLinkage(parts[i], LLVMPrivateLinkage); } /* The parameters of the wrapper function correspond to those of the * first part in terms of SGPRs and VGPRs, but we use the types of the * main part to get the right types. This is relevant for the * dereferenceable attribute on descriptor table pointers. 
*/ num_sgprs = 0; num_vgprs = 0; @@ -7164,21 +7175,22 @@ static void si_build_wrapper_function(struct si_shader_context *ctx, param_size = llvm_get_type_size(param_type) / 4; is_sgpr = ac_is_sgpr_param(param); if (is_sgpr) { #if HAVE_LLVM < 0x0400 LLVMRemoveAttribute(param, LLVMByValAttribute); #else unsigned kind_id = LLVMGetEnumAttributeKindForName("byval", 5); LLVMRemoveEnumAttributeAtIndex(parts[part], param_idx + 1, kind_id); #endif - lp_add_function_attr(parts[part], param_idx + 1, LP_FUNC_ATTR_INREG); + lp_add_function_attr(gallivm->context, parts[part], + param_idx + 1, LP_FUNC_ATTR_INREG); } assert(out_idx + param_size <= (is_sgpr ? num_out_sgpr : num_out)); assert(is_sgpr || out_idx >= num_out_sgpr); if (param_size == 1) arg = out[out_idx]; else arg = lp_build_gather_values(gallivm, &out[out_idx], param_size); @@ -7679,21 +7691,21 @@ static void si_build_vs_epilog_function(struct si_shader_context *ctx, key->vs_epilog.prim_id_param_offset); args[4] = uint->zero; /* COMPR flag (0 = 32-bit export) */ args[5] = LLVMGetParam(ctx->main_fn, VS_EPILOG_PRIMID_LOC); /* X */ args[6] = base->undef; /* Y */ args[7] = base->undef; /* Z */ args[8] = base->undef; /* W */ lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export", LLVMVoidTypeInContext(base->gallivm->context), - args, 9, 0); + args, 9, LP_FUNC_ATTR_LEGACY); } LLVMBuildRetVoid(gallivm->builder); } /** * Create & compile a vertex shader epilog. This a helper used by VS and TES. 
*/ static bool si_get_vs_epilog(struct si_screen *sscreen, LLVMTargetMachineRef tm, diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c index 10268e9..ee59fed 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c @@ -51,27 +51,23 @@ static void kill_if_fetch_args(struct lp_build_tgsi_context *bld_base, emit_data->arg_count = 1; emit_data->args[0] = LLVMBuildSelect(builder, conds[0], lp_build_const_float(gallivm, -1.0f), bld_base->base.zero, ""); } static void kil_emit(const struct lp_build_tgsi_action *action, struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data) { - unsigned i; - for (i = 0; i < emit_data->arg_count; i++) { - emit_data->output[i] = lp_build_intrinsic_unary( - bld_base->base.gallivm->builder, - action->intr_name, - emit_data->dst_type, emit_data->args[i]); - } + lp_build_intrinsic(bld_base->base.gallivm->builder, + action->intr_name, emit_data->dst_type, + &emit_data->args[0], 1, LP_FUNC_ATTR_LEGACY); } static void emit_icmp(const struct lp_build_tgsi_action *action, struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data) { unsigned pred; LLVMBuilderRef builder = bld_base->base.gallivm->builder; LLVMContextRef context = bld_base->base.gallivm->context; @@ -500,21 +496,23 @@ static void emit_bfe(const struct lp_build_tgsi_action *action, struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data) { struct gallivm_state *gallivm = bld_base->base.gallivm; LLVMBuilderRef builder = gallivm->builder; LLVMValueRef bfe_sm5; LLVMValueRef cond; bfe_sm5 = lp_build_intrinsic(builder, action->intr_name, emit_data->dst_type, emit_data->args, - emit_data->arg_count, LP_FUNC_ATTR_READNONE); + emit_data->arg_count, + LP_FUNC_ATTR_READNONE | + LP_FUNC_ATTR_LEGACY); /* Correct for GLSL semantics. 
*/ cond = LLVMBuildICmp(builder, LLVMIntUGE, emit_data->args[2], lp_build_const_int32(gallivm, 32), ""); emit_data->output[emit_data->chan] = LLVMBuildSelect(builder, cond, emit_data->args[0], bfe_sm5, ""); } /* this is ffs in C */ static void emit_lsb(const struct lp_build_tgsi_action *action, -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev