On 23/02/17 00:01, Marek Olšák wrote:
From: Marek Olšák <marek.ol...@amd.com>

Function attributes can vary at call sites if the intrinsic is NOT a legacy
SI intrinsic. We need this to force readnone or inaccessiblememonly on some
amdgcn intrinsics.

This is only used with LLVM 4.0 and later. Intrinsics only used with
LLVM <= 3.9 don't need the LEGACY flag.

The gallivm and ac changes are in the same patch, because splitting them
would be more complicated given the LEGACY uses all over the place.
---
 src/amd/common/ac_llvm_build.c                    | 23 ++++----
 src/amd/common/ac_llvm_util.c                     | 31 +++++++----
 src/amd/common/ac_llvm_util.h                     | 17 +++---
 src/amd/common/ac_nir_to_llvm.c                   | 63 ++++++++++++++--------
 src/gallium/auxiliary/draw/draw_llvm.c            |  6 ++-
 src/gallium/auxiliary/gallivm/lp_bld_intr.c       | 51 ++++++++++++------
 src/gallium/auxiliary/gallivm/lp_bld_intr.h       | 11 ++--
 src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c |  3 +-
 src/gallium/drivers/llvmpipe/lp_state_fs.c        |  3 +-
 src/gallium/drivers/llvmpipe/lp_state_setup.c     |  7 +--
 src/gallium/drivers/radeonsi/si_shader.c          | 64 ++++++++++++++---------
 src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c | 14 +++--
 12 files changed, 184 insertions(+), 109 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 2f25b14..5c8b7f7 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -75,47 +75,50 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context)
                                                        "amdgpu.uniform", 14);

        ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
 }

 LLVMValueRef
 ac_emit_llvm_intrinsic(struct ac_llvm_context *ctx, const char *name,
                       LLVMTypeRef return_type, LLVMValueRef *params,
                       unsigned param_count, unsigned attrib_mask)
 {
-       LLVMValueRef function;
+       LLVMValueRef function, call;
+       bool set_callsite_attrs = HAVE_LLVM >= 0x0400 &&
+                                 !(attrib_mask & AC_FUNC_ATTR_LEGACY);

        function = LLVMGetNamedFunction(ctx->module, name);
        if (!function) {
                LLVMTypeRef param_types[32], function_type;
                unsigned i;

                assert(param_count <= 32);

                for (i = 0; i < param_count; ++i) {
                        assert(params[i]);
                        param_types[i] = LLVMTypeOf(params[i]);
                }
                function_type =
                    LLVMFunctionType(return_type, param_types, param_count, 0);
                function = LLVMAddFunction(ctx->module, name, function_type);

                LLVMSetFunctionCallConv(function, LLVMCCallConv);
                LLVMSetLinkage(function, LLVMExternalLinkage);

-               attrib_mask |= AC_FUNC_ATTR_NOUNWIND;
-               while (attrib_mask) {
-                       enum ac_func_attr attr = 1u << u_bit_scan(&attrib_mask);
-                       ac_add_function_attr(function, -1, attr);
-               }
+               if (!set_callsite_attrs)
+                       ac_add_func_attributes(ctx->context, function, attrib_mask);
        }
-       return LLVMBuildCall(ctx->builder, function, params, param_count, "");
+
+       call = LLVMBuildCall(ctx->builder, function, params, param_count, "");
+       if (set_callsite_attrs)
+               ac_add_func_attributes(ctx->context, call, attrib_mask);
+       return call;
 }

 LLVMValueRef
 ac_build_gather_values_extended(struct ac_llvm_context *ctx,
                                LLVMValueRef *values,
                                unsigned value_count,
                                unsigned value_stride,
                                bool load)
 {
        LLVMBuilderRef builder = ctx->builder;
@@ -524,21 +527,22 @@ ac_build_tbuffer_store(struct ac_llvm_context *ctx,
        /* The instruction offset field has 12 bits */
        assert(offen || inst_offset < (1 << 12));

        /* The intrinsic is overloaded, we need to add a type suffix for overloading to work. */
        unsigned func = CLAMP(num_channels, 1, 3) - 1;
        const char *types[] = {"i32", "v2i32", "v4i32"};
        char name[256];
        snprintf(name, sizeof(name), "llvm.SI.tbuffer.store.%s", types[func]);

        ac_emit_llvm_intrinsic(ctx, name, ctx->voidt,
-                              args, ARRAY_SIZE(args), 0);
+                              args, ARRAY_SIZE(args),
+                              AC_FUNC_ATTR_LEGACY);
 }

 void
 ac_build_tbuffer_store_dwords(struct ac_llvm_context *ctx,
                              LLVMValueRef rsrc,
                              LLVMValueRef vdata,
                              unsigned num_channels,
                              LLVMValueRef vaddr,
                              LLVMValueRef soffset,
                              unsigned inst_offset)
@@ -836,12 +840,13 @@ LLVMValueRef ac_emit_clamp(struct ac_llvm_context *ctx, LLVMValueRef value)

        const char *intr = HAVE_LLVM >= 0x0308 ? "llvm.AMDGPU.clamp." :
                                                 "llvm.AMDIL.clamp.";
        LLVMValueRef args[3] = {
                value,
                LLVMConstReal(ctx->f32, 0),
                LLVMConstReal(ctx->f32, 1),
        };

        return ac_emit_llvm_intrinsic(ctx, intr, ctx->f32, args, 3,
-                                     AC_FUNC_ATTR_READNONE);
+                                     AC_FUNC_ATTR_READNONE |
+                                     AC_FUNC_ATTR_LEGACY);
 }
diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
index be127c5..fb525dd 100644
--- a/src/amd/common/ac_llvm_util.c
+++ b/src/amd/common/ac_llvm_util.c
@@ -17,21 +17,21 @@
  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  *
  * The above copyright notice and this permission notice (including the
  * next paragraph) shall be included in all copies or substantial portions
  * of the Software.
  *
  */
 /* based on pieces from si_pipe.c and radeon_llvm_emit.c */
 #include "ac_llvm_util.h"
-
+#include "util/bitscan.h"
 #include <llvm-c/Core.h>

 #include "c11/threads.h"

 #include <assert.h>
 #include <stdio.h>
 #include <string.h>

 static void ac_init_llvm_target()
 {
@@ -173,40 +173,53 @@ static const char *attr_to_str(enum ac_func_attr attr)
    case AC_FUNC_ATTR_READNONE: return "readnone";
    case AC_FUNC_ATTR_READONLY: return "readonly";
    default:
           fprintf(stderr, "Unhandled function attribute: %x\n", attr);
           return 0;
    }
 }

 #endif

-void
-ac_add_function_attr(LLVMValueRef function,
-                     int attr_idx,
-                     enum ac_func_attr attr)
+static void
+ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function,
+                     int attr_idx, enum ac_func_attr attr)
 {
-
 #if HAVE_LLVM < 0x0400
    LLVMAttribute llvm_attr = ac_attr_to_llvm_attr(attr);
    if (attr_idx == -1) {
       LLVMAddFunctionAttr(function, llvm_attr);
    } else {
       LLVMAddAttribute(LLVMGetParam(function, attr_idx - 1), llvm_attr);
    }
 #else
-   LLVMContextRef context = LLVMGetModuleContext(LLVMGetGlobalParent(function));
    const char *attr_name = attr_to_str(attr);
    unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name,
                                                       strlen(attr_name));
-   LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(context, kind_id, 0);
-   LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr);
+   LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(ctx, kind_id, 0);
+
+   if (LLVMIsAFunction(function))
+      LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr);
+   else
+      LLVMAddCallSiteAttribute(function, attr_idx, llvm_attr);
 #endif
 }

+void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function,
+                           unsigned attrib_mask)
+{
+       attrib_mask |= AC_FUNC_ATTR_NOUNWIND;
+       attrib_mask &= ~AC_FUNC_ATTR_LEGACY;
+
+       while (attrib_mask) {
+               enum ac_func_attr attr = 1u << u_bit_scan(&attrib_mask);
+               ac_add_function_attr(ctx, function, -1, attr);
+       }
+}
+
 void
 ac_dump_module(LLVMModuleRef module)
 {
        char *str = LLVMPrintModuleToString(module);
        fprintf(stderr, "%s", str);
        LLVMDisposeMessage(str);
 }
diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h
index 93d3d27..4fe4ab4 100644
--- a/src/amd/common/ac_llvm_util.h
+++ b/src/amd/common/ac_llvm_util.h
@@ -34,28 +34,29 @@ extern "C" {
 #endif

 enum ac_func_attr {
        AC_FUNC_ATTR_ALWAYSINLINE = (1 << 0),
        AC_FUNC_ATTR_BYVAL        = (1 << 1),
        AC_FUNC_ATTR_INREG        = (1 << 2),
        AC_FUNC_ATTR_NOALIAS      = (1 << 3),
        AC_FUNC_ATTR_NOUNWIND     = (1 << 4),
        AC_FUNC_ATTR_READNONE     = (1 << 5),
        AC_FUNC_ATTR_READONLY     = (1 << 6),
+
+       /* Legacy intrinsic that needs attributes on function declarations
+        * and they must match the internal LLVM definition exactly, otherwise
+        * intrinsic selection fails.
+        */
+       AC_FUNC_ATTR_LEGACY       = (1u << 31),
 };

 LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, bool supports_spill);

 void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes);
 bool ac_is_sgpr_param(LLVMValueRef param);
-
-void
-ac_add_function_attr(LLVMValueRef function,
-                     int attr_idx,
-                     enum ac_func_attr attr);
-
-void
-ac_dump_module(LLVMModuleRef module);
+void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function,
+                           unsigned attrib_mask);
+void ac_dump_module(LLVMModuleRef module);

 #ifdef __cplusplus
 }
 #endif
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index b3dc63c..4ec19d5 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1039,26 +1039,27 @@ static LLVMValueRef emit_imul_high(struct nir_to_llvm_context *ctx,
        src0 = LLVMBuildSExt(ctx->builder, src0, ctx->i64, "");
        src1 = LLVMBuildSExt(ctx->builder, src1, ctx->i64, "");

        dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
        dst64 = LLVMBuildAShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), 
"");
        result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
        return result;
 }

 static LLVMValueRef emit_bitfield_extract(struct nir_to_llvm_context *ctx,
-                                         const char *intrin,
+                                         const char *intrin, unsigned attr_mask,
                                          LLVMValueRef srcs[3])
 {
        LLVMValueRef result;
        LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], LLVMConstInt(ctx->i32, 32, false), "");
-       result = ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->i32, srcs, 3, AC_FUNC_ATTR_READNONE);
+       result = ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->i32, srcs, 3,
+                                       AC_FUNC_ATTR_READNONE | attr_mask);

        result = LLVMBuildSelect(ctx->builder, icond, srcs[0], result, "");
        return result;
 }

 static LLVMValueRef emit_bitfield_insert(struct nir_to_llvm_context *ctx,
                                         LLVMValueRef src0, LLVMValueRef src1,
                                         LLVMValueRef src2, LLVMValueRef src3)
 {
        LLVMValueRef bfi_args[3], result;
@@ -1418,24 +1419,26 @@ static void visit_alu(struct nir_to_llvm_context *ctx, 
nir_alu_instr *instr)
                break;
        case nir_op_fmin:
                result = emit_intrin_2f_param(ctx, "llvm.minnum",
                                              to_float_type(ctx, def_type), 
src[0], src[1]);
                break;
        case nir_op_ffma:
                result = emit_intrin_3f_param(ctx, "llvm.fma",
                                              to_float_type(ctx, def_type), 
src[0], src[1], src[2]);
                break;
        case nir_op_ibitfield_extract:
-               result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.i32", src);
+               result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.i32",
+                                              AC_FUNC_ATTR_LEGACY, src);
                break;
        case nir_op_ubitfield_extract:
-               result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.u32", src);
+               result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.u32",
+                                              AC_FUNC_ATTR_LEGACY, src);
                break;
        case nir_op_bitfield_insert:
                result = emit_bitfield_insert(ctx, src[0], src[1], src[2], 
src[3]);
                break;
        case nir_op_bitfield_reverse:
                result = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.bitreverse.i32", 
ctx->i32, src, 1, AC_FUNC_ATTR_READNONE);
                break;
        case nir_op_bit_count:
                result = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.ctpop.i32", 
ctx->i32, src, 1, AC_FUNC_ATTR_READNONE);
                break;
@@ -1635,22 +1638,23 @@ static LLVMValueRef radv_lower_gather4_integer(struct 
nir_to_llvm_context *ctx,
                txq_args[txq_arg_count++] = tinfo->args[1];
                txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0xf, 0); /* 
dmask */
                txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* 
unorm */
                txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* 
r128 */
                txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, da ? 1 : 0, 
0);
                txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* 
glc */
                txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* 
slc */
                txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* 
tfe */
                txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* 
lwe */
                size = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.getresinfo.i32", 
ctx->v4i32,
-                                          txq_args, txq_arg_count,
-                                          AC_FUNC_ATTR_READNONE);
+                                             txq_args, txq_arg_count,
+                                             AC_FUNC_ATTR_READNONE |
+                                             AC_FUNC_ATTR_LEGACY);

                for (c = 0; c < 2; c++) {
                        half_texel[c] = LLVMBuildExtractElement(ctx->builder, 
size,
                                                                LLVMConstInt(ctx->i32, c, 
false), "");
                        half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], 
ctx->f32, "");
                        half_texel[c] = ac_emit_fdiv(&ctx->ac, ctx->f32one, 
half_texel[c]);
                        half_texel[c] = LLVMBuildFMul(ctx->builder, 
half_texel[c],
                                                      LLVMConstReal(ctx->f32, -0.5), 
"");
                }
        }
@@ -1660,21 +1664,22 @@ static LLVMValueRef radv_lower_gather4_integer(struct 
nir_to_llvm_context *ctx,
                LLVMValueRef index = LLVMConstInt(ctx->i32, coord_vgpr_index + 
c, 0);
                tmp = LLVMBuildExtractElement(ctx->builder, coord, index, "");
                tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
                tmp = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], "");
                tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
                coord = LLVMBuildInsertElement(ctx->builder, coord, tmp, index, 
"");
        }

        tinfo->args[0] = coord;
        return ac_emit_llvm_intrinsic(&ctx->ac, intr_name, tinfo->dst_type, 
tinfo->args, tinfo->arg_count,
-                                  AC_FUNC_ATTR_READNONE | 
AC_FUNC_ATTR_NOUNWIND);
+                                     AC_FUNC_ATTR_READNONE | 
AC_FUNC_ATTR_NOUNWIND |
+                                     AC_FUNC_ATTR_LEGACY);

 }

 static LLVMValueRef build_tex_intrinsic(struct nir_to_llvm_context *ctx,
                                        nir_tex_instr *instr,
                                        struct ac_tex_info *tinfo)
 {
        const char *name = "llvm.SI.image.sample";
        const char *infix = "";
        char intr_name[127];
@@ -1728,21 +1733,22 @@ static LLVMValueRef build_tex_intrinsic(struct 
nir_to_llvm_context *ctx,
                has_offset ? ".o" : "", type);

        if (instr->op == nir_texop_tg4) {
                enum glsl_base_type stype = 
glsl_get_sampler_result_type(instr->texture->var->type);
                if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) {
                        return radv_lower_gather4_integer(ctx, tinfo, instr, 
intr_name,
                                                          (int)has_offset + 
(int)is_shadow);
                }
        }
        return ac_emit_llvm_intrinsic(&ctx->ac, intr_name, tinfo->dst_type, 
tinfo->args, tinfo->arg_count,
-                                  AC_FUNC_ATTR_READNONE | 
AC_FUNC_ATTR_NOUNWIND);
+                                     AC_FUNC_ATTR_READNONE | 
AC_FUNC_ATTR_NOUNWIND |
+                                     AC_FUNC_ATTR_LEGACY);

 }

 static LLVMValueRef visit_vulkan_resource_index(struct nir_to_llvm_context 
*ctx,
                                                 nir_intrinsic_instr *instr)
 {
        LLVMValueRef index = get_src(ctx, instr->src[0]);
        unsigned desc_set = nir_intrinsic_desc_set(instr);
        unsigned binding = nir_intrinsic_binding(instr);
        LLVMValueRef desc_ptr = ctx->descriptor_sets[desc_set];
@@ -2006,21 +2012,23 @@ static LLVMValueRef visit_load_ubo_buffer(struct 
nir_to_llvm_context *ctx,
        if (instr->dest.ssa.bit_size == 64)
                num_components *= 2;

        for (unsigned i = 0; i < num_components; ++i) {
                LLVMValueRef params[] = {
                        rsrc,
                        LLVMBuildAdd(ctx->builder, LLVMConstInt(ctx->i32, 4 * 
i, 0),
                                     offset, "")
                };
                results[i] = ac_emit_llvm_intrinsic(&ctx->ac, 
"llvm.SI.load.const", ctx->f32,
-                                                params, 2, 
AC_FUNC_ATTR_READNONE);
+                                                   params, 2,
+                                                   AC_FUNC_ATTR_READNONE |
+                                                   AC_FUNC_ATTR_LEGACY);
        }


        ret = ac_build_gather_values(&ctx->ac, results, instr->num_components);
        return LLVMBuildBitCast(ctx->builder, ret,
                                get_def_type(ctx, &instr->dest.ssa), "");
 }

 static void
 radv_get_deref_offset(struct nir_to_llvm_context *ctx, nir_deref *tail,
@@ -2103,21 +2111,23 @@ load_gs_input(struct nir_to_llvm_context *ctx,
                args[1] = vtx_offset;
                args[2] = LLVMConstInt(ctx->i32, (param * 4 + i + const_index) 
* 256, false);
                args[3] = ctx->i32zero;
                args[4] = ctx->i32one; /* OFFEN */
                args[5] = ctx->i32zero; /* IDXEN */
                args[6] = ctx->i32one; /* GLC */
                args[7] = ctx->i32zero; /* SLC */
                args[8] = ctx->i32zero; /* TFE */

                value[i] = ac_emit_llvm_intrinsic(&ctx->ac, 
"llvm.SI.buffer.load.dword.i32.i32",
-                                           ctx->i32, args, 9, 
AC_FUNC_ATTR_READONLY);
+                                                 ctx->i32, args, 9,
+                                                 AC_FUNC_ATTR_READONLY |
+                                                 AC_FUNC_ATTR_LEGACY);
        }
        result = ac_build_gather_values(&ctx->ac, value, instr->num_components);

        return result;
 }

 static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx,
                                   nir_intrinsic_instr *instr)
 {
        LLVMValueRef values[8];
@@ -2685,21 +2695,23 @@ static LLVMValueRef visit_image_size(struct 
nir_to_llvm_context *ctx,
        params[2] = LLVMConstInt(ctx->i32, 15, false);
        params[3] = ctx->i32zero;
        params[4] = ctx->i32zero;
        params[5] = da ? ctx->i32one : ctx->i32zero;
        params[6] = ctx->i32zero;
        params[7] = ctx->i32zero;
        params[8] = ctx->i32zero;
        params[9] = ctx->i32zero;

        res = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.getresinfo.i32", 
ctx->v4i32,
-                                 params, 10, AC_FUNC_ATTR_READNONE);
+                                    params, 10,
+                                    AC_FUNC_ATTR_READNONE |
+                                    AC_FUNC_ATTR_LEGACY);

        if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE &&
            glsl_sampler_type_is_array(type)) {
                LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false);
                LLVMValueRef six = LLVMConstInt(ctx->i32, 6, false);
                LLVMValueRef z = LLVMBuildExtractElement(ctx->builder, res, two, 
"");
                z = LLVMBuildSDiv(ctx->builder, z, six, "");
                res = LLVMBuildInsertElement(ctx->builder, res, z, two, "");
        }
        return res;
@@ -2729,21 +2741,21 @@ static void emit_discard_if(struct nir_to_llvm_context 
*ctx,

        cond = LLVMBuildICmp(ctx->builder, LLVMIntNE,
                             get_src(ctx, instr->src[0]),
                             ctx->i32zero, "");

        cond = LLVMBuildSelect(ctx->builder, cond,
                               LLVMConstReal(ctx->f32, -1.0f),
                               ctx->f32zero, "");
        ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kill",
                               ctx->voidt,
-                              &cond, 1, 0);
+                              &cond, 1, AC_FUNC_ATTR_LEGACY);
 }

 static LLVMValueRef
 visit_load_local_invocation_index(struct nir_to_llvm_context *ctx)
 {
        LLVMValueRef result;
        LLVMValueRef thread_id = ac_get_thread_id(&ctx->ac);
        result = LLVMBuildAnd(ctx->builder, ctx->tg_size,
                              LLVMConstInt(ctx->i32, 0xfc0, false), "");

@@ -2984,21 +2996,21 @@ visit_emit_vertex(struct nir_to_llvm_context *ctx,
         * have any effect, and GS threads have no externally observable
         * effects other than emitting vertices.
         */
        can_emit = LLVMBuildICmp(ctx->builder, LLVMIntULT, gs_next_vertex,
                                 LLVMConstInt(ctx->i32, ctx->gs_max_out_vertices, false), 
"");

        kill = LLVMBuildSelect(ctx->builder, can_emit,
                               LLVMConstReal(ctx->f32, 1.0f),
                               LLVMConstReal(ctx->f32, -1.0f), "");
        ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kill",
-                           ctx->voidt, &kill, 1, 0);
+                           ctx->voidt, &kill, 1, AC_FUNC_ATTR_LEGACY);

        /* loop num outputs */
        idx = 0;
        for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
                LLVMValueRef *out_ptr = &ctx->outputs[i * 4];
                if (!(ctx->output_mask & (1ull << i)))
                        continue;

                for (unsigned j = 0; j < 4; j++) {
                        LLVMValueRef out_val = LLVMBuildLoad(ctx->builder,
@@ -3144,21 +3156,21 @@ static void visit_intrinsic(struct nir_to_llvm_context 
*ctx,
        case nir_intrinsic_image_atomic_comp_swap:
                result = visit_image_atomic(ctx, instr);
                break;
        case nir_intrinsic_image_size:
                result = visit_image_size(ctx, instr);
                break;
        case nir_intrinsic_discard:
                ctx->shader_info->fs.can_discard = true;
                ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kilp",
                                       ctx->voidt,
-                                      NULL, 0, 0);
+                                      NULL, 0, AC_FUNC_ATTR_LEGACY);
                break;
        case nir_intrinsic_discard_if:
                emit_discard_if(ctx, instr);
                break;
        case nir_intrinsic_memory_barrier:
                emit_waitcnt(ctx);
                break;
        case nir_intrinsic_barrier:
                emit_barrier(ctx);
                break;
@@ -3924,21 +3936,22 @@ handle_vs_input_decl(struct nir_to_llvm_context *ctx,

        for (unsigned i = 0; i < attrib_count; ++i, ++idx) {
                t_offset = LLVMConstInt(ctx->i32, index + i, false);

                t_list = ac_build_indexed_load_const(&ctx->ac, t_list_ptr, 
t_offset);
                args[0] = t_list;
                args[1] = LLVMConstInt(ctx->i32, 0, false);
                args[2] = buffer_index;
                input = ac_emit_llvm_intrinsic(&ctx->ac,
                        "llvm.SI.vs.load.input", ctx->v4f32, args, 3,
-                       AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);
+                       AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND |
+                       AC_FUNC_ATTR_LEGACY);

                for (unsigned chan = 0; chan < 4; chan++) {
                        LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, 
false);
                        ctx->inputs[radeon_llvm_reg_index_soa(idx, chan)] =
                                to_integer(ctx, 
LLVMBuildExtractElement(ctx->builder,
                                                        input, llvm_chan, ""));
                }
        }
 }

@@ -4257,22 +4270,23 @@ si_llvm_init_export_args(struct nir_to_llvm_context 
*ctx,
                        args[4] = ctx->i32one;

                        for (unsigned chan = 0; chan < 2; chan++) {
                                LLVMValueRef pack_args[2] = {
                                        values[2 * chan],
                                        values[2 * chan + 1]
                                };
                                LLVMValueRef packed;

                                packed = ac_emit_llvm_intrinsic(&ctx->ac, 
"llvm.SI.packf16",
-                                                            ctx->i32, 
pack_args, 2,
-                                                            
AC_FUNC_ATTR_READNONE);
+                                                               ctx->i32, 
pack_args, 2,
+                                                               
AC_FUNC_ATTR_READNONE |
+                                                               
AC_FUNC_ATTR_LEGACY);
                                args[chan + 5] = packed;
                        }
                        break;

                case V_028714_SPI_SHADER_UNORM16_ABGR:
                        for (unsigned chan = 0; chan < 4; chan++) {
                                val[chan] = emit_float_saturate(ctx, 
values[chan], 0, 1);
                                val[chan] = LLVMBuildFMul(ctx->builder, 
val[chan],
                                                        LLVMConstReal(ctx->f32, 65535), 
"");
                                val[chan] = LLVMBuildFAdd(ctx->builder, 
val[chan],
@@ -4443,21 +4457,22 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx)
                si_llvm_init_export_args(ctx, values, target, args);

                if (target >= V_008DFC_SQ_EXP_POS &&
                    target <= (V_008DFC_SQ_EXP_POS + 3)) {
                        memcpy(pos_args[target - V_008DFC_SQ_EXP_POS],
                               args, sizeof(args));
                } else {
                        ac_emit_llvm_intrinsic(&ctx->ac,
                                               "llvm.SI.export",
                                               ctx->voidt,
-                                              args, 9, 0);
+                                              args, 9,
+                                              AC_FUNC_ATTR_LEGACY);
                }
        }

        /* We need to add the position output manually if it's missing. */
        if (!pos_args[0][0]) {
                pos_args[0][0] = LLVMConstInt(ctx->i32, 0xf, false);
                pos_args[0][1] = ctx->i32zero; /* EXEC mask */
                pos_args[0][2] = ctx->i32zero; /* last export? */
                pos_args[0][3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_POS, 
false);
                pos_args[0][4] = ctx->i32zero; /* COMPR flag */
@@ -4498,21 +4513,22 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx)
                if (!pos_args[i][0])
                        continue;

                /* Specify the target we are exporting */
                pos_args[i][3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_POS + 
pos_idx++, false);
                if (pos_idx == num_pos_exports)
                        pos_args[i][2] = ctx->i32one;
                ac_emit_llvm_intrinsic(&ctx->ac,
                                       "llvm.SI.export",
                                       ctx->voidt,
-                                      pos_args[i], 9, 0);
+                                      pos_args[i], 9,
+                                      AC_FUNC_ATTR_LEGACY);
        }

        ctx->shader_info->vs.pos_exports = num_pos_exports;
        ctx->shader_info->vs.param_exports = param_count;
 }

 static void
 handle_es_outputs_post(struct nir_to_llvm_context *ctx)
 {
        int j;
@@ -4554,21 +4570,22 @@ si_export_mrt_color(struct nir_to_llvm_context *ctx,
        si_llvm_init_export_args(ctx, color, param,
                                 args);

        if (is_last) {
                args[1] = ctx->i32one; /* whether the EXEC mask is valid */
                args[2] = ctx->i32one; /* DONE bit */
        } else if (args[0] == ctx->i32zero)
                return; /* unnecessary NULL export */

        ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.export",
-                           ctx->voidt, args, 9, 0);
+                              ctx->voidt, args, 9,
+                              AC_FUNC_ATTR_LEGACY);
 }

 static void
 si_export_mrt_z(struct nir_to_llvm_context *ctx,
                LLVMValueRef depth, LLVMValueRef stencil,
                LLVMValueRef samplemask)
 {
        LLVMValueRef args[9];
        unsigned mask = 0;
        args[1] = ctx->i32one; /* whether the EXEC mask is valid */
@@ -4598,21 +4615,22 @@ si_export_mrt_z(struct nir_to_llvm_context *ctx,
        }

        /* SI (except OLAND) has a bug that it only looks
         * at the X writemask component. */
        if (ctx->options->chip_class == SI &&
            ctx->options->family != CHIP_OLAND)
                mask |= 0x01;

        args[0] = LLVMConstInt(ctx->i32, mask, false);
        ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.export",
-                           ctx->voidt, args, 9, 0);
+                              ctx->voidt, args, 9,
+                              AC_FUNC_ATTR_LEGACY);
 }

 static void
 handle_fs_outputs_post(struct nir_to_llvm_context *ctx)
 {
        unsigned index = 0;
        LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;

        for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
                LLVMValueRef values[4];
@@ -5021,21 +5039,22 @@ ac_gs_copy_shader_emit(struct nir_to_llvm_context *ctx)

                for (unsigned j = 0; j < 4; j++) {
                        LLVMValueRef value;
                        args[2] = LLVMConstInt(ctx->i32,
                                               (idx * 4 + j) *
                                               ctx->gs_max_out_vertices * 16 * 
4, false);

                        value = ac_emit_llvm_intrinsic(&ctx->ac,
                                                       
"llvm.SI.buffer.load.dword.i32.i32",
                                                       ctx->i32, args, 9,
-                                                      AC_FUNC_ATTR_READONLY);
+                                                      AC_FUNC_ATTR_READONLY |
+                                                      AC_FUNC_ATTR_LEGACY);

                        LLVMBuildStore(ctx->builder,
                                       to_float(ctx, value), 
ctx->outputs[radeon_llvm_reg_index_soa(i, j)]);
                }
                idx++;
        }
        handle_vs_outputs_post(ctx);
 }

 void ac_create_gs_copy_shader(LLVMTargetMachineRef tm,
diff --git a/src/gallium/auxiliary/draw/draw_llvm.c 
b/src/gallium/auxiliary/draw/draw_llvm.c
index 8952dc8..586a9be 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -1588,21 +1588,22 @@ draw_llvm_generate(struct draw_llvm *llvm, struct 
draw_llvm_variant *variant)

    func_type = LLVMFunctionType(LLVMInt8TypeInContext(context),
                                 arg_types, num_arg_types, 0);

    variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);
    variant->function = variant_func;

    LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
    for (i = 0; i < num_arg_types; ++i)
       if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
-         lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);
+         lp_add_function_attr(context, variant_func, i + 1,
+                              LP_FUNC_ATTR_NOALIAS);

    context_ptr               = LLVMGetParam(variant_func, 0);
    io_ptr                    = LLVMGetParam(variant_func, 1);
    vbuffers_ptr              = LLVMGetParam(variant_func, 2);
    count                     = LLVMGetParam(variant_func, 3);
    /*
     * XXX: the maxelt part is unused. Not really useful, since we cannot
     * get index buffer overflows due to vsplit (which provides its own
     * elts buffer, with a different size than what's passed in here).
     */
@@ -2262,21 +2263,22 @@ draw_gs_llvm_generate(struct draw_llvm *llvm,
    func_type = LLVMFunctionType(int32_type, arg_types, ARRAY_SIZE(arg_types), 
0);

    variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);

    variant->function = variant_func;

    LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);

    for (i = 0; i < ARRAY_SIZE(arg_types); ++i)
       if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
-         lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);
+         lp_add_function_attr(context, variant_func, i + 1,
+                              LP_FUNC_ATTR_NOALIAS);

    context_ptr               = LLVMGetParam(variant_func, 0);
    input_array               = LLVMGetParam(variant_func, 1);
    io_ptr                    = LLVMGetParam(variant_func, 2);
    num_prims                 = LLVMGetParam(variant_func, 3);
    system_values.instance_id = LLVMGetParam(variant_func, 4);
    prim_id_ptr               = LLVMGetParam(variant_func, 5);
    system_values.invocation_id = LLVMGetParam(variant_func, 6);

    lp_build_name(context_ptr, "context");
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.c 
b/src/gallium/auxiliary/gallivm/lp_bld_intr.c
index 049671a..1b50e68 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_intr.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.c
@@ -152,83 +152,100 @@ static const char *attr_to_str(enum lp_func_attr attr)
    case LP_FUNC_ATTR_READONLY: return "readonly";
    default:
       _debug_printf("Unhandled function attribute: %x\n", attr);
       return 0;
    }
 }

 #endif

 void
-lp_add_function_attr(LLVMValueRef function,
-                     int attr_idx,
-                     enum lp_func_attr attr)
+lp_add_function_attr(LLVMContextRef ctx, LLVMValueRef function,

If `function` is not always a function, it would be better to rename the parameter to functionOrCall or something like that.

+                     int attr_idx, enum lp_func_attr attr)
 {

 #if HAVE_LLVM < 0x0400
    LLVMAttribute llvm_attr = lp_attr_to_llvm_attr(attr);
    if (attr_idx == -1) {
       LLVMAddFunctionAttr(function, llvm_attr);
    } else {
       LLVMAddAttribute(LLVMGetParam(function, attr_idx - 1), llvm_attr);
    }
 #else
-   LLVMContextRef context = 
LLVMGetModuleContext(LLVMGetGlobalParent(function));

Even when LLVMIsAFunction(function) is false, we could still get the LLVMContextRef:

   LLVMModuleRef module;
   if (LLVMIsAFunction(functionOrCall)) {
      module = LLVMGetGlobalParent(functionOrCall);
   } else {
      LLVMBasicBlockRef bb = LLVMGetInstructionParent(functionOrCall);
      LLVMValueRef function = LLVMGetBasicBlockParent(bb);
      module = LLVMGetGlobalParent(function);
   }
   LLVMContextRef context = LLVMGetModuleContext(module);

This would allow the lp_add_function_attr prototype to be left unchanged.

Otherwise looks good to me.

Reviewed-by: Jose Fonseca <jfons...@vmware.com>

    const char *attr_name = attr_to_str(attr);
    unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name,
                                                       strlen(attr_name));
-   LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(context, kind_id, 0);
-   LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr);
+   LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(ctx, kind_id, 0);
+
+   if (LLVMIsAFunction(function))
+      LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr);
+   else
+      LLVMAddCallSiteAttribute(function, attr_idx, llvm_attr);
 #endif
 }

+static void
+lp_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function,
+                       unsigned attrib_mask)
+{
+   /* NoUnwind indicates that the intrinsic never raises a C++ exception.
+    * Set it for all intrinsics.
+    */
+   attrib_mask |= LP_FUNC_ATTR_NOUNWIND;
+   attrib_mask &= ~LP_FUNC_ATTR_LEGACY;
+
+   while (attrib_mask) {
+      enum lp_func_attr attr = 1u << u_bit_scan(&attrib_mask);
+      lp_add_function_attr(ctx, function, -1, attr);
+   }
+}
+
 LLVMValueRef
 lp_build_intrinsic(LLVMBuilderRef builder,
                    const char *name,
                    LLVMTypeRef ret_type,
                    LLVMValueRef *args,
                    unsigned num_args,
                    unsigned attr_mask)
 {
    LLVMModuleRef module = 
LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
-   LLVMValueRef function;
+   LLVMContextRef ctx = LLVMGetModuleContext(module);
+   LLVMValueRef function, call;
+   bool set_callsite_attrs = HAVE_LLVM >= 0x0400 &&
+                             !(attr_mask & LP_FUNC_ATTR_LEGACY);

    function = LLVMGetNamedFunction(module, name);
    if(!function) {
       LLVMTypeRef arg_types[LP_MAX_FUNC_ARGS];
       unsigned i;

       assert(num_args <= LP_MAX_FUNC_ARGS);

       for(i = 0; i < num_args; ++i) {
          assert(args[i]);
          arg_types[i] = LLVMTypeOf(args[i]);
       }

       function = lp_declare_intrinsic(module, name, ret_type, arg_types, 
num_args);

-      /* NoUnwind indicates that the intrinsic never raises a C++ exception.
-       * Set it for all intrinsics.
-       */
-      attr_mask |= LP_FUNC_ATTR_NOUNWIND;
-
-      while (attr_mask) {
-         enum lp_func_attr attr = 1 << u_bit_scan(&attr_mask);
-         lp_add_function_attr(function, -1, attr);
-      }
+      if (!set_callsite_attrs)
+         lp_add_func_attributes(ctx, function, attr_mask);

       if (gallivm_debug & GALLIVM_DEBUG_IR) {
          lp_debug_dump_value(function);
       }
    }

-   return LLVMBuildCall(builder, function, args, num_args, "");
+   call = LLVMBuildCall(builder, function, args, num_args, "");
+   if (set_callsite_attrs)
+      lp_add_func_attributes(ctx, call, attr_mask);
+   return call;
 }


 LLVMValueRef
 lp_build_intrinsic_unary(LLVMBuilderRef builder,
                          const char *name,
                          LLVMTypeRef ret_type,
                          LLVMValueRef a)
 {
    return lp_build_intrinsic(builder, name, ret_type, &a, 1, 0);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.h 
b/src/gallium/auxiliary/gallivm/lp_bld_intr.h
index 039e9ab..d279911 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_intr.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.h
@@ -47,39 +47,44 @@
 #define LP_MAX_FUNC_ARGS 32

 enum lp_func_attr {
    LP_FUNC_ATTR_ALWAYSINLINE = (1 << 0),
    LP_FUNC_ATTR_BYVAL        = (1 << 1),
    LP_FUNC_ATTR_INREG        = (1 << 2),
    LP_FUNC_ATTR_NOALIAS      = (1 << 3),
    LP_FUNC_ATTR_NOUNWIND     = (1 << 4),
    LP_FUNC_ATTR_READNONE     = (1 << 5),
    LP_FUNC_ATTR_READONLY     = (1 << 6),
+
+   /* Legacy intrinsic that needs attributes on function declarations
+    * and they must match the internal LLVM definition exactly, otherwise
+    * intrinsic selection fails.
+    */
+   LP_FUNC_ATTR_LEGACY       = (1u << 31),
 };

 void
 lp_format_intrinsic(char *name,
                     size_t size,
                     const char *name_root,
                     LLVMTypeRef type);

 LLVMValueRef
 lp_declare_intrinsic(LLVMModuleRef module,
                      const char *name,
                      LLVMTypeRef ret_type,
                      LLVMTypeRef *arg_types,
                      unsigned num_args);

 void
-lp_add_function_attr(LLVMValueRef function,
-                     int attr_idx,
-                     enum lp_func_attr attr);
+lp_add_function_attr(LLVMContextRef ctx, LLVMValueRef function,
+                     int attr_idx, enum lp_func_attr attr);

 LLVMValueRef
 lp_build_intrinsic(LLVMBuilderRef builder,
                    const char *name,
                    LLVMTypeRef ret_type,
                    LLVMValueRef *args,
                    unsigned num_args,
                    unsigned attr_mask);


diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c 
b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 161a03f..a1e2601 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -3311,21 +3311,22 @@ lp_build_sample_soa_func(struct gallivm_state *gallivm,

       val_type[0] = val_type[1] = val_type[2] = val_type[3] =
          lp_build_vec_type(gallivm, params->type);
       ret_type = LLVMStructTypeInContext(gallivm->context, val_type, 4, 0);
       function_type = LLVMFunctionType(ret_type, arg_types, num_param, 0);
       function = LLVMAddFunction(module, func_name, function_type);

       for (i = 0; i < num_param; ++i) {
          if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) {

-            lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS);
+            lp_add_function_attr(gallivm->context, function, i + 1,
+                                 LP_FUNC_ATTR_NOALIAS);
          }
       }

       LLVMSetFunctionCallConv(function, LLVMFastCallConv);
       LLVMSetLinkage(function, LLVMInternalLinkage);

       lp_build_sample_gen_func(gallivm,
                                static_texture_state,
                                static_sampler_state,
                                dynamic_state,
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c 
b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index af47b52..70b0a67 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -2488,21 +2488,22 @@ generate_fragment(struct llvmpipe_context *lp,
    function = LLVMAddFunction(gallivm->module, func_name, func_type);
    LLVMSetFunctionCallConv(function, LLVMCCallConv);

    variant->function[partial_mask] = function;

    /* XXX: need to propagate noalias down into color param now we are
     * passing a pointer-to-pointer?
     */
    for(i = 0; i < ARRAY_SIZE(arg_types); ++i)
       if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
-         lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS);
+         lp_add_function_attr(gallivm->context, function, i + 1,
+                              LP_FUNC_ATTR_NOALIAS);

    context_ptr  = LLVMGetParam(function, 0);
    x            = LLVMGetParam(function, 1);
    y            = LLVMGetParam(function, 2);
    facing       = LLVMGetParam(function, 3);
    a0_ptr       = LLVMGetParam(function, 4);
    dadx_ptr     = LLVMGetParam(function, 5);
    dady_ptr     = LLVMGetParam(function, 6);
    color_ptr_ptr = LLVMGetParam(function, 7);
    depth_ptr    = LLVMGetParam(function, 8);
diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup.c 
b/src/gallium/drivers/llvmpipe/lp_state_setup.c
index 6b0df21..66bc42c 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_setup.c
@@ -609,29 +609,29 @@ emit_tri_coef( struct gallivm_state *gallivm,
       default:
          assert(0);
       }
    }
 }


 /* XXX: generic code:
  */
 static void
-set_noalias(LLVMBuilderRef builder,
+set_noalias(LLVMContextRef ctx,
             LLVMValueRef function,
             const LLVMTypeRef *arg_types,
             int nr_args)
 {
    int i;
    for(i = 0; i < nr_args; ++i)
       if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
-         lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS);
+         lp_add_function_attr(ctx, function, i + 1, LP_FUNC_ATTR_NOALIAS);
 }

 static void
 init_args(struct gallivm_state *gallivm,
           const struct lp_setup_variant_key *key,
           struct lp_setup_args *args)
 {
    LLVMBuilderRef b = gallivm->builder;
    LLVMTypeRef shuf_type = LLVMInt32TypeInContext(gallivm->context);
    LLVMValueRef onef = lp_build_const_float(gallivm, 1.0);
@@ -783,21 +783,22 @@ generate_setup_variant(struct lp_setup_variant_key *key,
    lp_build_name(args.dadx, "out_dadx");
    lp_build_name(args.dady, "out_dady");

    /*
     * Function body
     */
    block = LLVMAppendBasicBlockInContext(gallivm->context,
                                          variant->function, "entry");
    LLVMPositionBuilderAtEnd(builder, block);

-   set_noalias(builder, variant->function, arg_types, ARRAY_SIZE(arg_types));
+   set_noalias(gallivm->context, variant->function, arg_types,
+               ARRAY_SIZE(arg_types));
    init_args(gallivm, &variant->key, &args);
    emit_tri_coef(gallivm, &variant->key, &args);

    LLVMBuildRetVoid(builder);

    gallivm_verify_function(gallivm, variant->function);

    gallivm_compile_module(gallivm);

    variant->jit_function = (lp_jit_setup_triangle)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index f9eaea2..ea3f4fd 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -394,21 +394,22 @@ static void declare_input_vs(
        }

        args[0] = t_list;
        args[2] = vertex_index;

        for (unsigned i = 0; i < num_fetches; i++) {
                args[1] = LLVMConstInt(ctx->i32, fetch_stride * i, 0);

                input[i] = lp_build_intrinsic(gallivm->builder,
                        "llvm.SI.vs.load.input", ctx->v4f32, args, 3,
-                       LP_FUNC_ATTR_READNONE);
+                       LP_FUNC_ATTR_READNONE |
+                       LP_FUNC_ATTR_LEGACY);
        }

        /* Break up the vec4 into individual components */
        for (chan = 0; chan < 4; chan++) {
                LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
                out[chan] = LLVMBuildExtractElement(gallivm->builder,
                                                    input[0], llvm_chan, "");
        }

        switch (fix_fetch) {
@@ -1116,28 +1117,30 @@ static LLVMValueRef fetch_input_gs(
        args[3] = uint->zero;
        args[4] = uint->one;  /* OFFEN */
        args[5] = uint->zero; /* IDXEN */
        args[6] = uint->one;  /* GLC */
        args[7] = uint->zero; /* SLC */
        args[8] = uint->zero; /* TFE */

        value = lp_build_intrinsic(gallivm->builder,
                                   "llvm.SI.buffer.load.dword.i32.i32",
                                   ctx->i32, args, 9,
-                                  LP_FUNC_ATTR_READONLY);
+                                  LP_FUNC_ATTR_READONLY |
+                                  LP_FUNC_ATTR_LEGACY);
        if (tgsi_type_is_64bit(type)) {
                LLVMValueRef value2;
                args[2] = lp_build_const_int32(gallivm, (param * 4 + swizzle + 
1) * 256);
                value2 = lp_build_intrinsic(gallivm->builder,
                                            "llvm.SI.buffer.load.dword.i32.i32",
                                            ctx->i32, args, 9,
-                                           LP_FUNC_ATTR_READONLY);
+                                           LP_FUNC_ATTR_READONLY |
+                                           LP_FUNC_ATTR_LEGACY);
                return si_llvm_emit_fetch_64bit(bld_base, type,
                                                value, value2);
        }
        return LLVMBuildBitCast(gallivm->builder,
                                value,
                                tgsi2llvmtype(bld_base, type), "");
 }

 static int lookup_interp_param_index(unsigned interpolate, unsigned location)
 {
@@ -1361,21 +1364,22 @@ static LLVMValueRef get_sample_id(struct 
si_shader_context *radeon_bld)
  * Load a dword from a constant buffer.
  */
 static LLVMValueRef buffer_load_const(struct si_shader_context *ctx,
                                      LLVMValueRef resource,
                                      LLVMValueRef offset)
 {
        LLVMBuilderRef builder = ctx->gallivm.builder;
        LLVMValueRef args[2] = {resource, offset};

        return lp_build_intrinsic(builder, "llvm.SI.load.const", ctx->f32, 
args, 2,
-                              LP_FUNC_ATTR_READNONE);
+                                 LP_FUNC_ATTR_READNONE |
+                                 LP_FUNC_ATTR_LEGACY);
 }

 static LLVMValueRef load_sample_position(struct si_shader_context *radeon_bld, 
LLVMValueRef sample_id)
 {
        struct si_shader_context *ctx =
                si_shader_context(&radeon_bld->bld_base);
        struct lp_build_context *uint_bld = &radeon_bld->bld_base.uint_bld;
        struct gallivm_state *gallivm = &radeon_bld->gallivm;
        LLVMBuilderRef builder = gallivm->builder;
        LLVMValueRef desc = LLVMGetParam(ctx->main_fn, SI_PARAM_RW_BUFFERS);
@@ -1815,21 +1819,22 @@ static void si_llvm_init_export_args(struct 
lp_build_tgsi_context *bld_base,
                for (chan = 0; chan < 2; chan++) {
                        LLVMValueRef pack_args[2] = {
                                values[2 * chan],
                                values[2 * chan + 1]
                        };
                        LLVMValueRef packed;

                        packed = lp_build_intrinsic(base->gallivm->builder,
                                                    "llvm.SI.packf16",
                                                    ctx->i32, pack_args, 2,
-                                                   LP_FUNC_ATTR_READNONE);
+                                                   LP_FUNC_ATTR_READNONE |
+                                                   LP_FUNC_ATTR_LEGACY);
                        args[chan + 5] =
                                LLVMBuildBitCast(base->gallivm->builder,
                                                 packed, ctx->f32, "");
                }
                break;

        case V_028714_SPI_SHADER_UNORM16_ABGR:
                for (chan = 0; chan < 4; chan++) {
                        val[chan] = ac_emit_clamp(&ctx->ac, values[chan]);
                        val[chan] = LLVMBuildFMul(builder, val[chan],
@@ -1947,24 +1952,24 @@ static void si_alpha_test(struct lp_build_tgsi_context 
*bld_base,
                        lp_build_cmp(&bld_base->base,
                                     ctx->shader->key.part.ps.epilog.alpha_func,
                                     alpha, alpha_ref);
                LLVMValueRef arg =
                        lp_build_select(&bld_base->base,
                                        alpha_pass,
                                        lp_build_const_float(gallivm, 1.0f),
                                        lp_build_const_float(gallivm, -1.0f));

                lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill",
-                                  ctx->voidt, &arg, 1, 0);
+                                  ctx->voidt, &arg, 1, LP_FUNC_ATTR_LEGACY);
        } else {
                lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kilp",
-                                  ctx->voidt, NULL, 0, 0);
+                                  ctx->voidt, NULL, 0, LP_FUNC_ATTR_LEGACY);
        }
 }

 static LLVMValueRef si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context 
*bld_base,
                                                  LLVMValueRef alpha,
                                                  unsigned samplemask_param)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct gallivm_state *gallivm = bld_base->base.gallivm;
        LLVMValueRef coverage;
@@ -2295,21 +2300,21 @@ handle_semantic:

                si_llvm_init_export_args(bld_base, outputs[i].values, target, 
args);

                if (target >= V_008DFC_SQ_EXP_POS &&
                    target <= (V_008DFC_SQ_EXP_POS + 3)) {
                        memcpy(pos_args[target - V_008DFC_SQ_EXP_POS],
                               args, sizeof(args));
                } else {
                        lp_build_intrinsic(base->gallivm->builder,
                                           "llvm.SI.export", ctx->voidt,
-                                          args, 9, 0);
+                                          args, 9, LP_FUNC_ATTR_LEGACY);
                }

                if (semantic_name == TGSI_SEMANTIC_CLIPDIST) {
                        semantic_name = TGSI_SEMANTIC_GENERIC;
                        goto handle_semantic;
                }
        }

        shader->info.nr_param_exports = param_count;

@@ -2381,21 +2386,22 @@ handle_semantic:
                        continue;

                /* Specify the target we are exporting */
                pos_args[i][3] = lp_build_const_int32(base->gallivm, 
V_008DFC_SQ_EXP_POS + pos_idx++);

                if (pos_idx == shader->info.nr_pos_exports)
                        /* Specify that this is the last export */
                        pos_args[i][2] = uint->one;

                lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
-                                  ctx->voidt, pos_args[i], 9, 0);
+                                  ctx->voidt, pos_args[i], 9,
+                                  LP_FUNC_ATTR_LEGACY);
        }
 }

 /**
  * Forward all outputs from the vertex shader to the TES. This is only used
  * for the fixed function TCS.
  */
 static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
@@ -2972,42 +2978,42 @@ static void si_export_mrt_color(struct 
lp_build_tgsi_context *bld_base,
                memcpy(exp->args[exp->num++], args, sizeof(args));
        }
 }

 static void si_emit_ps_exports(struct si_shader_context *ctx,
                               struct si_ps_exports *exp)
 {
        for (unsigned i = 0; i < exp->num; i++)
                lp_build_intrinsic(ctx->gallivm.builder,
                                   "llvm.SI.export", ctx->voidt,
-                                  exp->args[i], 9, 0);
+                                  exp->args[i], 9, LP_FUNC_ATTR_LEGACY);
 }

 static void si_export_null(struct lp_build_tgsi_context *bld_base)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct lp_build_context *base = &bld_base->base;
        struct lp_build_context *uint = &bld_base->uint_bld;
        LLVMValueRef args[9];

        args[0] = lp_build_const_int32(base->gallivm, 0x0); /* enabled channels 
*/
        args[1] = uint->one; /* whether the EXEC mask is valid */
        args[2] = uint->one; /* DONE bit */
        args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_NULL);
        args[4] = uint->zero; /* COMPR flag (0 = 32-bit export) */
        args[5] = base->undef; /* R */
        args[6] = base->undef; /* G */
        args[7] = base->undef; /* B */
        args[8] = base->undef; /* A */

        lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
-                          ctx->voidt, args, 9, 0);
+                          ctx->voidt, args, 9, LP_FUNC_ATTR_LEGACY);
 }

 /**
  * Return PS outputs in this order:
  *
  * v[0:3] = color0.xyzw
  * v[4:7] = color1.xyzw
  * ...
  * vN+0 = Depth
  * vN+1 = Stencil
@@ -4082,21 +4088,21 @@ static void resq_emit(

        if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
                out = LLVMBuildExtractElement(builder, emit_data->args[0],
                                              lp_build_const_int32(gallivm, 2), 
"");
        } else if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
                out = get_buffer_size(bld_base, emit_data->args[0]);
        } else {
                out = lp_build_intrinsic(
                        builder, "llvm.SI.getresinfo.i32", emit_data->dst_type,
                        emit_data->args, emit_data->arg_count,
-                       LP_FUNC_ATTR_READNONE);
+                       LP_FUNC_ATTR_READNONE | LP_FUNC_ATTR_LEGACY);

                /* Divide the number of layers by 6 to get the number of cubes. 
*/
                if (inst->Memory.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
                        LLVMValueRef imm2 = lp_build_const_int32(gallivm, 2);
                        LLVMValueRef imm6 = lp_build_const_int32(gallivm, 6);

                        LLVMValueRef z = LLVMBuildExtractElement(builder, out, imm2, 
"");
                        z = LLVMBuildSDiv(builder, z, imm6, "");
                        out = LLVMBuildInsertElement(builder, out, z, imm2, "");
                }
@@ -4319,21 +4325,21 @@ static void txq_emit(const struct lp_build_tgsi_action 
*action,

        if (target == TGSI_TEXTURE_BUFFER) {
                /* Just return the buffer size. */
                emit_data->output[emit_data->chan] = emit_data->args[0];
                return;
        }

        emit_data->output[emit_data->chan] = lp_build_intrinsic(
                base->gallivm->builder, "llvm.SI.getresinfo.i32",
                emit_data->dst_type, emit_data->args, emit_data->arg_count,
-               LP_FUNC_ATTR_READNONE);
+               LP_FUNC_ATTR_READNONE | LP_FUNC_ATTR_LEGACY);

        /* Divide the number of layers by 6 to get the number of cubes. */
        if (target == TGSI_TEXTURE_CUBE_ARRAY ||
            target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
                LLVMBuilderRef builder = bld_base->base.gallivm->builder;
                LLVMValueRef two = lp_build_const_int32(bld_base->base.gallivm, 
2);
                LLVMValueRef six = lp_build_const_int32(bld_base->base.gallivm, 
6);

                LLVMValueRef v4 = emit_data->output[emit_data->chan];
                LLVMValueRef z = LLVMBuildExtractElement(builder, v4, two, "");
@@ -4728,21 +4734,21 @@ static void si_lower_gather4_integer(struct 
si_shader_context *ctx,
                tmp = LLVMBuildBitCast(builder, tmp, ctx->f32, "");
                tmp = LLVMBuildFAdd(builder, tmp, half_texel[c], "");
                tmp = LLVMBuildBitCast(builder, tmp, ctx->i32, "");
                coord = LLVMBuildInsertElement(builder, coord, tmp, index, "");
        }

        emit_data->args[0] = coord;
        emit_data->output[emit_data->chan] =
                lp_build_intrinsic(builder, intr_name, emit_data->dst_type,
                                   emit_data->args, emit_data->arg_count,
-                                  LP_FUNC_ATTR_READNONE);
+                                  LP_FUNC_ATTR_READNONE | LP_FUNC_ATTR_LEGACY);
 }

 static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
                                struct lp_build_tgsi_context *bld_base,
                                struct lp_build_emit_data *emit_data)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct lp_build_context *base = &bld_base->base;
        const struct tgsi_full_instruction *inst = emit_data->inst;
        unsigned opcode = inst->Instruction.Opcode;
@@ -4752,21 +4758,21 @@ static void build_tex_intrinsic(const struct 
lp_build_tgsi_action *action,
        bool is_shadow = tgsi_is_shadow_target(target);
        char type[64];
        const char *name = "llvm.SI.image.sample";
        const char *infix = "";

        if (target == TGSI_TEXTURE_BUFFER) {
                emit_data->output[emit_data->chan] = lp_build_intrinsic(
                        base->gallivm->builder,
                        "llvm.SI.vs.load.input", emit_data->dst_type,
                        emit_data->args, emit_data->arg_count,
-                       LP_FUNC_ATTR_READNONE);
+                       LP_FUNC_ATTR_READNONE | LP_FUNC_ATTR_LEGACY);
                return;
        }

        switch (opcode) {
        case TGSI_OPCODE_TXF:
                name = target == TGSI_TEXTURE_2D_MSAA ||
                       target == TGSI_TEXTURE_2D_ARRAY_MSAA ?
                               "llvm.SI.image.load" :
                               "llvm.SI.image.load.mip";
                is_shadow = false;
@@ -4829,21 +4835,21 @@ static void build_tex_intrinsic(const struct 
lp_build_tgsi_action *action,
                         */
                        si_lower_gather4_integer(ctx, emit_data, intr_name,
                                                 (int)has_offset + 
(int)is_shadow);
                        return;
                }
        }

        emit_data->output[emit_data->chan] = lp_build_intrinsic(
                base->gallivm->builder, intr_name, emit_data->dst_type,
                emit_data->args, emit_data->arg_count,
-               LP_FUNC_ATTR_READNONE);
+               LP_FUNC_ATTR_READNONE | LP_FUNC_ATTR_LEGACY);
 }

 static void si_llvm_emit_txqs(
        const struct lp_build_tgsi_action *action,
        struct lp_build_tgsi_context *bld_base,
        struct lp_build_emit_data *emit_data)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct gallivm_state *gallivm = bld_base->base.gallivm;
        LLVMBuilderRef builder = gallivm->builder;
@@ -5118,21 +5124,21 @@ static void si_llvm_emit_vertex(
                                 lp_build_const_int32(gallivm,
                                                      
shader->selector->gs_max_out_vertices), "");

        bool use_kill = !info->writes_memory;
        if (use_kill) {
                kill = lp_build_select(&bld_base->base, can_emit,
                                       lp_build_const_float(gallivm, 1.0f),
                                       lp_build_const_float(gallivm, -1.0f));

                lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill",
-                                  ctx->voidt, &kill, 1, 0);
+                                  ctx->voidt, &kill, 1, LP_FUNC_ATTR_LEGACY);
        } else {
                lp_build_if(&if_state, gallivm, can_emit);
        }

        offset = 0;
        for (i = 0; i < info->num_outputs; i++) {
                LLVMValueRef *out_ptr = ctx->outputs[i];

                for (chan = 0; chan < 4; chan++) {
                        if (!(info->output_usagemask[i] & (1 << chan)) ||
@@ -5238,24 +5244,26 @@ static void si_create_function(struct si_shader_context *ctx,
                LLVMValueRef P = LLVMGetParam(ctx->main_fn, i);

                /* The combination of:
                 * - ByVal
                 * - dereferenceable
                 * - invariant.load
                 * allows the optimization passes to move loads and reduces
                 * SGPR spilling significantly.
                 */
                if (LLVMGetTypeKind(LLVMTypeOf(P)) == LLVMPointerTypeKind) {
-                       lp_add_function_attr(ctx->main_fn, i + 1, LP_FUNC_ATTR_BYVAL);
+                       lp_add_function_attr(ctx->gallivm.context, ctx->main_fn,
+                                             i + 1, LP_FUNC_ATTR_BYVAL);
                        lp_add_attr_dereferenceable(P, UINT64_MAX);
                } else
-                       lp_add_function_attr(ctx->main_fn, i + 1, LP_FUNC_ATTR_INREG);
+                       lp_add_function_attr(ctx->gallivm.context, ctx->main_fn,
+                                             i + 1, LP_FUNC_ATTR_INREG);
        }

        LLVMAddTargetDependentFunctionAttr(ctx->main_fn,
                                           "no-signed-zeros-fp-math",
                                           "true");

        if (ctx->screen->b.debug_flags & DBG_UNSAFE_MATH) {
                /* These were copied from some LLVM test. */
                LLVMAddTargetDependentFunctionAttr(ctx->main_fn,
                                                   "less-precise-fpmad",
@@ -5740,21 +5748,22 @@ static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx,
        offset = LLVMBuildMul(builder, address[1],
                              LLVMConstInt(ctx->i32, 4, 0), "");
        row = buffer_load_const(ctx, desc, offset);
        row = LLVMBuildBitCast(builder, row, ctx->i32, "");
        bit = LLVMBuildLShr(builder, row, address[0], "");
        bit = LLVMBuildTrunc(builder, bit, ctx->i1, "");

        /* The intrinsic kills the thread if arg < 0. */
        bit = LLVMBuildSelect(builder, bit, LLVMConstReal(ctx->f32, 0),
                              LLVMConstReal(ctx->f32, -1), "");
-       lp_build_intrinsic(builder, "llvm.AMDGPU.kill", ctx->voidt, &bit, 1, 0);
+       lp_build_intrinsic(builder, "llvm.AMDGPU.kill", ctx->voidt, &bit, 1,
+                          LP_FUNC_ATTR_LEGACY);
 }

 void si_shader_binary_read_config(struct radeon_shader_binary *binary,
                                  struct si_shader_config *conf,
                                  unsigned symbol_offset)
 {
        unsigned i;
        const unsigned char *config =
                radeon_shader_binary_config_start(binary, symbol_offset);
        bool really_needs_scratch = false;
@@ -6308,23 +6317,24 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
                                }

                                args[2] = lp_build_const_int32(
                                        gallivm,
                                        offset * gs_selector->gs_max_out_vertices * 16 * 4);
                                offset++;

                                outputs[i].values[chan] =
                                        LLVMBuildBitCast(gallivm->builder,
                                                 lp_build_intrinsic(gallivm->builder,
-                                                                "llvm.SI.buffer.load.dword.i32.i32",
-                                                                ctx.i32, args, 9,
-                                                                LP_FUNC_ATTR_READONLY),
+                                                                   "llvm.SI.buffer.load.dword.i32.i32",
+                                                                   ctx.i32, args, 9,
+                                                                   LP_FUNC_ATTR_READONLY |
+                                                                   LP_FUNC_ATTR_LEGACY),
                                                 ctx.f32, "");
                        }
                }

                /* Streamout and exports. */
                if (gs_selector->so.num_outputs) {
                        si_llvm_emit_streamout(&ctx, outputs,
                                               gsinfo->num_outputs,
                                               stream);
                }
@@ -7047,21 +7057,22 @@ static void si_build_wrapper_function(struct si_shader_context *ctx,
        LLVMValueRef out[48];
        LLVMTypeRef function_type;
        unsigned num_params;
        unsigned num_out;
        MAYBE_UNUSED unsigned num_out_sgpr; /* used in debug checks */
        unsigned num_sgprs, num_vgprs;
        unsigned last_sgpr_param;
        unsigned gprs;

        for (unsigned i = 0; i < num_parts; ++i) {
-               lp_add_function_attr(parts[i], -1, LP_FUNC_ATTR_ALWAYSINLINE);
+               lp_add_function_attr(gallivm->context, parts[i], -1,
+                                    LP_FUNC_ATTR_ALWAYSINLINE);
                LLVMSetLinkage(parts[i], LLVMPrivateLinkage);
        }

        /* The parameters of the wrapper function correspond to those of the
         * first part in terms of SGPRs and VGPRs, but we use the types of the
         * main part to get the right types. This is relevant for the
         * dereferenceable attribute on descriptor table pointers.
         */
        num_sgprs = 0;
        num_vgprs = 0;
@@ -7164,21 +7175,22 @@ static void si_build_wrapper_function(struct si_shader_context *ctx,
                        param_size = llvm_get_type_size(param_type) / 4;
                        is_sgpr = ac_is_sgpr_param(param);

                        if (is_sgpr) {
 #if HAVE_LLVM < 0x0400
                                LLVMRemoveAttribute(param, LLVMByValAttribute);
 #else
                                unsigned kind_id = LLVMGetEnumAttributeKindForName("byval", 5);
                                LLVMRemoveEnumAttributeAtIndex(parts[part], param_idx + 1, kind_id);
 #endif
-                               lp_add_function_attr(parts[part], param_idx + 1, LP_FUNC_ATTR_INREG);
+                               lp_add_function_attr(gallivm->context, parts[part],
+                                                    param_idx + 1, LP_FUNC_ATTR_INREG);
                        }

                        assert(out_idx + param_size <= (is_sgpr ? num_out_sgpr : num_out));
                        assert(is_sgpr || out_idx >= num_out_sgpr);

                        if (param_size == 1)
                                arg = out[out_idx];
                        else
                                arg = lp_build_gather_values(gallivm, &out[out_idx], param_size);

@@ -7679,21 +7691,21 @@ static void si_build_vs_epilog_function(struct si_shader_context *ctx,
                                               key->vs_epilog.prim_id_param_offset);
                args[4] = uint->zero; /* COMPR flag (0 = 32-bit export) */
                args[5] = LLVMGetParam(ctx->main_fn,
                                       VS_EPILOG_PRIMID_LOC); /* X */
                args[6] = base->undef; /* Y */
                args[7] = base->undef; /* Z */
                args[8] = base->undef; /* W */

                lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
                                   LLVMVoidTypeInContext(base->gallivm->context),
-                                  args, 9, 0);
+                                  args, 9, LP_FUNC_ATTR_LEGACY);
        }

        LLVMBuildRetVoid(gallivm->builder);
 }

 /**
  * Create & compile a vertex shader epilog. This a helper used by VS and TES.
  */
 static bool si_get_vs_epilog(struct si_screen *sscreen,
                             LLVMTargetMachineRef tm,
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
index 10268e9..ee59fed 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
@@ -51,27 +51,23 @@ static void kill_if_fetch_args(struct lp_build_tgsi_context *bld_base,
        emit_data->arg_count = 1;
        emit_data->args[0] = LLVMBuildSelect(builder, conds[0],
                                        lp_build_const_float(gallivm, -1.0f),
                                        bld_base->base.zero, "");
 }

 static void kil_emit(const struct lp_build_tgsi_action *action,
                     struct lp_build_tgsi_context *bld_base,
                     struct lp_build_emit_data *emit_data)
 {
-       unsigned i;
-       for (i = 0; i < emit_data->arg_count; i++) {
-               emit_data->output[i] = lp_build_intrinsic_unary(
-                       bld_base->base.gallivm->builder,
-                       action->intr_name,
-                       emit_data->dst_type, emit_data->args[i]);
-       }
+       lp_build_intrinsic(bld_base->base.gallivm->builder,
+                          action->intr_name, emit_data->dst_type,
+                          &emit_data->args[0], 1, LP_FUNC_ATTR_LEGACY);
 }

 static void emit_icmp(const struct lp_build_tgsi_action *action,
                      struct lp_build_tgsi_context *bld_base,
                      struct lp_build_emit_data *emit_data)
 {
        unsigned pred;
        LLVMBuilderRef builder = bld_base->base.gallivm->builder;
        LLVMContextRef context = bld_base->base.gallivm->context;

@@ -500,21 +496,23 @@ static void emit_bfe(const struct lp_build_tgsi_action *action,
                     struct lp_build_tgsi_context *bld_base,
                     struct lp_build_emit_data *emit_data)
 {
        struct gallivm_state *gallivm = bld_base->base.gallivm;
        LLVMBuilderRef builder = gallivm->builder;
        LLVMValueRef bfe_sm5;
        LLVMValueRef cond;

        bfe_sm5 = lp_build_intrinsic(builder, action->intr_name,
                                     emit_data->dst_type, emit_data->args,
-                                    emit_data->arg_count, LP_FUNC_ATTR_READNONE);
+                                    emit_data->arg_count,
+                                    LP_FUNC_ATTR_READNONE |
+                                    LP_FUNC_ATTR_LEGACY);

        /* Correct for GLSL semantics. */
        cond = LLVMBuildICmp(builder, LLVMIntUGE, emit_data->args[2],
                             lp_build_const_int32(gallivm, 32), "");
        emit_data->output[emit_data->chan] =
                LLVMBuildSelect(builder, cond, emit_data->args[0], bfe_sm5, "");
 }

 /* this is ffs in C */
 static void emit_lsb(const struct lp_build_tgsi_action *action,


_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to