From: Marek Olšák <[email protected]>

---
 src/gallium/drivers/radeonsi/si_shader.c          | 71 +++++++++--------------
 src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c | 12 ++--
 2 files changed, 31 insertions(+), 52 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index d80d10c..db9a0d7 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1005,30 +1005,29 @@ static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base,
  *
  * \param type         output value type
  * \param swizzle      offset (typically 0..3); it can be ~0, which loads a vec4
  * \param dw_addr      address in dwords
  */
 static LLVMValueRef lds_load(struct lp_build_tgsi_context *bld_base,
                             enum tgsi_opcode_type type, unsigned swizzle,
                             LLVMValueRef dw_addr)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
-       struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMValueRef value;
 
        if (swizzle == ~0) {
                LLVMValueRef values[TGSI_NUM_CHANNELS];
 
                for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++)
                        values[chan] = lds_load(bld_base, type, chan, dw_addr);
 
-               return lp_build_gather_values(gallivm, values,
+               return lp_build_gather_values(&ctx->gallivm, values,
                                              TGSI_NUM_CHANNELS);
        }
 
        dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
                            LLVMConstInt(ctx->i32, swizzle, 0));
 
        value = ac_build_indexed_load(&ctx->ac, ctx->lds, dw_addr, false);
        if (tgsi_type_is_64bit(type)) {
                LLVMValueRef value2;
                dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
@@ -1136,21 +1135,20 @@ static LLVMValueRef fetch_input_tes(
        return buffer_load(bld_base, type, swizzle, buffer, base, addr, true);
 }
 
 static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
                             const struct tgsi_full_instruction *inst,
                             const struct tgsi_opcode_info *info,
                             unsigned index,
                             LLVMValueRef dst[4])
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
-       struct gallivm_state *gallivm = &ctx->gallivm;
        const struct tgsi_full_dst_register *reg = &inst->Dst[index];
        const struct tgsi_shader_info *sh_info = &ctx->shader->selector->info;
        unsigned chan_index;
        LLVMValueRef dw_addr, stride;
        LLVMValueRef buffer, base, buf_addr;
        LLVMValueRef values[4];
        bool skip_lds_store;
        bool is_tess_factor = false, is_tess_inner = false;
 
        /* Only handle per-patch and per-vertex outputs here.
@@ -1220,37 +1218,36 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
                                LLVMBuildStore(ctx->ac.builder, value, /* outer */
                                               ctx->invoc0_tess_factors[chan_index]);
                        } else if (chan_index < 2) {
                                LLVMBuildStore(ctx->ac.builder, value, /* inner */
                                               ctx->invoc0_tess_factors[4 + chan_index]);
                        }
                }
        }
 
        if (reg->Register.WriteMask == 0xF && !is_tess_factor) {
-               LLVMValueRef value = lp_build_gather_values(gallivm,
+               LLVMValueRef value = lp_build_gather_values(&ctx->gallivm,
                                                            values, 4);
                ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buf_addr,
                                            base, 0, 1, 0, true, false);
        }
 }
 
 static LLVMValueRef fetch_input_gs(
        struct lp_build_tgsi_context *bld_base,
        const struct tgsi_full_src_register *reg,
        enum tgsi_opcode_type type,
        unsigned swizzle)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct si_shader *shader = ctx->shader;
        struct lp_build_context *uint = &ctx->bld_base.uint_bld;
-       struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMValueRef vtx_offset, soffset;
        struct tgsi_shader_info *info = &shader->selector->info;
        unsigned semantic_name = info->input_semantic_name[reg->Register.Index];
        unsigned semantic_index = info->input_semantic_index[reg->Register.Index];
        unsigned param;
        LLVMValueRef value;
 
        if (swizzle != ~0 && semantic_name == TGSI_SEMANTIC_PRIMID)
                return get_primitive_id(ctx, swizzle);
 
@@ -1286,21 +1283,21 @@ static LLVMValueRef fetch_input_gs(
                return lds_load(bld_base, type, swizzle, vtx_offset);
        }
 
        /* GFX6: input load from the ESGS ring in memory. */
        if (swizzle == ~0) {
                LLVMValueRef values[TGSI_NUM_CHANNELS];
                unsigned chan;
                for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
                        values[chan] = fetch_input_gs(bld_base, reg, type, chan);
                }
-               return lp_build_gather_values(gallivm, values,
+               return lp_build_gather_values(&ctx->gallivm, values,
                                              TGSI_NUM_CHANNELS);
        }
 
        /* Get the vertex offset parameter on GFX6. */
        unsigned vtx_offset_param = reg->Dimension.Index;
        if (vtx_offset_param < 2) {
                vtx_offset_param += ctx->param_gs_vtx0_offset;
        } else {
                assert(vtx_offset_param < 6);
                vtx_offset_param += ctx->param_gs_vtx2_offset - 2;
@@ -1539,45 +1536,43 @@ static LLVMValueRef buffer_load_const(struct si_shader_context *ctx,
                                      LLVMValueRef resource,
                                      LLVMValueRef offset)
 {
        return ac_build_buffer_load(&ctx->ac, resource, 1, NULL, offset, NULL,
                                    0, 0, 0, true, true);
 }
 
 static LLVMValueRef load_sample_position(struct si_shader_context *ctx, LLVMValueRef sample_id)
 {
        struct lp_build_context *uint_bld = &ctx->bld_base.uint_bld;
-       struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMValueRef desc = LLVMGetParam(ctx->main_fn, ctx->param_rw_buffers);
        LLVMValueRef buf_index = LLVMConstInt(ctx->i32, SI_PS_CONST_SAMPLE_POSITIONS, 0);
        LLVMValueRef resource = ac_build_indexed_load_const(&ctx->ac, desc, buf_index);
 
        /* offset = sample_id * 8  (8 = 2 floats containing samplepos.xy) */
        LLVMValueRef offset0 = lp_build_mul_imm(uint_bld, sample_id, 8);
        LLVMValueRef offset1 = LLVMBuildAdd(ctx->ac.builder, offset0, LLVMConstInt(ctx->i32, 4, 0), "");
 
        LLVMValueRef pos[4] = {
                buffer_load_const(ctx, resource, offset0),
                buffer_load_const(ctx, resource, offset1),
                LLVMConstReal(ctx->f32, 0),
                LLVMConstReal(ctx->f32, 0)
        };
 
-       return lp_build_gather_values(gallivm, pos, 4);
+       return lp_build_gather_values(&ctx->gallivm, pos, 4);
 }
 
 void si_load_system_value(struct si_shader_context *ctx,
                          unsigned index,
                          const struct tgsi_full_declaration *decl)
 {
        struct lp_build_context *bld = &ctx->bld_base.base;
-       struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMValueRef value = 0;
 
        assert(index < RADEON_LLVM_MAX_SYSTEM_VALUES);
 
        switch (decl->Semantic.Name) {
        case TGSI_SEMANTIC_INSTANCEID:
                value = ctx->abi.instance_id;
                break;
 
        case TGSI_SEMANTIC_VERTEXID:
@@ -1630,21 +1625,21 @@ void si_load_system_value(struct si_shader_context *ctx,
        case TGSI_SEMANTIC_POSITION:
        {
                LLVMValueRef pos[4] = {
                        LLVMGetParam(ctx->main_fn, SI_PARAM_POS_X_FLOAT),
                        LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Y_FLOAT),
                        LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Z_FLOAT),
                        lp_build_emit_llvm_unary(&ctx->bld_base, TGSI_OPCODE_RCP,
                                                 LLVMGetParam(ctx->main_fn,
                                                              SI_PARAM_POS_W_FLOAT)),
                };
-               value = lp_build_gather_values(gallivm, pos, 4);
+               value = lp_build_gather_values(&ctx->gallivm, pos, 4);
                break;
        }
 
        case TGSI_SEMANTIC_FACE:
                value = ctx->abi.front_face;
                break;
 
        case TGSI_SEMANTIC_SAMPLEID:
                value = get_sample_id(ctx);
                break;
@@ -1653,21 +1648,21 @@ void si_load_system_value(struct si_shader_context *ctx,
                LLVMValueRef pos[4] = {
                        LLVMGetParam(ctx->main_fn, SI_PARAM_POS_X_FLOAT),
                        LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Y_FLOAT),
                        LLVMConstReal(ctx->f32, 0),
                        LLVMConstReal(ctx->f32, 0)
                };
                pos[0] = lp_build_emit_llvm_unary(&ctx->bld_base,
                                                  TGSI_OPCODE_FRC, pos[0]);
                pos[1] = lp_build_emit_llvm_unary(&ctx->bld_base,
                                                  TGSI_OPCODE_FRC, pos[1]);
-               value = lp_build_gather_values(gallivm, pos, 4);
+               value = lp_build_gather_values(&ctx->gallivm, pos, 4);
                break;
        }
 
        case TGSI_SEMANTIC_SAMPLEMASK:
                /* This can only occur with the OpenGL Core profile, which
                 * doesn't support smoothing.
                 */
                value = LLVMGetParam(ctx->main_fn, SI_PARAM_SAMPLE_COVERAGE);
                break;
 
@@ -1679,21 +1674,21 @@ void si_load_system_value(struct si_shader_context *ctx,
                        bld->zero,
                        bld->zero
                };
 
                /* For triangles, the vector should be (u, v, 1-u-v). */
                if (ctx->shader->selector->info.properties[TGSI_PROPERTY_TES_PRIM_MODE] ==
                    PIPE_PRIM_TRIANGLES)
                        coord[2] = lp_build_sub(bld, bld->one,
                                                lp_build_add(bld, coord[0], coord[1]));
 
-               value = lp_build_gather_values(gallivm, coord, 4);
+               value = lp_build_gather_values(&ctx->gallivm, coord, 4);
                break;
        }
 
        case TGSI_SEMANTIC_VERTICESIN:
                if (ctx->type == PIPE_SHADER_TESS_CTRL)
                        value = unpack_param(ctx, ctx->param_tcs_out_lds_layout, 26, 6);
                else if (ctx->type == PIPE_SHADER_TESS_EVAL)
                        value = get_num_tcs_out_vertices(ctx);
                else
                        assert(!"invalid shader stage for TGSI_SEMANTIC_VERTICESIN");
@@ -1724,21 +1719,21 @@ void si_load_system_value(struct si_shader_context *ctx,
                int i, offset;
 
                slot = LLVMConstInt(ctx->i32, SI_HS_CONST_DEFAULT_TESS_LEVELS, 0);
                buf = LLVMGetParam(ctx->main_fn, ctx->param_rw_buffers);
                buf = ac_build_indexed_load_const(&ctx->ac, buf, slot);
                offset = decl->Semantic.Name == TGSI_SEMANTIC_DEFAULT_TESSINNER_SI ? 4 : 0;
 
                for (i = 0; i < 4; i++)
                        val[i] = buffer_load_const(ctx, buf,
                                                   LLVMConstInt(ctx->i32, (offset + i) * 4, 0));
-               value = lp_build_gather_values(gallivm, val, 4);
+               value = lp_build_gather_values(&ctx->gallivm, val, 4);
                break;
        }
 
        case TGSI_SEMANTIC_PRIMID:
                value = get_primitive_id(ctx, 0);
                break;
 
        case TGSI_SEMANTIC_GRID_SIZE:
                value = LLVMGetParam(ctx->main_fn, ctx->param_grid_size);
                break;
@@ -1752,39 +1747,39 @@ void si_load_system_value(struct si_shader_context *ctx,
                if (properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] != 0) {
                        unsigned sizes[3] = {
                                properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH],
                                properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT],
                                properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH]
                        };
 
                        for (i = 0; i < 3; ++i)
                                values[i] = LLVMConstInt(ctx->i32, sizes[i], 0);
 
-                       value = lp_build_gather_values(gallivm, values, 3);
+                       value = lp_build_gather_values(&ctx->gallivm, values, 3);
                } else {
                        value = LLVMGetParam(ctx->main_fn, ctx->param_block_size);
                }
                break;
        }
 
        case TGSI_SEMANTIC_BLOCK_ID:
        {
                LLVMValueRef values[3];
 
                for (int i = 0; i < 3; i++) {
                        values[i] = ctx->i32_0;
                        if (ctx->param_block_id[i] >= 0) {
                                values[i] = LLVMGetParam(ctx->main_fn,
                                                         ctx->param_block_id[i]);
                        }
                }
-               value = lp_build_gather_values(gallivm, values, 3);
+               value = lp_build_gather_values(&ctx->gallivm, values, 3);
                break;
        }
 
        case TGSI_SEMANTIC_THREAD_ID:
                value = LLVMGetParam(ctx->main_fn, ctx->param_thread_id);
                break;
 
        case TGSI_SEMANTIC_HELPER_INVOCATION:
                value = lp_build_intrinsic(ctx->ac.builder,
                                           "llvm.amdgcn.ps.live",
@@ -1839,30 +1834,29 @@ void si_load_system_value(struct si_shader_context *ctx,
                return;
        }
 
        ctx->system_values[index] = value;
 }
 
 void si_declare_compute_memory(struct si_shader_context *ctx,
                               const struct tgsi_full_declaration *decl)
 {
        struct si_shader_selector *sel = ctx->shader->selector;
-       struct gallivm_state *gallivm = &ctx->gallivm;
 
        LLVMTypeRef i8p = LLVMPointerType(ctx->i8, LOCAL_ADDR_SPACE);
        LLVMValueRef var;
 
        assert(decl->Declaration.MemType == TGSI_MEMORY_TYPE_SHARED);
        assert(decl->Range.First == decl->Range.Last);
        assert(!ctx->shared_memory);
 
-       var = LLVMAddGlobalInAddressSpace(gallivm->module,
+       var = LLVMAddGlobalInAddressSpace(ctx->ac.module,
                                          LLVMArrayType(ctx->i8, sel->local_size),
                                          "compute_lds",
                                          LOCAL_ADDR_SPACE);
        LLVMSetAlignment(var, 4);
 
        ctx->shared_memory = LLVMBuildBitCast(ctx->ac.builder, var, i8p, "");
 }
 
 static LLVMValueRef load_const_buffer_desc(struct si_shader_context *ctx, int i)
 {
@@ -2327,39 +2321,38 @@ static void emit_streamout_output(struct si_shader_context *ctx,
 /**
  * Write streamout data to buffers for vertex stream @p stream (different
  * vertex streams can occur for GS copy shaders).
  */
 static void si_llvm_emit_streamout(struct si_shader_context *ctx,
                                   struct si_shader_output_values *outputs,
                                   unsigned noutput, unsigned stream)
 {
        struct si_shader_selector *sel = ctx->shader->selector;
        struct pipe_stream_output_info *so = &sel->so;
-       struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMBuilderRef builder = ctx->ac.builder;
        int i;
        struct lp_build_if_state if_ctx;
 
        /* Get bits [22:16], i.e. (so_param >> 16) & 127; */
        LLVMValueRef so_vtx_count =
                unpack_param(ctx, ctx->param_streamout_config, 16, 7);
 
        LLVMValueRef tid = ac_get_thread_id(&ctx->ac);
 
        /* can_emit = tid < so_vtx_count; */
        LLVMValueRef can_emit =
                LLVMBuildICmp(builder, LLVMIntULT, tid, so_vtx_count, "");
 
        /* Emit the streamout code conditionally. This actually avoids
         * out-of-bounds buffer access. The hw tells us via the SGPR
         * (so_vtx_count) which threads are allowed to emit streamout data. */
-       lp_build_if(&if_ctx, gallivm, can_emit);
+       lp_build_if(&if_ctx, &ctx->gallivm, can_emit);
        {
                /* The buffer offset is computed as follows:
                 *   ByteOffset = streamout_offset[buffer_id]*4 +
                 *                (streamout_write_index + thread_id)*stride[buffer_id] +
                 *                attrib_offset
                  */
 
                LLVMValueRef so_write_index =
                        LLVMGetParam(ctx->main_fn,
                                     ctx->param_streamout_write_index);
@@ -2658,39 +2651,38 @@ static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base)
 }
 
 static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
                                  LLVMValueRef rel_patch_id,
                                  LLVMValueRef invocation_id,
                                  LLVMValueRef tcs_out_current_patch_data_offset,
                                  LLVMValueRef invoc0_tf_outer[4],
                                  LLVMValueRef invoc0_tf_inner[2])
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
-       struct gallivm_state *gallivm = &ctx->gallivm;
        struct si_shader *shader = ctx->shader;
        unsigned tess_inner_index, tess_outer_index;
        LLVMValueRef lds_base, lds_inner, lds_outer, byteoffset, buffer;
        LLVMValueRef out[6], vec0, vec1, tf_base, inner[4], outer[4];
        unsigned stride, outer_comps, inner_comps, i, offset;
        struct lp_build_if_state if_ctx, inner_if_ctx;
 
        /* Add a barrier before loading tess factors from LDS. */
        if (!shader->key.part.tcs.epilog.invoc0_tess_factors_are_def)
                si_llvm_emit_barrier(NULL, bld_base, NULL);
 
        /* Do this only for invocation 0, because the tess levels are per-patch,
         * not per-vertex.
         *
         * This can't jump, because invocation 0 executes this. It should
         * at least mask out the loads and stores for other invocations.
         */
-       lp_build_if(&if_ctx, gallivm,
+       lp_build_if(&if_ctx, &ctx->gallivm,
                    LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ,
                                  invocation_id, ctx->i32_0, ""));
 
        /* Determine the layout of one tess factor element in the buffer. */
        switch (shader->key.part.tcs.epilog.prim_mode) {
        case PIPE_PRIM_LINES:
                stride = 2; /* 2 dwords, 1 vec2 store */
                outer_comps = 2;
                inner_comps = 0;
                break;
@@ -2748,36 +2740,36 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
        if (shader->key.part.tcs.epilog.prim_mode == PIPE_PRIM_LINES) {
                /* For isolines, the hardware expects tess factors in the
                 * reverse order from what GLSL / TGSI specify.
                 */
                LLVMValueRef tmp = out[0];
                out[0] = out[1];
                out[1] = tmp;
        }
 
        /* Convert the outputs to vectors for stores. */
-       vec0 = lp_build_gather_values(gallivm, out, MIN2(stride, 4));
+       vec0 = lp_build_gather_values(&ctx->gallivm, out, MIN2(stride, 4));
        vec1 = NULL;
 
        if (stride > 4)
-               vec1 = lp_build_gather_values(gallivm, out+4, stride - 4);
+               vec1 = lp_build_gather_values(&ctx->gallivm, out+4, stride - 4);
 
        /* Get the buffer. */
        buffer = desc_from_addr_base64k(ctx, ctx->param_tcs_factor_addr_base64k);
 
        /* Get the offset. */
        tf_base = LLVMGetParam(ctx->main_fn,
                               ctx->param_tcs_factor_offset);
        byteoffset = LLVMBuildMul(ctx->ac.builder, rel_patch_id,
                                  LLVMConstInt(ctx->i32, 4 * stride, 0), "");
 
-       lp_build_if(&inner_if_ctx, gallivm,
+       lp_build_if(&inner_if_ctx, &ctx->gallivm,
                    LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ,
                                  rel_patch_id, ctx->i32_0, ""));
 
        /* Store the dynamic HS control word. */
        offset = 0;
        if (ctx->screen->b.chip_class <= VI) {
                ac_build_buffer_store_dword(&ctx->ac, buffer,
                                            LLVMConstInt(ctx->i32, 0x80000000, 0),
                                            1, ctx->i32_0, tf_base,
                                            offset, 1, 0, true, false);
@@ -2803,34 +2795,34 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
                unsigned param_outer, param_inner;
 
                buf = desc_from_addr_base64k(ctx, ctx->param_tcs_offchip_addr_base64k);
                base = LLVMGetParam(ctx->main_fn, ctx->param_tcs_offchip_offset);
 
                param_outer = si_shader_io_get_unique_index_patch(
                                      TGSI_SEMANTIC_TESSOUTER, 0);
                tf_outer_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, NULL,
                                        LLVMConstInt(ctx->i32, param_outer, 0));
 
-               outer_vec = lp_build_gather_values(gallivm, outer,
+               outer_vec = lp_build_gather_values(&ctx->gallivm, outer,
                                                   util_next_power_of_two(outer_comps));
 
                ac_build_buffer_store_dword(&ctx->ac, buf, outer_vec,
                                            outer_comps, tf_outer_offset,
                                            base, 0, 1, 0, true, false);
                if (inner_comps) {
                        param_inner = si_shader_io_get_unique_index_patch(
                                              TGSI_SEMANTIC_TESSINNER, 0);
                        tf_inner_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, NULL,
                                        LLVMConstInt(ctx->i32, param_inner, 0));
 
                        inner_vec = inner_comps == 1 ? inner[0] :
-                                   lp_build_gather_values(gallivm, inner, inner_comps);
+                                   lp_build_gather_values(&ctx->gallivm, inner, inner_comps);
                        ac_build_buffer_store_dword(&ctx->ac, buf, inner_vec,
                                                    inner_comps, tf_inner_offset,
                                                    base, 0, 1, 0, true, false);
                }
        }
 
        lp_build_endif(&if_ctx);
 }
 
 static LLVMValueRef
@@ -3156,21 +3148,20 @@ static void si_llvm_emit_gs_epilogue(struct lp_build_tgsi_context *bld_base)
 
        if (ctx->screen->b.chip_class >= GFX9)
                lp_build_endif(&ctx->merged_wrap_if_state);
 }
 
 static void si_llvm_emit_vs_epilogue(struct ac_shader_abi *abi,
                                     unsigned max_outputs,
                                     LLVMValueRef *addrs)
 {
        struct si_shader_context *ctx = si_shader_context_from_abi(abi);
-       struct gallivm_state *gallivm = &ctx->gallivm;
        struct tgsi_shader_info *info = &ctx->shader->selector->info;
        struct si_shader_output_values *outputs = NULL;
        int i,j;
 
        assert(!ctx->shader->is_gs_copy_shader);
        assert(info->num_outputs <= max_outputs);
 
        outputs = MALLOC((info->num_outputs + 1) * sizeof(outputs[0]));
 
        /* Vertex color clamping.
@@ -3189,21 +3180,21 @@ static void si_llvm_emit_vs_epilogue(struct ac_shader_abi *abi,
                            info->output_semantic_name[i] != TGSI_SEMANTIC_BCOLOR)
                                continue;
 
                        /* We've found a color. */
                        if (!cond) {
                                /* The state is in the first bit of the user SGPR. */
                                cond = LLVMGetParam(ctx->main_fn,
                                                    ctx->param_vs_state_bits);
                                cond = LLVMBuildTrunc(ctx->ac.builder, cond,
                                                      ctx->i1, "");
-                               lp_build_if(&if_ctx, gallivm, cond);
+                               lp_build_if(&if_ctx, &ctx->gallivm, cond);
                        }
 
                        for (j = 0; j < 4; j++) {
                                addr = addrs[4 * i + j];
                                val = LLVMBuildLoad(ctx->ac.builder, addr, "");
                                val = ac_build_clamp(&ctx->ac, val);
                                LLVMBuildStore(ctx->ac.builder, val, addr);
                        }
                }
 
@@ -3626,40 +3617,38 @@ static void si_llvm_emit_ddxy(
 /*
  * this takes an I,J coordinate pair,
  * and works out the X and Y derivatives.
  * it returns DDX(I), DDX(J), DDY(I), DDY(J).
  */
 static LLVMValueRef si_llvm_emit_ddxy_interp(
        struct lp_build_tgsi_context *bld_base,
        LLVMValueRef interp_ij)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
-       struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMValueRef result[4], a;
        unsigned i;
 
        for (i = 0; i < 2; i++) {
                a = LLVMBuildExtractElement(ctx->ac.builder, interp_ij,
                                            LLVMConstInt(ctx->i32, i, 0), "");
                result[i] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_DDX, a);
                result[2+i] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_DDY, a);
        }
 
-       return lp_build_gather_values(gallivm, result, 4);
+       return lp_build_gather_values(&ctx->gallivm, result, 4);
 }
 
 static void interp_fetch_args(
        struct lp_build_tgsi_context *bld_base,
        struct lp_build_emit_data *emit_data)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
-       struct gallivm_state *gallivm = &ctx->gallivm;
        const struct tgsi_full_instruction *inst = emit_data->inst;
 
        if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET) {
                /* offset is in second src, first two channels */
                emit_data->args[0] = lp_build_emit_fetch(bld_base,
                                                         emit_data->inst, 1,
                                                         TGSI_CHAN_X);
                emit_data->args[1] = lp_build_emit_fetch(bld_base,
                                                         emit_data->inst, 1,
                                                         TGSI_CHAN_Y);
@@ -3691,21 +3680,21 @@ static void interp_fetch_args(
                 * sample position doesn't work.
                 */
                if (ctx->shader->key.mono.u.ps.interpolate_at_sample_force_center) {
                        LLVMValueRef center[4] = {
                                LLVMConstReal(ctx->f32, 0.5),
                                LLVMConstReal(ctx->f32, 0.5),
                                ctx->ac.f32_0,
                                ctx->ac.f32_0,
                        };
 
-                       sample_position = lp_build_gather_values(gallivm, center, 4);
+                       sample_position = lp_build_gather_values(&ctx->gallivm, center, 4);
                } else {
                        sample_position = load_sample_position(ctx, sample_id);
                }
 
                emit_data->args[0] = LLVMBuildExtractElement(ctx->ac.builder,
                                                             sample_position,
                                                             ctx->i32_0, "");
 
                emit_data->args[0] = LLVMBuildFSub(ctx->ac.builder, emit_data->args[0], halfval, "");
                emit_data->args[1] = LLVMBuildExtractElement(ctx->ac.builder,
@@ -3715,21 +3704,20 @@ static void interp_fetch_args(
                emit_data->arg_count = 2;
        }
 }
 
 static void build_interp_intrinsic(const struct lp_build_tgsi_action *action,
                                struct lp_build_tgsi_context *bld_base,
                                struct lp_build_emit_data *emit_data)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct si_shader *shader = ctx->shader;
-       struct gallivm_state *gallivm = &ctx->gallivm;
        const struct tgsi_shader_info *info = &shader->selector->info;
        LLVMValueRef interp_param;
        const struct tgsi_full_instruction *inst = emit_data->inst;
        const struct tgsi_full_src_register *input = &inst->Src[0];
        int input_base, input_array_size;
        int chan;
        int i;
        LLVMValueRef prim_mask = LLVMGetParam(ctx->main_fn, SI_PARAM_PRIM_MASK);
        LLVMValueRef array_idx;
        int interp_param_idx;
@@ -3800,21 +3788,21 @@ static void build_interp_intrinsic(const struct lp_build_tgsi_action *action,
                        interp_el = ac_to_float(&ctx->ac, interp_el);
 
                        temp1 = LLVMBuildFMul(ctx->ac.builder, ddx_el, emit_data->args[0], "");
 
                        temp1 = LLVMBuildFAdd(ctx->ac.builder, temp1, interp_el, "");
 
                        temp2 = LLVMBuildFMul(ctx->ac.builder, ddy_el, emit_data->args[1], "");
 
                        ij_out[i] = LLVMBuildFAdd(ctx->ac.builder, temp2, temp1, "");
                }
-               interp_param = lp_build_gather_values(gallivm, ij_out, 2);
+               interp_param = lp_build_gather_values(&ctx->gallivm, ij_out, 2);
        }
 
        if (interp_param)
                interp_param = ac_to_float(&ctx->ac, interp_param);
 
        for (chan = 0; chan < 4; chan++) {
                LLVMValueRef gather = LLVMGetUndef(LLVMVectorType(ctx->f32, input_array_size));
                unsigned schan = tgsi_util_get_full_src_register_swizzle(&inst->Src[0], chan);
 
                for (unsigned idx = 0; idx < input_array_size; ++idx) {
@@ -3944,21 +3932,20 @@ static unsigned si_llvm_get_stream(struct lp_build_tgsi_context *bld_base,
 /* Emit one vertex from the geometry shader */
 static void si_llvm_emit_vertex(
        const struct lp_build_tgsi_action *action,
        struct lp_build_tgsi_context *bld_base,
        struct lp_build_emit_data *emit_data)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct lp_build_context *uint = &bld_base->uint_bld;
        struct si_shader *shader = ctx->shader;
        struct tgsi_shader_info *info = &shader->selector->info;
-       struct gallivm_state *gallivm = &ctx->gallivm;
        struct lp_build_if_state if_state;
        LLVMValueRef soffset = LLVMGetParam(ctx->main_fn,
                                            ctx->param_gs2vs_offset);
        LLVMValueRef gs_next_vertex;
        LLVMValueRef can_emit, kill;
        unsigned chan, offset;
        int i;
        unsigned stream;
 
        stream = si_llvm_get_stream(bld_base, emit_data);
@@ -3981,21 +3968,21 @@ static void si_llvm_emit_vertex(
                                              shader->selector->gs_max_out_vertices, 0), "");
 
        bool use_kill = !info->writes_memory;
        if (use_kill) {
                kill = lp_build_select(&bld_base->base, can_emit,
                                       LLVMConstReal(ctx->f32, 1.0f),
                                       LLVMConstReal(ctx->f32, -1.0f));
 
                ac_build_kill(&ctx->ac, kill);
        } else {
-               lp_build_if(&if_state, gallivm, can_emit);
+               lp_build_if(&if_state, &ctx->gallivm, can_emit);
        }
 
        offset = 0;
        for (i = 0; i < info->num_outputs; i++) {
                LLVMValueRef *out_ptr = ctx->outputs[i];
 
                for (chan = 0; chan < 4; chan++) {
                        if (!(info->output_usagemask[i] & (1 << chan)) ||
                            ((info->output_streams[i] >> (2 * chan)) & 3) != stream)
                                continue;
@@ -6058,21 +6045,20 @@ static void si_build_gs_prolog_function(struct 
si_shader_context *ctx,
 /**
  * Given a list of shader part functions, build a wrapper function that
  * runs them in sequence to form a monolithic shader.
  */
 static void si_build_wrapper_function(struct si_shader_context *ctx,
                                      LLVMValueRef *parts,
                                      unsigned num_parts,
                                      unsigned main_part,
                                      unsigned next_shader_first_part)
 {
-       struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMBuilderRef builder = ctx->ac.builder;
        /* PS epilog has one arg per color component; gfx9 merged shader
         * prologs need to forward 32 user SGPRs.
         */
        struct si_function_info fninfo;
        LLVMValueRef initial[64], out[64];
        LLVMTypeRef function_type;
        unsigned num_first_params;
        unsigned num_out, initial_num_out;
        MAYBE_UNUSED unsigned num_out_sgpr; /* used in debug checks */
@@ -6215,21 +6201,21 @@ static void si_build_wrapper_function(struct 
si_shader_context *ctx,
 #endif
                                lp_add_function_attr(parts[part], param_idx + 
1, LP_FUNC_ATTR_INREG);
                        }
 
                        assert(out_idx + param_size <= (is_sgpr ? num_out_sgpr 
: num_out));
                        assert(is_sgpr || out_idx >= num_out_sgpr);
 
                        if (param_size == 1)
                                arg = out[out_idx];
                        else
-                               arg = lp_build_gather_values(gallivm, 
&out[out_idx], param_size);
+                               arg = lp_build_gather_values(&ctx->gallivm, 
&out[out_idx], param_size);
 
                        if (LLVMTypeOf(arg) != param_type) {
                                if (LLVMGetTypeKind(param_type) == 
LLVMPointerTypeKind) {
                                        arg = LLVMBuildBitCast(builder, arg, 
ctx->i64, "");
                                        arg = LLVMBuildIntToPtr(builder, arg, 
param_type, "");
                                } else {
                                        arg = LLVMBuildBitCast(builder, arg, 
param_type, "");
                                }
                        }
 
@@ -6636,21 +6622,20 @@ si_get_shader_part(struct si_screen *sscreen,
                        return result;
                }
        }
 
        /* Compile a new one. */
        result = CALLOC_STRUCT(si_shader_part);
        result->key = *key;
 
        struct si_shader shader = {};
        struct si_shader_context ctx;
-       struct gallivm_state *gallivm = &ctx.gallivm;
 
        si_init_shader_ctx(&ctx, sscreen, tm);
        ctx.shader = &shader;
        ctx.type = type;
 
        switch (type) {
        case PIPE_SHADER_VERTEX:
                break;
        case PIPE_SHADER_TESS_CTRL:
                assert(!prolog);
@@ -6668,44 +6653,43 @@ si_get_shader_part(struct si_screen *sscreen,
        default:
                unreachable("bad shader part");
        }
 
        build(&ctx, key);
 
        /* Compile. */
        si_llvm_optimize_module(&ctx);
 
        if (si_compile_llvm(sscreen, &result->binary, &result->config, tm,
-                           gallivm->module, debug, ctx.type, name)) {
+                           ctx.ac.module, debug, ctx.type, name)) {
                FREE(result);
                result = NULL;
                goto out;
        }
 
        result->next = *list;
        *list = result;
 
 out:
        si_llvm_dispose(&ctx);
        mtx_unlock(&sscreen->shader_parts_mutex);
        return result;
 }
 
 static LLVMValueRef si_prolog_get_rw_buffers(struct si_shader_context *ctx)
 {
-       struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMValueRef ptr[2], list;
 
        /* Get the pointer to rw buffers. */
        ptr[0] = LLVMGetParam(ctx->main_fn, SI_SGPR_RW_BUFFERS);
        ptr[1] = LLVMGetParam(ctx->main_fn, SI_SGPR_RW_BUFFERS_HI);
-       list = lp_build_gather_values(gallivm, ptr, 2);
+       list = lp_build_gather_values(&ctx->gallivm, ptr, 2);
        list = LLVMBuildBitCast(ctx->ac.builder, list, ctx->i64, "");
        list = LLVMBuildIntToPtr(ctx->ac.builder, list,
                                 si_const_array(ctx->v4i32, SI_NUM_RW_BUFFERS), 
"");
        return list;
 }
 
 /**
  * Build the vertex shader prolog function.
  *
  * The inputs are the same as VS (a lot of SGPRs and 4 VGPR system values).
@@ -7042,21 +7026,20 @@ static bool si_shader_select_gs_parts(struct si_screen 
*sscreen,
  * - overriding interpolation parameters for the API PS
  * - polygon stippling
  *
  * All preloaded SGPRs and VGPRs are passed through unmodified unless they are
  * overriden by other states. (e.g. per-sample interpolation)
  * Interpolated colors are stored after the preloaded VGPRs.
  */
 static void si_build_ps_prolog_function(struct si_shader_context *ctx,
                                        union si_shader_part_key *key)
 {
-       struct gallivm_state *gallivm = &ctx->gallivm;
        struct si_function_info fninfo;
        LLVMValueRef ret, func;
        int num_returns, i, num_color_channels;
 
        assert(si_need_ps_prolog(key));
 
        si_init_function_info(&fninfo);
 
        /* Declare inputs. */
        for (i = 0; i < key->ps_prolog.num_input_sgprs; i++)
@@ -7227,21 +7210,21 @@ static void si_build_ps_prolog_function(struct 
si_shader_context *ctx,
                /* If the interpolation qualifier is not CONSTANT (-1). */
                if (key->ps_prolog.color_interp_vgpr_index[i] != -1) {
                        unsigned interp_vgpr = key->ps_prolog.num_input_sgprs +
                                               
key->ps_prolog.color_interp_vgpr_index[i];
 
                        /* Get the (i,j) updated by bc_optimize handling. */
                        interp[0] = LLVMBuildExtractValue(ctx->ac.builder, ret,
                                                          interp_vgpr, "");
                        interp[1] = LLVMBuildExtractValue(ctx->ac.builder, ret,
                                                          interp_vgpr + 1, "");
-                       interp_ij = lp_build_gather_values(gallivm, interp, 2);
+                       interp_ij = lp_build_gather_values(&ctx->gallivm, 
interp, 2);
                }
 
                /* Use the absolute location of the input. */
                prim_mask = LLVMGetParam(func, SI_PS_NUM_USER_SGPR);
 
                if (key->ps_prolog.states.color_two_side) {
                        face = LLVMGetParam(func, face_vgpr);
                        face = ac_to_integer(&ctx->ac, face);
                }
 
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c 
b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
index d7ba5c4..7c2afe3 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -226,21 +226,20 @@ image_fetch_rsrc(
                                   target == TGSI_TEXTURE_BUFFER ? 
AC_DESC_BUFFER : AC_DESC_IMAGE,
                                   dcc_off);
 }
 
 static LLVMValueRef image_fetch_coords(
                struct lp_build_tgsi_context *bld_base,
                const struct tgsi_full_instruction *inst,
                unsigned src, LLVMValueRef desc)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
-       struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMBuilderRef builder = ctx->ac.builder;
        unsigned target = inst->Memory.Texture;
        unsigned num_coords = tgsi_util_get_texture_coord_dim(target);
        LLVMValueRef coords[4];
        LLVMValueRef tmp;
        int chan;
 
        for (chan = 0; chan < num_coords; ++chan) {
                tmp = lp_build_emit_fetch(bld_base, inst, src, chan);
                tmp = ac_to_integer(&ctx->ac, tmp);
@@ -276,21 +275,21 @@ static LLVMValueRef image_fetch_coords(
 
        if (num_coords == 1)
                return coords[0];
 
        if (num_coords == 3) {
                /* LLVM has difficulties lowering 3-element vectors. */
                coords[3] = bld_base->uint_bld.undef;
                num_coords = 4;
        }
 
-       return lp_build_gather_values(gallivm, coords, num_coords);
+       return lp_build_gather_values(&ctx->gallivm, coords, num_coords);
 }
 
 /**
  * Append the extra mode bits that are used by image load and store.
  */
 static void image_append_args(
                struct si_shader_context *ctx,
                struct lp_build_emit_data * emit_data,
                unsigned target,
                bool atomic,
@@ -467,38 +466,37 @@ static LLVMValueRef get_memory_ptr(struct 
si_shader_context *ctx,
        ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, addr_space), 
"");
 
        return ptr;
 }
 
 static void load_emit_memory(
                struct si_shader_context *ctx,
                struct lp_build_emit_data *emit_data)
 {
        const struct tgsi_full_instruction *inst = emit_data->inst;
-       struct gallivm_state *gallivm = &ctx->gallivm;
        unsigned writemask = inst->Dst[0].Register.WriteMask;
        LLVMValueRef channels[4], ptr, derived_ptr, index;
        int chan;
 
        ptr = get_memory_ptr(ctx, inst, ctx->f32, 1);
 
        for (chan = 0; chan < 4; ++chan) {
                if (!(writemask & (1 << chan))) {
                        channels[chan] = LLVMGetUndef(ctx->f32);
                        continue;
                }
 
                index = LLVMConstInt(ctx->i32, chan, 0);
                derived_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &index, 1, "");
                channels[chan] = LLVMBuildLoad(ctx->ac.builder, derived_ptr, 
"");
        }
-       emit_data->output[emit_data->chan] = lp_build_gather_values(gallivm, 
channels, 4);
+       emit_data->output[emit_data->chan] = 
lp_build_gather_values(&ctx->gallivm, channels, 4);
 }
 
 /**
  * Return true if the memory accessed by a LOAD or STORE instruction is
  * read-only or write-only, respectively.
  *
  * \param shader_buffers_reverse_access_mask
  *     For LOAD, set this to (store | atomic) slot usage in the shader.
  *     For STORE, set this to (load | atomic) slot usage in the shader.
  * \param images_reverse_access_mask  Same as above, but for images.
@@ -613,34 +611,33 @@ static void load_emit(
                                emit_data->args, emit_data->arg_count,
                                get_load_intr_attribs(can_speculate));
        }
 }
 
 static void store_fetch_args(
                struct lp_build_tgsi_context * bld_base,
                struct lp_build_emit_data * emit_data)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
-       struct gallivm_state *gallivm = &ctx->gallivm;
        const struct tgsi_full_instruction * inst = emit_data->inst;
        struct tgsi_full_src_register memory;
        LLVMValueRef chans[4];
        LLVMValueRef data;
        LLVMValueRef rsrc;
        unsigned chan;
 
        emit_data->dst_type = ctx->voidt;
 
        for (chan = 0; chan < 4; ++chan) {
                chans[chan] = lp_build_emit_fetch(bld_base, inst, 1, chan);
        }
-       data = lp_build_gather_values(gallivm, chans, 4);
+       data = lp_build_gather_values(&ctx->gallivm, chans, 4);
 
        emit_data->args[emit_data->arg_count++] = data;
 
        memory = tgsi_full_src_register_from_dst(&inst->Dst[0]);
 
        if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) {
                LLVMValueRef offset;
                LLVMValueRef tmp;
 
                rsrc = shader_buffer_fetch_rsrc(ctx, &memory, false);
@@ -992,29 +989,28 @@ static void atomic_emit(
        emit_data->output[emit_data->chan] = ac_to_float(&ctx->ac, tmp);
 }
 
 static void set_tex_fetch_args(struct si_shader_context *ctx,
                               struct lp_build_emit_data *emit_data,
                               unsigned target,
                               LLVMValueRef res_ptr, LLVMValueRef samp_ptr,
                               LLVMValueRef *param, unsigned count,
                               unsigned dmask)
 {
-       struct gallivm_state *gallivm = &ctx->gallivm;
        struct ac_image_args args = {};
 
        /* Pad to power of two vector */
        while (count < util_next_power_of_two(count))
                param[count++] = LLVMGetUndef(ctx->i32);
 
        if (count > 1)
-               args.addr = lp_build_gather_values(gallivm, param, count);
+               args.addr = lp_build_gather_values(&ctx->gallivm, param, count);
        else
                args.addr = param[0];
 
        args.resource = res_ptr;
        args.sampler = samp_ptr;
        args.dmask = dmask;
        args.unorm = target == TGSI_TEXTURE_RECT ||
                     target == TGSI_TEXTURE_SHADOWRECT;
        args.da = tgsi_is_array_sampler(target);
 
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to