From: Marek Olšák <[email protected]>

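With the LS output vertex stride emitted as a compile-time constant in the
vertex shader (instead of being unpacked from vs_state_bits at run time),
LLVM is now able to generate ds_write2_b64 instead of ds_write2_b32.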

This saves 20 bytes in one shader binary (a shader that has only 1 output).
---
 src/gallium/drivers/radeonsi/si_shader.c | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 32a6fa0..7c3bd8b 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -391,20 +391,38 @@ static LLVMValueRef get_num_tcs_out_vertices(struct si_shader_context *ctx)
                ctx->shader->selector ?
                
ctx->shader->selector->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] : 0;
 
        /* If !tcs_out_vertices, it's either the fixed-func TCS or the TCS 
epilog. */
        if (ctx->type == PIPE_SHADER_TESS_CTRL && tcs_out_vertices)
                return LLVMConstInt(ctx->i32, tcs_out_vertices, 0);
 
        return unpack_param(ctx, ctx->param_tcs_offchip_layout, 6, 6);
 }
 
+static LLVMValueRef get_tcs_in_vertex_dw_stride(struct si_shader_context *ctx)
+{
+       unsigned stride;
+
+       switch (ctx->type) {
+       case PIPE_SHADER_VERTEX:
+               stride = 
util_last_bit64(ctx->shader->selector->outputs_written);
+               return LLVMConstInt(ctx->i32, stride * 4, 0);
+
+       case PIPE_SHADER_TESS_CTRL:
+               return unpack_param(ctx, ctx->param_vs_state_bits, 24, 8);
+
+       default:
+               assert(0);
+               return NULL;
+       }
+}
+
 static LLVMValueRef get_instance_index_for_fetch(
        struct si_shader_context *ctx,
        unsigned param_start_instance, LLVMValueRef divisor)
 {
        struct gallivm_state *gallivm = &ctx->gallivm;
 
        LLVMValueRef result = ctx->abi.instance_id;
 
        /* The division must be done before START_INSTANCE is added. */
        if (divisor != ctx->i32_1)
@@ -1040,21 +1058,21 @@ static LLVMValueRef desc_from_addr_base64k(struct si_shader_context *ctx,
 }
 
 static LLVMValueRef fetch_input_tcs(
        struct lp_build_tgsi_context *bld_base,
        const struct tgsi_full_src_register *reg,
        enum tgsi_opcode_type type, unsigned swizzle)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
        LLVMValueRef dw_addr, stride;
 
-       stride = unpack_param(ctx, ctx->param_vs_state_bits, 24, 8);
+       stride = get_tcs_in_vertex_dw_stride(ctx);
        dw_addr = get_tcs_in_current_patch_offset(ctx);
        dw_addr = get_dw_address(ctx, NULL, reg, stride, dw_addr);
 
        return lds_load(bld_base, type, swizzle, dw_addr);
 }
 
 static LLVMValueRef fetch_output_tcs(
                struct lp_build_tgsi_context *bld_base,
                const struct tgsi_full_src_register *reg,
                enum tgsi_opcode_type type, unsigned swizzle)
@@ -2603,21 +2621,21 @@ static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base)
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMValueRef invocation_id, buffer, buffer_offset;
        LLVMValueRef lds_vertex_stride, lds_vertex_offset, lds_base;
        uint64_t inputs;
 
        invocation_id = unpack_param(ctx, ctx->param_tcs_rel_ids, 8, 5);
        buffer = desc_from_addr_base64k(ctx, 
ctx->param_tcs_offchip_addr_base64k);
        buffer_offset = LLVMGetParam(ctx->main_fn, 
ctx->param_tcs_offchip_offset);
 
-       lds_vertex_stride = unpack_param(ctx, ctx->param_vs_state_bits, 24, 8);
+       lds_vertex_stride = get_tcs_in_vertex_dw_stride(ctx);
        lds_vertex_offset = LLVMBuildMul(gallivm->builder, invocation_id,
                                         lds_vertex_stride, "");
        lds_base = get_tcs_in_current_patch_offset(ctx);
        lds_base = LLVMBuildAdd(gallivm->builder, lds_base, lds_vertex_offset, 
"");
 
        inputs = ctx->shader->key.mono.u.ff_tcs_inputs_to_copy;
        while (inputs) {
                unsigned i = u_bit_scan64(&inputs);
 
                LLVMValueRef lds_ptr = LLVMBuildAdd(gallivm->builder, lds_base,
@@ -3014,22 +3032,21 @@ static void si_set_es_return_value_for_gs(struct si_shader_context *ctx)
 
 static void si_llvm_emit_ls_epilogue(struct lp_build_tgsi_context *bld_base)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct si_shader *shader = ctx->shader;
        struct tgsi_shader_info *info = &shader->selector->info;
        struct gallivm_state *gallivm = &ctx->gallivm;
        unsigned i, chan;
        LLVMValueRef vertex_id = LLVMGetParam(ctx->main_fn,
                                              ctx->param_rel_auto_id);
-       LLVMValueRef vertex_dw_stride =
-               unpack_param(ctx, ctx->param_vs_state_bits, 24, 8);
+       LLVMValueRef vertex_dw_stride = get_tcs_in_vertex_dw_stride(ctx);
        LLVMValueRef base_dw_addr = LLVMBuildMul(gallivm->builder, vertex_id,
                                                 vertex_dw_stride, "");
 
        /* Write outputs to LDS. The next shader (TCS aka HS) will read
         * its inputs from it. */
        for (i = 0; i < info->num_outputs; i++) {
                LLVMValueRef *out_ptr = ctx->outputs[i];
                unsigned name = info->output_semantic_name[i];
                unsigned index = info->output_semantic_index[i];
 
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to