From: Marek Olšák <[email protected]>

This saves 44 bytes in a monolithic LS-HS binary.
---
 src/gallium/drivers/radeonsi/si_shader.c        | 5 +++++
 src/gallium/drivers/radeonsi/si_state_shaders.c | 7 ++++++-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 7c3bd8b..d622304 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -401,20 +401,25 @@ static LLVMValueRef get_num_tcs_out_vertices(struct si_shader_context *ctx)
 static LLVMValueRef get_tcs_in_vertex_dw_stride(struct si_shader_context *ctx)
 {
        unsigned stride;
 
        switch (ctx->type) {
        case PIPE_SHADER_VERTEX:
                stride = util_last_bit64(ctx->shader->selector->outputs_written);
                return LLVMConstInt(ctx->i32, stride * 4, 0);
 
        case PIPE_SHADER_TESS_CTRL:
+               if (ctx->screen->b.chip_class >= GFX9 &&
+                   ctx->shader->is_monolithic) {
+                       stride = util_last_bit64(ctx->shader->key.part.tcs.ls->outputs_written);
+                       return LLVMConstInt(ctx->i32, stride * 4, 0);
+               }
                return unpack_param(ctx, ctx->param_vs_state_bits, 24, 8);
 
        default:
                assert(0);
                return NULL;
        }
 }
 
 static LLVMValueRef get_instance_index_for_fetch(
        struct si_shader_context *ctx,
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 25fcead..fe25598 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1284,21 +1284,26 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
                                                  key, &key->part.tcs.ls_prolog);
                        key->part.tcs.ls = sctx->vs_shader.cso;
 
                        /* When the LS VGPR fix is needed, monolithic shaders
                         * can:
                         *  - avoid initializing EXEC in both the LS prolog
                         *    and the LS main part when !vs_needs_prolog
                         *  - remove the fixup for unused input VGPRs
                         */
                        key->part.tcs.ls_prolog.ls_vgpr_fix = sctx->ls_vgpr_fix;
-                       key->opt.prefer_mono = sctx->ls_vgpr_fix;
+
+                       /* The LS output / HS input layout can be communicated
+                        * directly instead of via user SGPRs for merged LS-HS.
+                        * The LS VGPR fix prefers this too.
+                        */
+                       key->opt.prefer_mono = 1;
                }
 
                key->part.tcs.epilog.prim_mode =
                        sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
                key->part.tcs.epilog.invoc0_tess_factors_are_def =
                        sel->tcs_info.invoc0_tessfactors_are_def;
                key->part.tcs.epilog.tes_reads_tess_factors =
                        sctx->tes_shader.cso->info.reads_tess_factors;
 
                if (sel == sctx->fixed_func_tcs_shader.cso)
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to