From: Marek Olšák <[email protected]>

---
 src/gallium/drivers/radeonsi/si_pm4.c           |  9 +++++++-
 src/gallium/drivers/radeonsi/si_pm4.h           |  1 +
 src/gallium/drivers/radeonsi/si_state_shaders.c | 30 +++++++++++++------------
 3 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pm4.c b/src/gallium/drivers/radeonsi/si_pm4.c
index c3032fc..386d093 100644
--- a/src/gallium/drivers/radeonsi/si_pm4.c
+++ b/src/gallium/drivers/radeonsi/si_pm4.c
@@ -96,25 +96,32 @@ void si_pm4_add_bo(struct si_pm4_state *state,
                   enum radeon_bo_priority priority)
 {
        unsigned idx = state->nbo++;
        assert(idx < SI_PM4_MAX_BO);
 
        r600_resource_reference(&state->bo[idx], bo);
        state->bo_usage[idx] = usage;
        state->bo_priority[idx] = priority;
 }
 
-void si_pm4_free_state_simple(struct si_pm4_state *state)
+void si_pm4_clear_state(struct si_pm4_state *state)
 {
        for (int i = 0; i < state->nbo; ++i)
                r600_resource_reference(&state->bo[i], NULL);
        r600_resource_reference(&state->indirect_buffer, NULL);
+       state->nbo = 0;
+       state->ndw = 0;
+}
+
+void si_pm4_free_state_simple(struct si_pm4_state *state)
+{
+       si_pm4_clear_state(state);
        FREE(state);
 }
 
 void si_pm4_free_state(struct si_context *sctx,
                       struct si_pm4_state *state,
                       unsigned idx)
 {
        if (!state)
                return;
 
diff --git a/src/gallium/drivers/radeonsi/si_pm4.h b/src/gallium/drivers/radeonsi/si_pm4.h
index 35fa6c3..9b02a80 100644
--- a/src/gallium/drivers/radeonsi/si_pm4.h
+++ b/src/gallium/drivers/radeonsi/si_pm4.h
@@ -64,20 +64,21 @@ void si_pm4_cmd_add(struct si_pm4_state *state, uint32_t dw);
 void si_pm4_cmd_end(struct si_pm4_state *state, bool predicate);
 
 void si_pm4_set_reg(struct si_pm4_state *state, unsigned reg, uint32_t val);
 void si_pm4_add_bo(struct si_pm4_state *state,
                   struct r600_resource *bo,
                   enum radeon_bo_usage usage,
                   enum radeon_bo_priority priority);
 void si_pm4_upload_indirect_buffer(struct si_context *sctx,
                                   struct si_pm4_state *state);
 
+void si_pm4_clear_state(struct si_pm4_state *state);
 void si_pm4_free_state_simple(struct si_pm4_state *state);
 void si_pm4_free_state(struct si_context *sctx,
                       struct si_pm4_state *state,
                       unsigned idx);
 
 void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state);
 void si_pm4_emit_dirty(struct si_context *sctx);
 void si_pm4_reset_emitted(struct si_context *sctx);
 
 #endif
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 816aadc..acbceba 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -322,27 +322,37 @@ static void si_set_tesseval_regs(struct si_screen *sscreen,
        } else
                distribution_mode = V_028B6C_DISTRIBUTION_MODE_NO_DIST;
 
        si_pm4_set_reg(pm4, R_028B6C_VGT_TF_PARAM,
                       S_028B6C_TYPE(type) |
                       S_028B6C_PARTITIONING(partitioning) |
                       S_028B6C_TOPOLOGY(topology) |
                       S_028B6C_DISTRIBUTION_MODE(distribution_mode));
 }
 
+static struct si_pm4_state *si_get_shader_pm4_state(struct si_shader *shader)
+{
+       if (shader->pm4)
+               si_pm4_clear_state(shader->pm4);
+       else
+               shader->pm4 = CALLOC_STRUCT(si_pm4_state);
+
+       return shader->pm4;
+}
+
 static void si_shader_ls(struct si_shader *shader)
 {
        struct si_pm4_state *pm4;
        unsigned vgpr_comp_cnt;
        uint64_t va;
 
-       pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
+       pm4 = si_get_shader_pm4_state(shader);
        if (!pm4)
                return;
 
        va = shader->bo->gpu_address;
        si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_BINARY);
 
        /* We need at least 2 components for LS.
         * VGPR0-3: (VertexID, RelAutoindex, ???, InstanceID). */
        vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 1;
 
@@ -356,21 +366,21 @@ static void si_shader_ls(struct si_shader *shader)
                           S_00B528_FLOAT_MODE(shader->config.float_mode);
        shader->config.rsrc2 = S_00B52C_USER_SGPR(SI_LS_NUM_USER_SGPR) |
                           S_00B52C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
 }
 
 static void si_shader_hs(struct si_shader *shader)
 {
        struct si_pm4_state *pm4;
        uint64_t va;
 
-       pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
+       pm4 = si_get_shader_pm4_state(shader);
        if (!pm4)
                return;
 
        va = shader->bo->gpu_address;
        si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_BINARY);
 
        si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8);
        si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS, va >> 40);
        si_pm4_set_reg(pm4, R_00B428_SPI_SHADER_PGM_RSRC1_HS,
                       S_00B428_VGPRS((shader->config.num_vgprs - 1) / 4) |
@@ -384,22 +394,21 @@ static void si_shader_hs(struct si_shader *shader)
 }
 
 static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
 {
        struct si_pm4_state *pm4;
        unsigned num_user_sgprs;
        unsigned vgpr_comp_cnt;
        uint64_t va;
        unsigned oc_lds_en;
 
-       pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
-
+       pm4 = si_get_shader_pm4_state(shader);
        if (!pm4)
                return;
 
        va = shader->bo->gpu_address;
        si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_BINARY);
 
        if (shader->selector->type == PIPE_SHADER_VERTEX) {
                vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 0;
                num_user_sgprs = SI_ES_NUM_USER_SGPR;
        } else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) {
@@ -460,22 +469,21 @@ static void si_shader_gs(struct si_shader *shader)
        unsigned gs_vert_itemsize = shader->selector->gsvs_vertex_size;
        unsigned gsvs_itemsize = shader->selector->max_gsvs_emit_size >> 2;
        unsigned gs_num_invocations = shader->selector->gs_num_invocations;
        struct si_pm4_state *pm4;
        uint64_t va;
        unsigned max_stream = shader->selector->max_gs_stream;
 
        /* The GSVS_RING_ITEMSIZE register takes 15 bits */
        assert(gsvs_itemsize < (1 << 15));
 
-       pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
-
+       pm4 = si_get_shader_pm4_state(shader);
        if (!pm4)
                return;
 
        si_pm4_set_reg(pm4, R_028A40_VGT_GS_MODE, si_vgt_gs_mode(shader));
 
        si_pm4_set_reg(pm4, R_028A60_VGT_GSVS_RING_OFFSET_1, gsvs_itemsize);
        si_pm4_set_reg(pm4, R_028A64_VGT_GSVS_RING_OFFSET_2, gsvs_itemsize * ((max_stream >= 2) ? 2 : 1));
        si_pm4_set_reg(pm4, R_028A68_VGT_GSVS_RING_OFFSET_3, gsvs_itemsize * ((max_stream >= 3) ? 3 : 1));
 
        si_pm4_set_reg(pm4, R_028AB0_VGT_GSVS_RING_ITEMSIZE, gsvs_itemsize * (max_stream + 1));
@@ -518,22 +526,21 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
 {
        struct si_pm4_state *pm4;
        unsigned num_user_sgprs;
        unsigned nparams, vgpr_comp_cnt;
        uint64_t va;
        unsigned oc_lds_en;
        unsigned window_space =
           shader->selector->info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
        bool enable_prim_id = si_vs_exports_prim_id(shader);
 
-       pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
-
+       pm4 = si_get_shader_pm4_state(shader);
        if (!pm4)
                return;
 
        /* We always write VGT_GS_MODE in the VS state, because every switch
         * between different shader pipelines involving a different GS or no
         * GS at all involves a switch of the VS (different GS use different
         * copy shaders). On the other hand, when the API switches from a GS to
         * no GS and then back to the same GS used originally, the GS state is
         * not sent again.
         */
@@ -682,22 +689,21 @@ static void si_shader_ps(struct si_shader *shader)
        /* we need to enable at least one of them, otherwise we hang the GPU */
        assert(G_0286CC_PERSP_SAMPLE_ENA(input_ena) ||
               G_0286CC_PERSP_CENTER_ENA(input_ena) ||
               G_0286CC_PERSP_CENTROID_ENA(input_ena) ||
               G_0286CC_PERSP_PULL_MODEL_ENA(input_ena) ||
               G_0286CC_LINEAR_SAMPLE_ENA(input_ena) ||
               G_0286CC_LINEAR_CENTER_ENA(input_ena) ||
               G_0286CC_LINEAR_CENTROID_ENA(input_ena) ||
               G_0286CC_LINE_STIPPLE_TEX_ENA(input_ena));
 
-       pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
-
+       pm4 = si_get_shader_pm4_state(shader);
        if (!pm4)
                return;
 
        /* SPI_BARYC_CNTL.POS_FLOAT_LOCATION
         * Possible vaules:
         * 0 -> Position = pixel center
         * 1 -> Position = pixel centroid
         * 2 -> Position = at sample position
         *
         * From GLSL 4.5 specification, section 7.1:
@@ -784,24 +790,20 @@ static void si_shader_ps(struct si_shader *shader)
        else if (info->num_memory_instructions >= 2 ||
                 shader->binary.code_size > 100*4)
                shader->z_order = V_02880C_EARLY_Z_THEN_RE_Z;
        else
                shader->z_order = V_02880C_EARLY_Z_THEN_LATE_Z;
 }
 
 static void si_shader_init_pm4_state(struct si_screen *sscreen,
                                      struct si_shader *shader)
 {
-
-       if (shader->pm4)
-               si_pm4_free_state_simple(shader->pm4);
-
        switch (shader->selector->type) {
        case PIPE_SHADER_VERTEX:
                if (shader->key.vs.as_ls)
                        si_shader_ls(shader);
                else if (shader->key.vs.as_es)
                        si_shader_es(sscreen, shader);
                else
                        si_shader_vs(sscreen, shader, NULL);
                break;
        case PIPE_SHADER_TESS_CTRL:
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to