I have not yet read the patch; I just applied it and ran piglit. At one point it locked up again, blanking the screen, while these piglit tests were running:
arb_compute_shader-local-id -auto -fbo arb_shader_image_load_store-bitcast -auto -fbo arb_shader_image_load_store-max-images -auto -fbo arb_shader_image_load_store-max-size --quick -auto -fbo arb_shader_image_load_store-minmax -auto -fbo arb_shader_image_load_store-qualifiers -auto -fbo I couldn't kill the -bitcast process and didn't try the -local-id process, but starting with -max-size I was able to kill -9 some tests, and then it resolved itself and piglit continued to run to the end. I still have to check whether I can do the same without this patch; I think it worked for me at least once. Best, Gert Am Dienstag, den 07.08.2018, 12:31 +1000 schrieb Dave Airlie: > From: Dave Airlie <[email protected]> > > With the current code, we didn't do the space checks prior > to atomic counter setup emission, but we also didn't add > atomic counters to the space check so we could get a flush > later as well. > > These flushes would be bad, and lead to problems with > parallel tests. We have to ensure the atomic counter copy in, > draw emits and counter copy out are kept in the same command > submission unit. > > This reworks the code to drop some useless masks, make the > counting separate to the emits, and make the space checker > handle atomic counter space. 
> --- > src/gallium/drivers/r600/evergreen_compute.c | 11 ++++-- > .../drivers/r600/evergreen_hw_context.c | 2 +- > src/gallium/drivers/r600/evergreen_state.c | 38 +++++++++++---- > ---- > src/gallium/drivers/r600/r600_hw_context.c | 7 +++- > src/gallium/drivers/r600/r600_pipe.h | 14 ++++--- > src/gallium/drivers/r600/r600_state_common.c | 13 +++++-- > 6 files changed, 54 insertions(+), 31 deletions(-) > > diff --git a/src/gallium/drivers/r600/evergreen_compute.c > b/src/gallium/drivers/r600/evergreen_compute.c > index 90eae1e2829..a77f58242e3 100644 > --- a/src/gallium/drivers/r600/evergreen_compute.c > +++ b/src/gallium/drivers/r600/evergreen_compute.c > @@ -715,7 +715,6 @@ static void compute_emit_cs(struct r600_context > *rctx, > rctx->cmd_buf_is_compute = true; > } > > - r600_need_cs_space(rctx, 0, true); > if (rctx->cs_shader_state.shader->ir_type == > PIPE_SHADER_IR_TGSI) { > r600_shader_select(&rctx->b.b, rctx- > >cs_shader_state.shader->sel, &compute_dirty); > current = rctx->cs_shader_state.shader->sel- > >current; > @@ -742,16 +741,22 @@ static void compute_emit_cs(struct r600_context > *rctx, > } > rctx->cs_block_grid_sizes[3] = rctx- > >cs_block_grid_sizes[7] = 0; > rctx- > >driver_consts[PIPE_SHADER_COMPUTE].cs_block_grid_size_dirty = true; > + > + evergreen_emit_atomic_buffer_setup_count(rctx, > current, combined_atomics, &atomic_used_mask); > + r600_need_cs_space(rctx, 0, true, > util_bitcount(atomic_used_mask)); > + > if (need_buf_const) { > eg_setup_buffer_constants(rctx, > PIPE_SHADER_COMPUTE); > } > r600_update_driver_const_buffers(rctx, true); > > - if (evergreen_emit_atomic_buffer_setup(rctx, > current, combined_atomics, &atomic_used_mask)) { > + evergreen_emit_atomic_buffer_setup(rctx, true, > combined_atomics, atomic_used_mask); > + if (atomic_used_mask) { > radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, > 0)); > radeon_emit(cs, > EVENT_TYPE(EVENT_TYPE_CS_PARTIAL_FLUSH) | EVENT_INDEX(4)); > } > - } > + } else > + r600_need_cs_space(rctx, 0, true, 
0); > > /* Initialize all the compute-related registers. > * > diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c > b/src/gallium/drivers/r600/evergreen_hw_context.c > index d3f3e227c1f..5e0e27b0f16 100644 > --- a/src/gallium/drivers/r600/evergreen_hw_context.c > +++ b/src/gallium/drivers/r600/evergreen_hw_context.c > @@ -109,7 +109,7 @@ void evergreen_cp_dma_clear_buffer(struct > r600_context *rctx, > > r600_need_cs_space(rctx, > 10 + (rctx->b.flags ? > R600_MAX_FLUSH_CS_DWORDS : 0) + > - R600_MAX_PFP_SYNC_ME_DWORDS, > FALSE); > + R600_MAX_PFP_SYNC_ME_DWORDS, > FALSE, 0); > > /* Flush the caches for the first copy only. */ > if (rctx->b.flags) { > diff --git a/src/gallium/drivers/r600/evergreen_state.c > b/src/gallium/drivers/r600/evergreen_state.c > index 57e81e30c27..cc41e114369 100644 > --- a/src/gallium/drivers/r600/evergreen_state.c > +++ b/src/gallium/drivers/r600/evergreen_state.c > @@ -4030,7 +4030,6 @@ static void > evergreen_set_hw_atomic_buffers(struct pipe_context *ctx, > > if (!buffers || !buffers[idx].buffer) { > pipe_resource_reference(&abuf->buffer, > NULL); > - astate->enabled_mask &= ~(1 << i); > continue; > } > buf = &buffers[idx]; > @@ -4038,7 +4037,6 @@ static void > evergreen_set_hw_atomic_buffers(struct pipe_context *ctx, > pipe_resource_reference(&abuf->buffer, buf->buffer); > abuf->buffer_offset = buf->buffer_offset; > abuf->buffer_size = buf->buffer_size; > - astate->enabled_mask |= (1 << i); > } > } > > @@ -4868,20 +4866,15 @@ static void cayman_write_count_to_gds(struct > r600_context *rctx, > radeon_emit(cs, reloc); > } > > -bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx, > - struct r600_pipe_shader > *cs_shader, > - struct r600_shader_atomic > *combined_atomics, > - uint8_t *atomic_used_mask_p) > +void evergreen_emit_atomic_buffer_setup_count(struct r600_context > *rctx, > + struct > r600_pipe_shader *cs_shader, > + struct > r600_shader_atomic *combined_atomics, > + uint8_t > *atomic_used_mask_p) > { > - 
struct r600_atomic_buffer_state *astate = &rctx- > >atomic_buffer_state; > - unsigned pkt_flags = 0; > uint8_t atomic_used_mask = 0; > int i, j, k; > bool is_compute = cs_shader ? true : false; > > - if (is_compute) > - pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE; > - > for (i = 0; i < (is_compute ? 1 : EG_NUM_HW_STAGES); i++) { > uint8_t num_atomic_stage; > struct r600_pipe_shader *pshader; > @@ -4914,8 +4907,25 @@ bool evergreen_emit_atomic_buffer_setup(struct > r600_context *rctx, > } > } > } > + *atomic_used_mask_p = atomic_used_mask; > +} > + > +void evergreen_emit_atomic_buffer_setup(struct r600_context *rctx, > + bool is_compute, > + struct r600_shader_atomic > *combined_atomics, > + uint8_t atomic_used_mask) > +{ > + struct r600_atomic_buffer_state *astate = &rctx- > >atomic_buffer_state; > + unsigned pkt_flags = 0; > + uint32_t mask; > + > + if (is_compute) > + pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE; > + > + mask = atomic_used_mask; > + if (!mask) > + return; > > - uint32_t mask = atomic_used_mask; > while (mask) { > unsigned atomic_index = u_bit_scan(&mask); > struct r600_shader_atomic *atomic = > &combined_atomics[atomic_index]; > @@ -4927,8 +4937,6 @@ bool evergreen_emit_atomic_buffer_setup(struct > r600_context *rctx, > else > evergreen_emit_set_append_cnt(rctx, atomic, > resource, pkt_flags); > } > - *atomic_used_mask_p = atomic_used_mask; > - return true; > } > > void evergreen_emit_atomic_buffer_save(struct r600_context *rctx, > @@ -4940,7 +4948,7 @@ void evergreen_emit_atomic_buffer_save(struct > r600_context *rctx, > struct r600_atomic_buffer_state *astate = &rctx- > >atomic_buffer_state; > uint32_t pkt_flags = 0; > uint32_t event = EVENT_TYPE_PS_DONE; > - uint32_t mask = astate->enabled_mask; > + uint32_t mask; > uint64_t dst_offset; > unsigned reloc; > > diff --git a/src/gallium/drivers/r600/r600_hw_context.c > b/src/gallium/drivers/r600/r600_hw_context.c > index 1cfc180ad6c..a2f5f637b20 100644 > --- 
a/src/gallium/drivers/r600/r600_hw_context.c > +++ b/src/gallium/drivers/r600/r600_hw_context.c > @@ -31,7 +31,7 @@ > > > void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, > - boolean count_draw_in) > + boolean count_draw_in, unsigned num_atomics) > { > /* Flush the DMA IB if it's not empty. */ > if (radeon_emitted(ctx->b.dma.cs, 0)) > @@ -61,6 +61,9 @@ void r600_need_cs_space(struct r600_context *ctx, > unsigned num_dw, > num_dw += R600_MAX_FLUSH_CS_DWORDS + > R600_MAX_DRAW_CS_DWORDS; > } > > + /* add atomic counters, 8 pre + 8 post per counter + 16 post > if any counters */ > + num_dw += (num_atomics * 16) + (num_atomics ? 16 : 0); > + > /* Count in r600_suspend_queries. */ > num_dw += ctx->b.num_cs_dw_queries_suspend; > > @@ -526,7 +529,7 @@ void r600_cp_dma_copy_buffer(struct r600_context > *rctx, > > r600_need_cs_space(rctx, > 10 + (rctx->b.flags ? > R600_MAX_FLUSH_CS_DWORDS : 0) + > - 3 + R600_MAX_PFP_SYNC_ME_DWORDS, > FALSE); > + 3 + R600_MAX_PFP_SYNC_ME_DWORDS, > FALSE, 0); > > /* Flush the caches for the first copy only. 
*/ > if (rctx->b.flags) { > diff --git a/src/gallium/drivers/r600/r600_pipe.h > b/src/gallium/drivers/r600/r600_pipe.h > index 6204e3c557b..239005cab7f 100644 > --- a/src/gallium/drivers/r600/r600_pipe.h > +++ b/src/gallium/drivers/r600/r600_pipe.h > @@ -446,8 +446,6 @@ struct r600_shader_state { > }; > > struct r600_atomic_buffer_state { > - uint32_t enabled_mask; > - uint32_t dirty_mask; > struct pipe_shader_buffer buffer[EG_MAX_ATOMIC_BUFFERS]; > }; > > @@ -773,7 +771,7 @@ void r600_context_gfx_flush(void *context, > unsigned flags, > struct pipe_fence_handle **fence); > void r600_begin_new_cs(struct r600_context *ctx); > void r600_flush_emit(struct r600_context *ctx); > -void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, > boolean count_draw_in); > +void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, > boolean count_draw_in, unsigned num_atomics); > void r600_emit_pfp_sync_me(struct r600_context *rctx); > void r600_cp_dma_copy_buffer(struct r600_context *rctx, > struct pipe_resource *dst, uint64_t > dst_offset, > @@ -1067,10 +1065,14 @@ void r600_delete_shader_selector(struct > pipe_context *ctx, > struct r600_pipe_shader_selector > *sel); > > struct r600_shader_atomic; > -bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx, > - struct r600_pipe_shader > *cs_shader, > +void evergreen_emit_atomic_buffer_setup_count(struct r600_context > *rctx, > + struct > r600_pipe_shader *cs_shader, > + struct > r600_shader_atomic *combined_atomics, > + uint8_t > *atomic_used_mask_p); > +void evergreen_emit_atomic_buffer_setup(struct r600_context *rctx, > + bool is_compute, > struct r600_shader_atomic > *combined_atomics, > - uint8_t > *atomic_used_mask_p); > + uint8_t atomic_used_mask); > void evergreen_emit_atomic_buffer_save(struct r600_context *rctx, > bool is_compute, > struct r600_shader_atomic > *combined_atomics, > diff --git a/src/gallium/drivers/r600/r600_state_common.c > b/src/gallium/drivers/r600/r600_state_common.c > 
index 402d95838f0..e6c1b0be97c 100644 > --- a/src/gallium/drivers/r600/r600_state_common.c > +++ b/src/gallium/drivers/r600/r600_state_common.c > @@ -2085,8 +2085,9 @@ static void r600_draw_vbo(struct pipe_context > *ctx, const struct pipe_draw_info > : (rctx->tes_shader)? rctx->tes_shader- > >info.properties[TGSI_PROPERTY_TES_PRIM_MODE] > : info->mode; > > - if (rctx->b.chip_class >= EVERGREEN) > - evergreen_emit_atomic_buffer_setup(rctx, NULL, > combined_atomics, &atomic_used_mask); > + if (rctx->b.chip_class >= EVERGREEN) { > + evergreen_emit_atomic_buffer_setup_count(rctx, NULL, > combined_atomics, &atomic_used_mask); > + } > > if (index_size) { > index_offset += info->start * index_size; > @@ -2172,7 +2173,7 @@ static void r600_draw_vbo(struct pipe_context > *ctx, const struct pipe_draw_info > evergreen_setup_tess_constants(rctx, info, > &num_patches); > > /* Emit states. */ > - r600_need_cs_space(rctx, has_user_indices ? 5 : 0, TRUE); > + r600_need_cs_space(rctx, has_user_indices ? 5 : 0, TRUE, > util_bitcount(atomic_used_mask)); > r600_flush_emit(rctx); > > mask = rctx->dirty_atoms; > @@ -2180,6 +2181,10 @@ static void r600_draw_vbo(struct pipe_context > *ctx, const struct pipe_draw_info > r600_emit_atom(rctx, rctx- > >atoms[u_bit_scan64(&mask)]); > } > > + if (rctx->b.chip_class >= EVERGREEN) { > + evergreen_emit_atomic_buffer_setup(rctx, false, > combined_atomics, atomic_used_mask); > + } > + > if (rctx->b.chip_class == CAYMAN) { > /* Copied from radeonsi. 
*/ > unsigned primgroup_size = 128; /* recommended > without a GS */ > @@ -3284,7 +3289,7 @@ static void r600_set_active_query_state(struct > pipe_context *ctx, boolean enable > static void r600_need_gfx_cs_space(struct pipe_context *ctx, > unsigned num_dw, > bool include_draw_vbo) > { > - r600_need_cs_space((struct r600_context*)ctx, num_dw, > include_draw_vbo); > + r600_need_cs_space((struct r600_context*)ctx, num_dw, > include_draw_vbo, 0); > } > > /* keep this at the end of this file, please */ _______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
