I have not yet read the patch, just applied it and ran it; at one point it
locked up again, blanking the screen, with these piglits running:

   arb_compute_shader-local-id -auto -fbo
   arb_shader_image_load_store-bitcast -auto -fbo
   arb_shader_image_load_store-max-images -auto -fbo
   arb_shader_image_load_store-max-size --quick -auto -fbo
   arb_shader_image_load_store-minmax -auto -fbo
   arb_shader_image_load_store-qualifiers -auto -fbo
 
I couldn't kill the -bitcast process, didn't try the -local-id process,
but starting with -max-size I was able to kill -9 some tests and then
it resolved itself and piglit continued to run to the end.  

I have to retry whether I can do the same without this patch; I think at least
once it worked for me.

Best, 
Gert 

Am Dienstag, den 07.08.2018, 12:31 +1000 schrieb Dave Airlie:
> From: Dave Airlie <[email protected]>
> 
> With the current code, we didn't do the space checks prior
> to atomic counter setup emission, but we also didn't add
> atomic counters to the space check so we could get a flush
> later as well.
> 
> These flushes would be bad, and lead to problems with
> parallel tests. We have to ensure the atomic counter copy in,
> draw emits and counter copy out are kept in the same command
> submission unit.
> 
> This reworks the code to drop some useless masks, make the
> counting separate to the emits, and make the space checker
> handle atomic counter space.
> ---
>  src/gallium/drivers/r600/evergreen_compute.c  | 11 ++++--
>  .../drivers/r600/evergreen_hw_context.c       |  2 +-
>  src/gallium/drivers/r600/evergreen_state.c    | 38 +++++++++++----
> ----
>  src/gallium/drivers/r600/r600_hw_context.c    |  7 +++-
>  src/gallium/drivers/r600/r600_pipe.h          | 14 ++++---
>  src/gallium/drivers/r600/r600_state_common.c  | 13 +++++--
>  6 files changed, 54 insertions(+), 31 deletions(-)
> 
> diff --git a/src/gallium/drivers/r600/evergreen_compute.c
> b/src/gallium/drivers/r600/evergreen_compute.c
> index 90eae1e2829..a77f58242e3 100644
> --- a/src/gallium/drivers/r600/evergreen_compute.c
> +++ b/src/gallium/drivers/r600/evergreen_compute.c
> @@ -715,7 +715,6 @@ static void compute_emit_cs(struct r600_context
> *rctx,
>               rctx->cmd_buf_is_compute = true;
>       }
>  
> -     r600_need_cs_space(rctx, 0, true);
>       if (rctx->cs_shader_state.shader->ir_type ==
> PIPE_SHADER_IR_TGSI) {
>               r600_shader_select(&rctx->b.b, rctx-
> >cs_shader_state.shader->sel, &compute_dirty);
>               current = rctx->cs_shader_state.shader->sel-
> >current;
> @@ -742,16 +741,22 @@ static void compute_emit_cs(struct r600_context
> *rctx,
>               }
>               rctx->cs_block_grid_sizes[3] = rctx-
> >cs_block_grid_sizes[7] = 0;
>               rctx-
> >driver_consts[PIPE_SHADER_COMPUTE].cs_block_grid_size_dirty = true;
> +
> +             evergreen_emit_atomic_buffer_setup_count(rctx,
> current, combined_atomics, &atomic_used_mask);
> +             r600_need_cs_space(rctx, 0, true,
> util_bitcount(atomic_used_mask));
> +
>               if (need_buf_const) {
>                       eg_setup_buffer_constants(rctx,
> PIPE_SHADER_COMPUTE);
>               }
>               r600_update_driver_const_buffers(rctx, true);
>  
> -             if (evergreen_emit_atomic_buffer_setup(rctx,
> current, combined_atomics, &atomic_used_mask)) {
> +             evergreen_emit_atomic_buffer_setup(rctx, true,
> combined_atomics, atomic_used_mask);
> +             if (atomic_used_mask) {
>                       radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0,
> 0));
>                       radeon_emit(cs,
> EVENT_TYPE(EVENT_TYPE_CS_PARTIAL_FLUSH) | EVENT_INDEX(4));
>               }
> -     }
> +     } else
> +             r600_need_cs_space(rctx, 0, true, 0);
>  
>       /* Initialize all the compute-related registers.
>        *
> diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c
> b/src/gallium/drivers/r600/evergreen_hw_context.c
> index d3f3e227c1f..5e0e27b0f16 100644
> --- a/src/gallium/drivers/r600/evergreen_hw_context.c
> +++ b/src/gallium/drivers/r600/evergreen_hw_context.c
> @@ -109,7 +109,7 @@ void evergreen_cp_dma_clear_buffer(struct
> r600_context *rctx,
>  
>               r600_need_cs_space(rctx,
>                                  10 + (rctx->b.flags ?
> R600_MAX_FLUSH_CS_DWORDS : 0) +
> -                                R600_MAX_PFP_SYNC_ME_DWORDS,
> FALSE);
> +                                R600_MAX_PFP_SYNC_ME_DWORDS,
> FALSE, 0);
>  
>               /* Flush the caches for the first copy only. */
>               if (rctx->b.flags) {
> diff --git a/src/gallium/drivers/r600/evergreen_state.c
> b/src/gallium/drivers/r600/evergreen_state.c
> index 57e81e30c27..cc41e114369 100644
> --- a/src/gallium/drivers/r600/evergreen_state.c
> +++ b/src/gallium/drivers/r600/evergreen_state.c
> @@ -4030,7 +4030,6 @@ static void
> evergreen_set_hw_atomic_buffers(struct pipe_context *ctx,
>  
>               if (!buffers || !buffers[idx].buffer) {
>                       pipe_resource_reference(&abuf->buffer,
> NULL);
> -                     astate->enabled_mask &= ~(1 << i);
>                       continue;
>               }
>               buf = &buffers[idx];
> @@ -4038,7 +4037,6 @@ static void
> evergreen_set_hw_atomic_buffers(struct pipe_context *ctx,
>               pipe_resource_reference(&abuf->buffer, buf->buffer);
>               abuf->buffer_offset = buf->buffer_offset;
>               abuf->buffer_size = buf->buffer_size;
> -             astate->enabled_mask |= (1 << i);
>       }
>  }
>  
> @@ -4868,20 +4866,15 @@ static void cayman_write_count_to_gds(struct
> r600_context *rctx,
>       radeon_emit(cs, reloc);
>  }
>  
> -bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
> -                                     struct r600_pipe_shader
> *cs_shader,
> -                                     struct r600_shader_atomic
> *combined_atomics,
> -                                     uint8_t *atomic_used_mask_p)
> +void evergreen_emit_atomic_buffer_setup_count(struct r600_context
> *rctx,
> +                                           struct
> r600_pipe_shader *cs_shader,
> +                                           struct
> r600_shader_atomic *combined_atomics,
> +                                           uint8_t
> *atomic_used_mask_p)
>  {
> -     struct r600_atomic_buffer_state *astate = &rctx-
> >atomic_buffer_state;
> -     unsigned pkt_flags = 0;
>       uint8_t atomic_used_mask = 0;
>       int i, j, k;
>       bool is_compute = cs_shader ? true : false;
>  
> -     if (is_compute)
> -             pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE;
> -
>       for (i = 0; i < (is_compute ? 1 : EG_NUM_HW_STAGES); i++) {
>               uint8_t num_atomic_stage;
>               struct r600_pipe_shader *pshader;
> @@ -4914,8 +4907,25 @@ bool evergreen_emit_atomic_buffer_setup(struct
> r600_context *rctx,
>                       }
>               }
>       }
> +     *atomic_used_mask_p = atomic_used_mask;
> +}
> +
> +void evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
> +                                     bool is_compute,
> +                                     struct r600_shader_atomic
> *combined_atomics,
> +                                     uint8_t atomic_used_mask)
> +{
> +     struct r600_atomic_buffer_state *astate = &rctx-
> >atomic_buffer_state;
> +     unsigned pkt_flags = 0;
> +     uint32_t mask;
> +
> +     if (is_compute)
> +             pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE;
> +
> +     mask = atomic_used_mask;
> +     if (!mask)
> +             return;
>  
> -     uint32_t mask = atomic_used_mask;
>       while (mask) {
>               unsigned atomic_index = u_bit_scan(&mask);
>               struct r600_shader_atomic *atomic =
> &combined_atomics[atomic_index];
> @@ -4927,8 +4937,6 @@ bool evergreen_emit_atomic_buffer_setup(struct
> r600_context *rctx,
>               else
>                       evergreen_emit_set_append_cnt(rctx, atomic,
> resource, pkt_flags);
>       }
> -     *atomic_used_mask_p = atomic_used_mask;
> -     return true;
>  }
>  
>  void evergreen_emit_atomic_buffer_save(struct r600_context *rctx,
> @@ -4940,7 +4948,7 @@ void evergreen_emit_atomic_buffer_save(struct
> r600_context *rctx,
>       struct r600_atomic_buffer_state *astate = &rctx-
> >atomic_buffer_state;
>       uint32_t pkt_flags = 0;
>       uint32_t event = EVENT_TYPE_PS_DONE;
> -     uint32_t mask = astate->enabled_mask;
> +     uint32_t mask;
>       uint64_t dst_offset;
>       unsigned reloc;
>  
> diff --git a/src/gallium/drivers/r600/r600_hw_context.c
> b/src/gallium/drivers/r600/r600_hw_context.c
> index 1cfc180ad6c..a2f5f637b20 100644
> --- a/src/gallium/drivers/r600/r600_hw_context.c
> +++ b/src/gallium/drivers/r600/r600_hw_context.c
> @@ -31,7 +31,7 @@
>  
>  
>  void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
> -                     boolean count_draw_in)
> +                     boolean count_draw_in, unsigned num_atomics)
>  {
>       /* Flush the DMA IB if it's not empty. */
>       if (radeon_emitted(ctx->b.dma.cs, 0))
> @@ -61,6 +61,9 @@ void r600_need_cs_space(struct r600_context *ctx,
> unsigned num_dw,
>               num_dw += R600_MAX_FLUSH_CS_DWORDS +
> R600_MAX_DRAW_CS_DWORDS;
>       }
>  
> +     /* add atomic counters, 8 pre + 8 post per counter + 16 post
> if any counters */
> +     num_dw += (num_atomics * 16) + (num_atomics ? 16 : 0);
> +
>       /* Count in r600_suspend_queries. */
>       num_dw += ctx->b.num_cs_dw_queries_suspend;
>  
> @@ -526,7 +529,7 @@ void r600_cp_dma_copy_buffer(struct r600_context
> *rctx,
>  
>               r600_need_cs_space(rctx,
>                                  10 + (rctx->b.flags ?
> R600_MAX_FLUSH_CS_DWORDS : 0) +
> -                                3 + R600_MAX_PFP_SYNC_ME_DWORDS,
> FALSE);
> +                                3 + R600_MAX_PFP_SYNC_ME_DWORDS,
> FALSE, 0);
>  
>               /* Flush the caches for the first copy only. */
>               if (rctx->b.flags) {
> diff --git a/src/gallium/drivers/r600/r600_pipe.h
> b/src/gallium/drivers/r600/r600_pipe.h
> index 6204e3c557b..239005cab7f 100644
> --- a/src/gallium/drivers/r600/r600_pipe.h
> +++ b/src/gallium/drivers/r600/r600_pipe.h
> @@ -446,8 +446,6 @@ struct r600_shader_state {
>  };
>  
>  struct r600_atomic_buffer_state {
> -     uint32_t enabled_mask;
> -     uint32_t dirty_mask;
>       struct pipe_shader_buffer buffer[EG_MAX_ATOMIC_BUFFERS];
>  };
>  
> @@ -773,7 +771,7 @@ void r600_context_gfx_flush(void *context,
> unsigned flags,
>                           struct pipe_fence_handle **fence);
>  void r600_begin_new_cs(struct r600_context *ctx);
>  void r600_flush_emit(struct r600_context *ctx);
> -void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
> boolean count_draw_in);
> +void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
> boolean count_draw_in, unsigned num_atomics);
>  void r600_emit_pfp_sync_me(struct r600_context *rctx);
>  void r600_cp_dma_copy_buffer(struct r600_context *rctx,
>                            struct pipe_resource *dst, uint64_t
> dst_offset,
> @@ -1067,10 +1065,14 @@ void r600_delete_shader_selector(struct
> pipe_context *ctx,
>                                struct r600_pipe_shader_selector
> *sel);
>  
>  struct r600_shader_atomic;
> -bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
> -                                     struct r600_pipe_shader
> *cs_shader,
> +void evergreen_emit_atomic_buffer_setup_count(struct r600_context
> *rctx,
> +                                           struct
> r600_pipe_shader *cs_shader,
> +                                           struct
> r600_shader_atomic *combined_atomics,
> +                                           uint8_t
> *atomic_used_mask_p);
> +void evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
> +                                     bool is_compute,
>                                       struct r600_shader_atomic
> *combined_atomics,
> -                                     uint8_t
> *atomic_used_mask_p);
> +                                     uint8_t atomic_used_mask);
>  void evergreen_emit_atomic_buffer_save(struct r600_context *rctx,
>                                      bool is_compute,
>                                      struct r600_shader_atomic
> *combined_atomics,
> diff --git a/src/gallium/drivers/r600/r600_state_common.c
> b/src/gallium/drivers/r600/r600_state_common.c
> index 402d95838f0..e6c1b0be97c 100644
> --- a/src/gallium/drivers/r600/r600_state_common.c
> +++ b/src/gallium/drivers/r600/r600_state_common.c
> @@ -2085,8 +2085,9 @@ static void r600_draw_vbo(struct pipe_context
> *ctx, const struct pipe_draw_info
>               : (rctx->tes_shader)? rctx->tes_shader-
> >info.properties[TGSI_PROPERTY_TES_PRIM_MODE]
>               : info->mode;
>  
> -     if (rctx->b.chip_class >= EVERGREEN)
> -             evergreen_emit_atomic_buffer_setup(rctx, NULL,
> combined_atomics, &atomic_used_mask);
> +     if (rctx->b.chip_class >= EVERGREEN) {
> +             evergreen_emit_atomic_buffer_setup_count(rctx, NULL,
> combined_atomics, &atomic_used_mask);
> +     }
>  
>       if (index_size) {
>               index_offset += info->start * index_size;
> @@ -2172,7 +2173,7 @@ static void r600_draw_vbo(struct pipe_context
> *ctx, const struct pipe_draw_info
>               evergreen_setup_tess_constants(rctx, info,
> &num_patches);
>  
>       /* Emit states. */
> -     r600_need_cs_space(rctx, has_user_indices ? 5 : 0, TRUE);
> +     r600_need_cs_space(rctx, has_user_indices ? 5 : 0, TRUE,
> util_bitcount(atomic_used_mask));
>       r600_flush_emit(rctx);
>  
>       mask = rctx->dirty_atoms;
> @@ -2180,6 +2181,10 @@ static void r600_draw_vbo(struct pipe_context
> *ctx, const struct pipe_draw_info
>               r600_emit_atom(rctx, rctx-
> >atoms[u_bit_scan64(&mask)]);
>       }
>  
> +     if (rctx->b.chip_class >= EVERGREEN) {
> +             evergreen_emit_atomic_buffer_setup(rctx, false,
> combined_atomics, atomic_used_mask);
> +     }
> +             
>       if (rctx->b.chip_class == CAYMAN) {
>               /* Copied from radeonsi. */
>               unsigned primgroup_size = 128; /* recommended
> without a GS */
> @@ -3284,7 +3289,7 @@ static void r600_set_active_query_state(struct
> pipe_context *ctx, boolean enable
>  static void r600_need_gfx_cs_space(struct pipe_context *ctx,
> unsigned num_dw,
>                                     bool include_draw_vbo)
>  {
> -     r600_need_cs_space((struct r600_context*)ctx, num_dw,
> include_draw_vbo);
> +     r600_need_cs_space((struct r600_context*)ctx, num_dw,
> include_draw_vbo, 0);
>  }
>  
>  /* keep this at the end of this file, please */
_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to