From: Topi Pohjolainen <[email protected]>

gen9 hardware has a bug in the sampler cache that can cause GPU hangs
whenever an texture with aux compression enabled is in the sampler cache
together with an ASTC5x5 texture.  Because we can't control what the
client binds at any given time, we have two options: resolve the CCS or
decompresss the ASTC.  Doing a CCS resolve is far less drastic and will
likely have a smaller performance impact.

Co-authored-by: Jason Ekstrand <[email protected]>
---
 src/mesa/drivers/dri/i965/brw_blorp.c         |  8 ++
 src/mesa/drivers/dri/i965/brw_context.h       | 10 +++
 src/mesa/drivers/dri/i965/brw_draw.c          | 79 ++++++++++++++++++-
 .../drivers/dri/i965/brw_wm_surface_state.c   |  6 ++
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c |  8 +-
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h |  3 +-
 6 files changed, 108 insertions(+), 6 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c 
b/src/mesa/drivers/dri/i965/brw_blorp.c
index 7476cee43a4..0d305dea83b 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.c
+++ b/src/mesa/drivers/dri/i965/brw_blorp.c
@@ -178,11 +178,19 @@ blorp_surf_for_miptree(struct brw_context *brw,
 
       surf->aux_addr.buffer = mt->aux_buf->bo;
       surf->aux_addr.offset = mt->aux_buf->offset;
+
+      if (!is_render_target && brw->screen->devinfo.gen == 9)
+         gen9_astc5x5_sampler_wa(brw, GEN9_ASTC5X5_WA_TEX_TYPE_AUX);
    } else {
       surf->aux_addr = (struct blorp_address) {
          .buffer = NULL,
       };
       memset(&surf->clear_color, 0, sizeof(surf->clear_color));
+
+      if (!is_render_target && brw->screen->devinfo.gen == 9 &&
+          (mt->format == MESA_FORMAT_RGBA_ASTC_5x5 ||
+           mt->format == MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5))
+         gen9_astc5x5_sampler_wa(brw, GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5);
    }
    assert((surf->aux_usage == ISL_AUX_USAGE_NONE) ==
           (surf->aux_addr.buffer == NULL));
diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index d3b96953467..a7bc3612d75 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -168,6 +168,11 @@ enum brw_cache_id {
    BRW_MAX_CACHE
 };
 
+enum gen9_astc5x5_wa_tex_type {
+   GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5 = 1 << 0,
+   GEN9_ASTC5X5_WA_TEX_TYPE_AUX     = 1 << 1,
+};
+
 enum brw_state_id {
    /* brw_cache_ids must come first - see brw_program_cache.c */
    BRW_STATE_URB_FENCE = BRW_MAX_CACHE,
@@ -1326,6 +1331,8 @@ struct brw_context
     */
    enum isl_aux_usage draw_aux_usage[MAX_DRAW_BUFFERS];
 
+   enum gen9_astc5x5_wa_tex_type gen9_astc5x5_wa_tex_mask;
+
    __DRIcontext *driContext;
    struct intel_screen *screen;
 };
@@ -1350,6 +1357,9 @@ void intel_update_renderbuffers(__DRIcontext *context,
                                 __DRIdrawable *drawable);
 void intel_prepare_render(struct brw_context *brw);
 
+void gen9_astc5x5_sampler_wa(struct brw_context *brw,
+                             enum gen9_astc5x5_wa_tex_type curr_mask);
+
 void brw_predraw_resolve_inputs(struct brw_context *brw, bool rendering,
                                 bool *draw_aux_buffer_disabled);
 
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c 
b/src/mesa/drivers/dri/i965/brw_draw.c
index 71461d7b0a7..d261db81907 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -378,6 +378,39 @@ intel_disable_rb_aux_buffer(struct brw_context *brw,
    return found;
 }
 
+/** Implement the ASTC 5x5 sampler workaround
+ *
+ * Gen9 sampling hardware has a bug where an ASTC 5x5 compressed surface
+ * cannot live in the sampler cache at the same time as an aux compressed
+ * surface.  In order to work around the bug we have to stall rendering with a
+ * CS and pixel scoreboard stall (implicit in the CS stall) and invalidate the
+ * texture cache whenever one of ASTC 5x5 or aux compressed may be in the
+ * sampler cache and we're about to render with something which samples from
+ * the other.
+ *
+ * In the case of a single shader which textures from both ASTC 5x5 and
+ * a texture which is CCS or HiZ compressed, we have to resolve the aux
+ * compressed texture prior to rendering.  This second part is handled in
+ * brw_predraw_resolve_inputs() below.
+ */
+void
+gen9_astc5x5_sampler_wa(struct brw_context *brw,
+                        enum gen9_astc5x5_wa_tex_type curr_mask)
+{
+   if (brw->screen->devinfo.gen != 9)
+      return;
+
+   if (((brw->gen9_astc5x5_wa_tex_mask & GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5) &&
+        (curr_mask & GEN9_ASTC5X5_WA_TEX_TYPE_AUX)) ||
+       ((brw->gen9_astc5x5_wa_tex_mask & GEN9_ASTC5X5_WA_TEX_TYPE_AUX) &&
+        (curr_mask & GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5))) {
+      brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL);
+      brw_emit_pipe_control_flush(brw, PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
+   }
+
+   brw->gen9_astc5x5_wa_tex_mask = curr_mask;
+}
+
 static void
 mark_textures_used_for_txf(BITSET_WORD *used_for_txf,
                            const struct gl_program *prog)
@@ -417,8 +450,37 @@ brw_predraw_resolve_inputs(struct brw_context *brw, bool 
rendering,
       mark_textures_used_for_txf(used_for_txf, ctx->ComputeProgram._Current);
    }
 
-   /* Resolve depth buffer and render cache of each enabled texture. */
    int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit;
+
+   bool astc5x5_wa_disable_aux = false;
+   if (brw->screen->devinfo.gen == 9) {
+      /* In order to properly implement the ASTC 5x5 workaround for an
+       * arbitrary draw or dispatch call, we have to walk the entire list of
+       * textures looking for ASTC 5x5.  If there is any ASTC 5x5 in this draw
+       * call, all aux compressed textures must be resolved and have aux
+       * compression disabled while sampling.
+       */
+      enum gen9_astc5x5_wa_tex_type mask = 0;
+      for (int i = 0; i <= maxEnabledUnit; i++) {
+         if (!ctx->Texture.Unit[i]._Current)
+            continue;
+         tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
+         if (!tex_obj || !tex_obj->mt)
+            continue;
+
+         if (tex_obj->mt->aux_usage != ISL_AUX_USAGE_NONE)
+            mask |= GEN9_ASTC5X5_WA_TEX_TYPE_AUX;
+
+         if (tex_obj->_Format == MESA_FORMAT_RGBA_ASTC_5x5 ||
+             tex_obj->_Format == MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5)
+            mask |= GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5;
+      }
+      gen9_astc5x5_sampler_wa(brw, mask);
+
+      astc5x5_wa_disable_aux = (mask & GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5);
+   }
+
+   /* Resolve depth buffer and render cache of each enabled texture. */
    for (int i = 0; i <= maxEnabledUnit; i++) {
       if (!ctx->Texture.Unit[i]._Current)
         continue;
@@ -452,7 +514,8 @@ brw_predraw_resolve_inputs(struct brw_context *brw, bool 
rendering,
 
       intel_miptree_prepare_texture(brw, tex_obj->mt, view_format,
                                     min_level, num_levels,
-                                    min_layer, num_layers);
+                                    min_layer, num_layers,
+                                    astc5x5_wa_disable_aux);
 
       /* If any programs are using it with texelFetch, we may need to also do
        * a prepare with an sRGB format to ensure texelFetch works "properly".
@@ -463,7 +526,8 @@ brw_predraw_resolve_inputs(struct brw_context *brw, bool 
rendering,
          if (txf_format != view_format) {
             intel_miptree_prepare_texture(brw, tex_obj->mt, txf_format,
                                           min_level, num_levels,
-                                          min_layer, num_layers);
+                                          min_layer, num_layers,
+                                          astc5x5_wa_disable_aux);
          }
       }
 
@@ -528,6 +592,12 @@ brw_predraw_resolve_framebuffer(struct brw_context *brw,
        */
       assert(brw->screen->devinfo.gen < 9);
 
+      /* The ASTC5x5 workaround only applies to gen9 platforms which, as per
+       * the above assert, use a different path.
+       */
+      const bool astc5x5_wa_disable_aux =
+         (brw->gen9_astc5x5_wa_tex_mask & GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5);
+
       for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
          const struct intel_renderbuffer *irb =
             intel_renderbuffer(fb->_ColorDrawBuffers[i]);
@@ -535,7 +605,8 @@ brw_predraw_resolve_framebuffer(struct brw_context *brw,
          if (irb) {
             intel_miptree_prepare_texture(brw, irb->mt, irb->mt->surf.format,
                                           irb->mt_level, 1,
-                                          irb->mt_layer, irb->layer_count);
+                                          irb->mt_layer, irb->layer_count,
+                                          astc5x5_wa_disable_aux);
          }
       }
    }
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 42af41aca32..34ce7125ae3 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -605,6 +605,9 @@ static void brw_update_texture_surface(struct gl_context 
*ctx,
       enum isl_aux_usage aux_usage =
          intel_miptree_texture_aux_usage(brw, mt, format);
 
+      if (brw->gen9_astc5x5_wa_tex_mask & GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5)
+         aux_usage = ISL_AUX_USAGE_NONE;
+
       brw_emit_surface_state(brw, mt, mt->target, view, aux_usage,
                              surf_offset, surf_index,
                              0);
@@ -1111,6 +1114,9 @@ update_renderbuffer_read_surfaces(struct brw_context *brw)
             if (brw->draw_aux_usage[i] == ISL_AUX_USAGE_NONE)
                aux_usage = ISL_AUX_USAGE_NONE;
 
+            if (brw->gen9_astc5x5_wa_tex_mask & 
GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5)
+               aux_usage = ISL_AUX_USAGE_NONE;
+
             brw_emit_surface_state(brw, irb->mt, target, view, aux_usage,
                                    surf_offset, surf_index,
                                    0);
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 983f145afc9..5c53cbd331b 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -2601,10 +2601,16 @@ intel_miptree_prepare_texture(struct brw_context *brw,
                               struct intel_mipmap_tree *mt,
                               enum isl_format view_format,
                               uint32_t start_level, uint32_t num_levels,
-                              uint32_t start_layer, uint32_t num_layers)
+                              uint32_t start_layer, uint32_t num_layers,
+                              bool force_disable_aux)
 {
    enum isl_aux_usage aux_usage =
       intel_miptree_texture_aux_usage(brw, mt, view_format);
+
+   /* This is needed for the ASTC5x5 workaround */
+   if (force_disable_aux)
+      aux_usage = ISL_AUX_USAGE_NONE;
+
    bool clear_supported = aux_usage != ISL_AUX_USAGE_NONE;
 
    /* Clear color is specified as ints or floats and the conversion is done by
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
index bb7df7ad235..1484d154d38 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
@@ -627,7 +627,8 @@ intel_miptree_prepare_texture(struct brw_context *brw,
                               struct intel_mipmap_tree *mt,
                               enum isl_format view_format,
                               uint32_t start_level, uint32_t num_levels,
-                              uint32_t start_layer, uint32_t num_layers);
+                              uint32_t start_layer, uint32_t num_layers,
+                              bool force_disable_aux);
 void
 intel_miptree_prepare_image(struct brw_context *brw,
                             struct intel_mipmap_tree *mt);
-- 
2.17.1

_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to