Module: Mesa
Branch: main
Commit: a3a6f6855ed4c9174954770c093e6101b1643eb6
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=a3a6f6855ed4c9174954770c093e6101b1643eb6

Author: Marek Olšák <[email protected]>
Date:   Thu Dec  7 15:50:07 2023 -0500

mesa,u_threaded_context: add a fast path for glDrawElements calling TC directly

mesa/main/draw.c calls threaded_context to add a draw call to the batch,
and the caller (draw.c) fills in the fields of that call itself.

This way we don't have to fill pipe_draw_info in a local variable and later
copy it to tc_batch. tc_batch is filled from draw.c directly.

It also eliminates a few conditional jumps thanks to assumptions we can make
in DrawElements but not tc_draw_vbo.

This decreases the CPU usage of the GL frontend thread by 1.1 percentage
points. That thread has CPU usage of 26%, so its overhead decreases by 4.2%
in relative terms (1.1 / 26).

Reviewed-By: Mike Blumenkrantz <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26584>

---

 src/gallium/auxiliary/util/u_threaded_context.c | 28 +++++++++---
 src/gallium/auxiliary/util/u_threaded_context.h | 10 +++++
 src/mesa/main/draw.c                            | 58 +++++++++++++++++++++++--
 src/mesa/state_tracker/st_draw.c                |  2 +-
 src/mesa/state_tracker/st_draw.h                |  8 ++++
 5 files changed, 96 insertions(+), 10 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_threaded_context.c b/src/gallium/auxiliary/util/u_threaded_context.c
index 7e1959b932d..17e27bee9ed 100644
--- a/src/gallium/auxiliary/util/u_threaded_context.c
+++ b/src/gallium/auxiliary/util/u_threaded_context.c
@@ -3693,12 +3693,6 @@ out_of_memory:
    tc->flushing = false;
 }
 
-struct tc_draw_single {
-   struct tc_call_base base;
-   unsigned index_bias;
-   struct pipe_draw_info info;
-};
-
 struct tc_draw_single_drawid {
    struct tc_draw_single base;
    unsigned drawid_offset;
@@ -4256,6 +4250,28 @@ tc_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info,
       tc_add_all_gfx_bindings_to_buffer_list(tc);
 }
 
+struct tc_draw_single *
+tc_add_draw_single_call(struct pipe_context *_pipe,
+                        struct pipe_resource *index_bo)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+
+   if (tc->options.parse_renderpass_info)
+      tc_parse_draw(tc);
+
+   struct tc_draw_single *p =
+      tc_add_call(tc, TC_CALL_draw_single, tc_draw_single);
+
+   if (index_bo)
+      tc_add_to_buffer_list(tc, &tc->buffer_lists[tc->next_buf_list], index_bo);
+
+   /* This must be after tc_add_*call, which can flush the batch. */
+   if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
+      tc_add_all_gfx_bindings_to_buffer_list(tc);
+
+   return p;
+}
+
 struct tc_draw_vstate_single {
    struct tc_call_base base;
    struct pipe_draw_start_count_bias draw;
diff --git a/src/gallium/auxiliary/util/u_threaded_context.h b/src/gallium/auxiliary/util/u_threaded_context.h
index 64dfeed543b..0ceb9bba75b 100644
--- a/src/gallium/auxiliary/util/u_threaded_context.h
+++ b/src/gallium/auxiliary/util/u_threaded_context.h
@@ -404,6 +404,12 @@ struct tc_call_base {
    uint16_t call_id;
 };
 
+struct tc_draw_single {
+   struct tc_call_base base;
+   unsigned index_bias;
+   struct pipe_draw_info info;
+};
+
 /**
  * A token representing an unflushed batch.
  *
@@ -683,6 +689,10 @@ threaded_context_flush(struct pipe_context *_pipe,
                        struct tc_unflushed_batch_token *token,
                        bool prefer_async);
 
+struct tc_draw_single *
+tc_add_draw_single_call(struct pipe_context *_pipe,
+                        struct pipe_resource *index_bo);
+
 void
 tc_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info,
             unsigned drawid_offset,
diff --git a/src/mesa/main/draw.c b/src/mesa/main/draw.c
index bb219986443..f97f70e801d 100644
--- a/src/mesa/main/draw.c
+++ b/src/mesa/main/draw.c
@@ -44,6 +44,7 @@
 #include "api_exec_decl.h"
 #include "glthread_marshal.h"
 
+#include "cso_cache/cso_context.h"
 #include "state_tracker/st_context.h"
 #include "state_tracker/st_draw.h"
 #include "util/u_draw.h"
@@ -1619,8 +1620,6 @@ _mesa_validated_drawrangeelements(struct gl_context *ctx,
       assert(end == ~0u);
    }
 
-   struct pipe_draw_info info;
-   struct pipe_draw_start_count_bias draw;
    unsigned index_size_shift = get_index_size_shift(type);
 
    if (index_bo) {
@@ -1639,6 +1638,60 @@ _mesa_validated_drawrangeelements(struct gl_context *ctx,
       }
    }
 
+   st_prepare_draw(ctx, ST_PIPELINE_RENDER_STATE_MASK);
+
+   /* Fast path for a very common DrawElements case:
+    * - there are no user indices here (always true with glthread)
+    * - DrawGallium is st_draw_gallium (regular render mode, almost always
+    *   true), which only calls cso_context::draw_vbo
+    * - the threaded context is enabled while u_vbuf is bypassed (cso_context
+    *   always calls tc_draw_vbo, which is always true with glthread if all
+    *   vertex formats are also supported by the driver)
+    * - DrawID is 0 (true if glthread isn't unrolling an indirect multi draw,
+    *   which is almost always true)
+    */
+   struct st_context *st = st_context(ctx);
+   if (index_bo && ctx->Driver.DrawGallium == st_draw_gallium &&
+       st->cso_context->draw_vbo == tc_draw_vbo && ctx->DrawID == 0) {
+      assert(!st->draw_needs_minmax_index);
+      struct pipe_resource *index_buffer =
+         _mesa_get_bufferobj_reference(ctx, index_bo);
+      struct tc_draw_single *draw =
+         tc_add_draw_single_call(st->pipe, index_buffer);
+      bool primitive_restart = ctx->Array._PrimitiveRestart[index_size_shift];
+
+      /* This must be set exactly like u_threaded_context sets it, not like
+       * it would be set for draw_vbo.
+       */
+      draw->info.mode = mode;
+      draw->info.index_size = 1 << index_size_shift;
+      draw->info.view_mask = 0;
+      /* Packed section begin. */
+      draw->info.primitive_restart = primitive_restart;
+      draw->info.has_user_indices = false;
+      draw->info.index_bounds_valid = false;
+      draw->info.increment_draw_id = false;
+      draw->info.take_index_buffer_ownership = false;
+      draw->info.index_bias_varies = false;
+      draw->info.was_line_loop = false;
+      draw->info._pad = 0;
+      /* Packed section end. */
+      draw->info.start_instance = baseInstance;
+      draw->info.instance_count = numInstances;
+      draw->info.restart_index =
+         primitive_restart ? ctx->Array._RestartIndex[index_size_shift] : 0;
+      draw->info.index.resource = index_buffer;
+
+      /* u_threaded_context stores start/count in min/max_index for single draws. */
+      draw->info.min_index = (uintptr_t)indices >> index_size_shift;
+      draw->info.max_index = count;
+      draw->index_bias = basevertex;
+      return;
+   }
+
+   struct pipe_draw_info info;
+   struct pipe_draw_start_count_bias draw;
+
    info.mode = mode;
    info.index_size = 1 << index_size_shift;
    /* Packed section begin. */
@@ -1675,7 +1728,6 @@ _mesa_validated_drawrangeelements(struct gl_context *ctx,
    info.max_index = end;
    draw.count = count;
 
-   st_prepare_draw(ctx, ST_PIPELINE_RENDER_STATE_MASK);
    if (!validate_index_bounds(ctx, &info, &draw, 1))
       return;
 
diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c
index 6fb82dedca4..0a8ccf0fa62 100644
--- a/src/mesa/state_tracker/st_draw.c
+++ b/src/mesa/state_tracker/st_draw.c
@@ -111,7 +111,7 @@ st_prepare_draw(struct gl_context *ctx, uint64_t state_mask)
    }
 }
 
-static void
+void
 st_draw_gallium(struct gl_context *ctx,
                 struct pipe_draw_info *info,
                 unsigned drawid_offset,
diff --git a/src/mesa/state_tracker/st_draw.h b/src/mesa/state_tracker/st_draw.h
index 121f0a561e2..fb847a86b57 100644
--- a/src/mesa/state_tracker/st_draw.h
+++ b/src/mesa/state_tracker/st_draw.h
@@ -77,6 +77,14 @@ pointer_to_offset(const void *ptr)
 void
 st_prepare_draw(struct gl_context *ctx, uint64_t state_mask);
 
+void
+st_draw_gallium(struct gl_context *ctx,
+                struct pipe_draw_info *info,
+                unsigned drawid_offset,
+                const struct pipe_draw_indirect_info *indirect,
+                const struct pipe_draw_start_count_bias *draws,
+                unsigned num_draws);
+
 bool
 st_draw_quad(struct st_context *st,
              float x0, float y0, float x1, float y1, float z,

Reply via email to