Module: Mesa Branch: main Commit: a3a6f6855ed4c9174954770c093e6101b1643eb6 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a3a6f6855ed4c9174954770c093e6101b1643eb6
Author: Marek Olšák <[email protected]> Date: Thu Dec 7 15:50:07 2023 -0500 mesa,u_threaded_context: add a fast path for glDrawElements calling TC directly mesa/main/draw.c calls threaded_context to add a draw call, but the caller fills it manually. This way we don't have to fill pipe_draw_info in a local variable and later copy it to tc_batch. tc_batch is filled from draw.c directly. It also eliminates a few conditional jumps thanks to assumptions we can make in DrawElements but not tc_draw_vbo. This decreases the overhead of the GL frontend thread by 1.1%, which has CPU usage of 26%, so it decreases the overhead for that thread by 4.2%. (1.1 / 26) Reviewed-By: Mike Blumenkrantz <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26584> --- src/gallium/auxiliary/util/u_threaded_context.c | 28 +++++++++--- src/gallium/auxiliary/util/u_threaded_context.h | 10 +++++ src/mesa/main/draw.c | 58 +++++++++++++++++++++++-- src/mesa/state_tracker/st_draw.c | 2 +- src/mesa/state_tracker/st_draw.h | 8 ++++ 5 files changed, 96 insertions(+), 10 deletions(-) diff --git a/src/gallium/auxiliary/util/u_threaded_context.c b/src/gallium/auxiliary/util/u_threaded_context.c index 7e1959b932d..17e27bee9ed 100644 --- a/src/gallium/auxiliary/util/u_threaded_context.c +++ b/src/gallium/auxiliary/util/u_threaded_context.c @@ -3693,12 +3693,6 @@ out_of_memory: tc->flushing = false; } -struct tc_draw_single { - struct tc_call_base base; - unsigned index_bias; - struct pipe_draw_info info; -}; - struct tc_draw_single_drawid { struct tc_draw_single base; unsigned drawid_offset; @@ -4256,6 +4250,28 @@ tc_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info, tc_add_all_gfx_bindings_to_buffer_list(tc); } +struct tc_draw_single * +tc_add_draw_single_call(struct pipe_context *_pipe, + struct pipe_resource *index_bo) +{ + struct threaded_context *tc = threaded_context(_pipe); + + if (tc->options.parse_renderpass_info) + tc_parse_draw(tc); + + struct tc_draw_single *p = + tc_add_call(tc, TC_CALL_draw_single, tc_draw_single); + + if (index_bo) + tc_add_to_buffer_list(tc, &tc->buffer_lists[tc->next_buf_list], index_bo); + + /* This must be after tc_add_*call, which can flush the batch. */ + if (unlikely(tc->add_all_gfx_bindings_to_buffer_list)) + tc_add_all_gfx_bindings_to_buffer_list(tc); + + return p; +} + struct tc_draw_vstate_single { struct tc_call_base base; struct pipe_draw_start_count_bias draw; diff --git a/src/gallium/auxiliary/util/u_threaded_context.h b/src/gallium/auxiliary/util/u_threaded_context.h index 64dfeed543b..0ceb9bba75b 100644 --- a/src/gallium/auxiliary/util/u_threaded_context.h +++ b/src/gallium/auxiliary/util/u_threaded_context.h @@ -404,6 +404,12 @@ struct tc_call_base { uint16_t call_id; }; +struct tc_draw_single { + struct tc_call_base base; + unsigned index_bias; + struct pipe_draw_info info; +}; + /** * A token representing an unflushed batch. * @@ -683,6 +689,10 @@ threaded_context_flush(struct pipe_context *_pipe, struct tc_unflushed_batch_token *token, bool prefer_async); +struct tc_draw_single * +tc_add_draw_single_call(struct pipe_context *_pipe, + struct pipe_resource *index_bo); + void tc_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info, unsigned drawid_offset, diff --git a/src/mesa/main/draw.c b/src/mesa/main/draw.c index bb219986443..f97f70e801d 100644 --- a/src/mesa/main/draw.c +++ b/src/mesa/main/draw.c @@ -44,6 +44,7 @@ #include "api_exec_decl.h" #include "glthread_marshal.h" +#include "cso_cache/cso_context.h" #include "state_tracker/st_context.h" #include "state_tracker/st_draw.h" #include "util/u_draw.h" @@ -1619,8 +1620,6 @@ _mesa_validated_drawrangeelements(struct gl_context *ctx, assert(end == ~0u); } - struct pipe_draw_info info; - struct pipe_draw_start_count_bias draw; unsigned index_size_shift = get_index_size_shift(type); if (index_bo) { @@ -1639,6 +1638,60 @@ _mesa_validated_drawrangeelements(struct gl_context *ctx, } } + st_prepare_draw(ctx, ST_PIPELINE_RENDER_STATE_MASK); + + /* Fast path for a very common DrawElements case: + * - there are no user indices here (always true with glthread) + * - DrawGallium is st_draw_gallium (regular render mode, almost always + * true), which only calls cso_context::draw_vbo + * - the threaded context is enabled while u_vbuf is bypassed (cso_context + * always calls tc_draw_vbo, which is always true with glthread if all + * vertex formats are also supported by the driver) + * - DrawID is 0 (true if glthread isn't unrolling an indirect multi draw, + * which is almost always true) + */ + struct st_context *st = st_context(ctx); + if (index_bo && ctx->Driver.DrawGallium == st_draw_gallium && + st->cso_context->draw_vbo == tc_draw_vbo && ctx->DrawID == 0) { + assert(!st->draw_needs_minmax_index); + struct pipe_resource *index_buffer = + _mesa_get_bufferobj_reference(ctx, index_bo); + struct tc_draw_single *draw = + tc_add_draw_single_call(st->pipe, index_buffer); + bool primitive_restart = ctx->Array._PrimitiveRestart[index_size_shift]; + + /* This must be set exactly like u_threaded_context sets it, not like + * it would be set for draw_vbo. + */ + draw->info.mode = mode; + draw->info.index_size = 1 << index_size_shift; + draw->info.view_mask = 0; + /* Packed section begin. */ + draw->info.primitive_restart = primitive_restart; + draw->info.has_user_indices = false; + draw->info.index_bounds_valid = false; + draw->info.increment_draw_id = false; + draw->info.take_index_buffer_ownership = false; + draw->info.index_bias_varies = false; + draw->info.was_line_loop = false; + draw->info._pad = 0; + /* Packed section end. */ + draw->info.start_instance = baseInstance; + draw->info.instance_count = numInstances; + draw->info.restart_index = + primitive_restart ? ctx->Array._RestartIndex[index_size_shift] : 0; + draw->info.index.resource = index_buffer; + + /* u_threaded_context stores start/count in min/max_index for single draws. */ + draw->info.min_index = (uintptr_t)indices >> index_size_shift; + draw->info.max_index = count; + draw->index_bias = basevertex; + return; + } + + struct pipe_draw_info info; + struct pipe_draw_start_count_bias draw; + info.mode = mode; info.index_size = 1 << index_size_shift; /* Packed section begin. */ @@ -1675,7 +1728,6 @@ _mesa_validated_drawrangeelements(struct gl_context *ctx, info.max_index = end; draw.count = count; - st_prepare_draw(ctx, ST_PIPELINE_RENDER_STATE_MASK); if (!validate_index_bounds(ctx, &info, &draw, 1)) return; diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index 6fb82dedca4..0a8ccf0fa62 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -111,7 +111,7 @@ st_prepare_draw(struct gl_context *ctx, uint64_t state_mask) } } -static void +void st_draw_gallium(struct gl_context *ctx, struct pipe_draw_info *info, unsigned drawid_offset, diff --git a/src/mesa/state_tracker/st_draw.h b/src/mesa/state_tracker/st_draw.h index 121f0a561e2..fb847a86b57 100644 --- a/src/mesa/state_tracker/st_draw.h +++ b/src/mesa/state_tracker/st_draw.h @@ -77,6 +77,14 @@ pointer_to_offset(const void *ptr) void st_prepare_draw(struct gl_context *ctx, uint64_t state_mask); +void +st_draw_gallium(struct gl_context *ctx, + struct pipe_draw_info *info, + unsigned drawid_offset, + const struct pipe_draw_indirect_info *indirect, + const struct pipe_draw_start_count_bias *draws, + unsigned num_draws); + bool st_draw_quad(struct st_context *st, float x0, float y0, float x1, float y1, float z,
