Decoding an opcode into the corresponding index in the dispatch tables is quite expensive, because every lookup has to walk the radix tree. By keeping a small cache of recently decoded opcodes in each dispatch table, we can dramatically speed up indirect function dispatch.
World of Padman over the network increased from 28fps to 45fps, with an almost identical increase when run indirectly over a local socket. Which serves as a nice reminder not to do this. Signed-off-by: Chris Wilson <[email protected]> --- glx/indirect_table.c | 9 ++++++--- glx/indirect_table.h | 13 +++++++++---- glx/indirect_util.c | 24 ++++++++++++++++++++---- glx/indirect_util.h | 4 ++-- 4 files changed, 37 insertions(+), 13 deletions(-) diff --git a/glx/indirect_table.c b/glx/indirect_table.c index 01d1da3..747189a 100644 --- a/glx/indirect_table.c +++ b/glx/indirect_table.c @@ -188,9 +188,10 @@ static const void *Single_function_table[112][2] = { /* [ 111] = 167 */ {NULL, NULL}, }; -const struct __glXDispatchInfo Single_dispatch_info = { +struct __glXDispatchInfo Single_dispatch_info = { 8, Single_dispatch_tree, + { { -1 } }, Single_function_table, NULL, NULL @@ -1221,9 +1222,10 @@ static const gl_proto_size_func Render_size_func_table[74] = { __glXDeleteFramebuffersEXTReqSize, }; -const struct __glXDispatchInfo Render_dispatch_info = { +struct __glXDispatchInfo Render_dispatch_info = { 13, Render_dispatch_tree, + { { -1 } }, Render_function_table, Render_size_table, Render_size_func_table @@ -1585,9 +1587,10 @@ static const void *VendorPriv_function_table[104][2] = { /* [ 103] = 65551 */ {NULL, NULL}, }; -const struct __glXDispatchInfo VendorPriv_dispatch_info = { +struct __glXDispatchInfo VendorPriv_dispatch_info = { 17, VendorPriv_dispatch_tree, + { { -1 } }, VendorPriv_function_table, NULL, NULL diff --git a/glx/indirect_table.h b/glx/indirect_table.h index 4af1ccb..97dae3b 100644 --- a/glx/indirect_table.h +++ b/glx/indirect_table.h @@ -46,7 +46,12 @@ struct __glXDispatchInfo { /** */ const int_fast16_t * dispatch_tree; - +#define DISPATCH_CACHE_SIZE 32 + struct dispatch_cache { + uint16_t opcode; + int16_t index; + } dispatch_cache[DISPATCH_CACHE_SIZE]; + /** * Array of protocol decode and dispatch functions index by the opcode * search tree (i.e., \c 
dispatch_tree). The first element in each pair @@ -99,8 +104,8 @@ struct __glXDispatchInfo { */ #define IS_LEAF_INDEX(x) ((x) <= 0) -extern const struct __glXDispatchInfo Single_dispatch_info; -extern const struct __glXDispatchInfo Render_dispatch_info; -extern const struct __glXDispatchInfo VendorPriv_dispatch_info; +extern struct __glXDispatchInfo Single_dispatch_info; +extern struct __glXDispatchInfo Render_dispatch_info; +extern struct __glXDispatchInfo VendorPriv_dispatch_info; #endif /* INDIRECT_TABLE_H */ diff --git a/glx/indirect_util.c b/glx/indirect_util.c index 46a2706..114d561 100644 --- a/glx/indirect_util.c +++ b/glx/indirect_util.c @@ -200,8 +200,8 @@ __glXSendReplySwap( ClientPtr client, const void * data, size_t elements, static int -get_decode_index(const struct __glXDispatchInfo *dispatch_info, - unsigned opcode) +__get_decode_index(struct __glXDispatchInfo *dispatch_info, + unsigned opcode) { int remaining_bits; int next_remain; @@ -273,8 +273,24 @@ get_decode_index(const struct __glXDispatchInfo *dispatch_info, } +static inline int +get_decode_index(struct __glXDispatchInfo *dispatch_info, + unsigned opcode) +{ + struct dispatch_cache *cache = + &dispatch_info->dispatch_cache[opcode & (DISPATCH_CACHE_SIZE-1)]; + + if (cache->opcode != opcode) { + cache->opcode = opcode; + cache->index = __get_decode_index(dispatch_info, opcode); + } + + return cache->index; +} + + void * -__glXGetProtocolDecodeFunction(const struct __glXDispatchInfo *dispatch_info, +__glXGetProtocolDecodeFunction(struct __glXDispatchInfo *dispatch_info, int opcode, int swapped_version) { const int func_index = get_decode_index(dispatch_info, opcode); @@ -286,7 +302,7 @@ __glXGetProtocolDecodeFunction(const struct __glXDispatchInfo *dispatch_info, int -__glXGetProtocolSizeData(const struct __glXDispatchInfo *dispatch_info, +__glXGetProtocolSizeData(struct __glXDispatchInfo *dispatch_info, int opcode, __GLXrenderSizeData *data) { if (dispatch_info->size_table != NULL) { diff 
--git a/glx/indirect_util.h b/glx/indirect_util.h index b00727a..a1bcb22 100644 --- a/glx/indirect_util.h +++ b/glx/indirect_util.h @@ -43,11 +43,11 @@ extern void __glXSendReplySwap( ClientPtr client, const void * data, struct __glXDispatchInfo; extern void *__glXGetProtocolDecodeFunction( - const struct __glXDispatchInfo *dispatch_info, int opcode, + struct __glXDispatchInfo *dispatch_info, int opcode, int swapped_version); extern int __glXGetProtocolSizeData( - const struct __glXDispatchInfo *dispatch_info, int opcode, + struct __glXDispatchInfo *dispatch_info, int opcode, __GLXrenderSizeData *data); #endif /* __GLX_INDIRECT_UTIL_H__ */ -- 1.7.2.3 _______________________________________________ [email protected]: X.Org development Archives: http://lists.x.org/archives/xorg-devel Info: http://lists.x.org/mailman/listinfo/xorg-devel
