Reviewed-by: Marek Olšák <[email protected]>

Marek
On Tue, Jul 25, 2017 at 4:46 PM, Nicolai Hähnle <[email protected]> wrote:
> From: Nicolai Hähnle <[email protected]>
>
> The number of supported waves per thread group has been officially
> reduced to 16 with gfx9 (and trying to use 32 waves causes hangs).
>
> Cc: [email protected]
> ---
>  src/gallium/drivers/radeon/r600_pipe_common.c | 39
> ++++++++++++++++-----------
>  1 file changed, 24 insertions(+), 15 deletions(-)
>
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c
> b/src/gallium/drivers/radeon/r600_pipe_common.c
> index fd67d9a..cc52d6b 100644
> --- a/src/gallium/drivers/radeon/r600_pipe_common.c
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.c
> @@ -1024,6 +1024,25 @@ const char *r600_get_llvm_processor_name(enum
> radeon_family family)
>         }
>  }
>
> +static unsigned get_max_threads_per_block(struct r600_common_screen *screen,
> +                                         enum pipe_shader_ir ir_type)
> +{
> +       if (ir_type != PIPE_SHADER_IR_TGSI)
> +               return 256;
> +
> +       /* Only 16 waves per thread-group on gfx9. */
> +       if (screen->chip_class >= GFX9)
> +               return 1024;
> +
> +       /* Up to 40 waves per thread-group on GCN < gfx9. Expose a nice
> +        * round number.
> +        */
> +       if (screen->chip_class >= SI)
> +               return 2048;
> +
> +       return 256;
> +}
> +
>  static int r600_get_compute_param(struct pipe_screen *screen,
>          enum pipe_shader_ir ir_type,
>          enum pipe_compute_cap param,
> @@ -1078,27 +1097,17 @@ static int r600_get_compute_param(struct pipe_screen
> *screen,
>         case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
>                 if (ret) {
>                         uint64_t *block_size = ret;
> -                       if (rscreen->chip_class >= SI &&
> -                           ir_type == PIPE_SHADER_IR_TGSI) {
> -                               block_size[0] = 2048;
> -                               block_size[1] = 2048;
> -                               block_size[2] = 2048;
> -                       } else {
> -                               block_size[0] = 256;
> -                               block_size[1] = 256;
> -                               block_size[2] = 256;
> -                       }
> +                       unsigned threads_per_block =
> get_max_threads_per_block(rscreen, ir_type);
> +                       block_size[0] = threads_per_block;
> +                       block_size[1] = threads_per_block;
> +                       block_size[2] = threads_per_block;
>                 }
>                 return 3 * sizeof(uint64_t);
>
>         case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
>                 if (ret) {
>                         uint64_t *max_threads_per_block = ret;
> -                       if (rscreen->chip_class >= SI &&
> -                           ir_type == PIPE_SHADER_IR_TGSI)
> -                               *max_threads_per_block = 2048;
> -                       else
> -                               *max_threads_per_block = 256;
> +                       *max_threads_per_block =
> get_max_threads_per_block(rscreen, ir_type);
>                 }
>                 return sizeof(uint64_t);
>         case PIPE_COMPUTE_CAP_ADDRESS_BITS:
> --
> 2.9.3
>
> _______________________________________________
> mesa-stable mailing list
> [email protected]
> https://lists.freedesktop.org/mailman/listinfo/mesa-stable
_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
