Looks great to me. (I think I even mentioned, back when I extended the sb workaround logic to exclude Hemlock, that it seemed odd to me that the stack workarounds were different in the non-sb path...)
Reviewed-by: Roland Scheidegger <[email protected]> Am 09.03.2018 um 07:09 schrieb Dave Airlie: > From: Dave Airlie <[email protected]> > > This is ported from the sb backend, there are some issues with > evergreen stacks on the boundary between entries and ALU_PUSH_BEFORE > instructions. > > Whenever we are going to use a push before, we check the stack > usage and if we have to use the workaround, then we switch to > a separate push. > > I noticed this problem dealing with some of the soft fp64 shaders, > in nosb mode, they are quite stack happy. > > This fixes all the glitches and inconsistencies I've seen with them > > Signed-off-by: Dave Airlie <[email protected]> > --- > src/gallium/drivers/r600/r600_shader.c | 39 > +++++++++++++++++++++++++++------- > 1 file changed, 31 insertions(+), 8 deletions(-) > > diff --git a/src/gallium/drivers/r600/r600_shader.c > b/src/gallium/drivers/r600/r600_shader.c > index 48750fb..3ca7890 100644 > --- a/src/gallium/drivers/r600/r600_shader.c > +++ b/src/gallium/drivers/r600/r600_shader.c > @@ -377,7 +377,7 @@ struct r600_shader_tgsi_instruction { > static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, const struct > pipe_stream_output_info *so, int stream, bool ind); > static const struct r600_shader_tgsi_instruction > r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], > cm_shader_tgsi_instruction[]; > static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); > -static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned > reason); > +static inline int callstack_push(struct r600_shader_ctx *ctx, unsigned > reason); > static void fc_pushlevel(struct r600_shader_ctx *ctx, int type); > static int tgsi_else(struct r600_shader_ctx *ctx); > static int tgsi_endif(struct r600_shader_ctx *ctx); > @@ -393,6 +393,15 @@ static void r600_bytecode_src(struct > r600_bytecode_alu_src *bc_src, > static int do_lds_fetch_values(struct r600_shader_ctx *ctx, unsigned > temp_reg, > unsigned dst_reg, unsigned 
mask); > > +static bool ctx_needs_stack_workaround_8xx(struct r600_shader_ctx *ctx) > +{ > + if (ctx->bc->family == CHIP_HEMLOCK || > + ctx->bc->family == CHIP_CYPRESS || > + ctx->bc->family == CHIP_JUNIPER) > + return false; > + return true; > +} > + > static bool ctx_has_doubles(struct r600_shader_ctx *ctx) > { > if (ctx->bc->family == CHIP_ARUBA || > @@ -10182,7 +10191,7 @@ static int pops(struct r600_shader_ctx *ctx, int pops) > return 0; > } > > -static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx, > +static inline int callstack_update_max_depth(struct r600_shader_ctx *ctx, > unsigned reason) > { > struct r600_stack_info *stack = &ctx->bc->stack; > @@ -10200,7 +10209,7 @@ static inline void callstack_update_max_depth(struct > r600_shader_ctx *ctx, > /* pre-r8xx: if any non-WQM PUSH instruction is invoked, 2 > elements on > * the stack must be reserved to hold the current > active/continue > * masks */ > - if (reason == FC_PUSH_VPM) { > + if (reason == FC_PUSH_VPM || stack->push > 0) { > elements += 2; > } > break; > @@ -10226,7 +10235,7 @@ static inline void callstack_update_max_depth(struct > r600_shader_ctx *ctx, > * NOTE: it seems we also need to reserve additional element > in some > * other cases, e.g. 
when we have 4 levels of PUSH_VPM in > the shader, > * then STACK_SIZE should be 2 instead of 1 */ > - if (reason == FC_PUSH_VPM) { > + if (reason == FC_PUSH_VPM || stack->push > 0) { > elements += 1; > } > break; > @@ -10245,6 +10254,7 @@ static inline void callstack_update_max_depth(struct > r600_shader_ctx *ctx, > > if (entries > stack->max_entries) > stack->max_entries = entries; > + return elements; > } > > static inline void callstack_pop(struct r600_shader_ctx *ctx, unsigned > reason) > @@ -10268,7 +10278,7 @@ static inline void callstack_pop(struct > r600_shader_ctx *ctx, unsigned reason) > } > } > > -static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned > reason) > +static inline int callstack_push(struct r600_shader_ctx *ctx, unsigned > reason) > { > switch (reason) { > case FC_PUSH_VPM: > @@ -10276,6 +10286,7 @@ static inline void callstack_push(struct > r600_shader_ctx *ctx, unsigned reason) > break; > case FC_PUSH_WQM: > ++ctx->bc->stack.push_wqm; > + break; > case FC_LOOP: > ++ctx->bc->stack.loop; > break; > @@ -10283,7 +10294,7 @@ static inline void callstack_push(struct > r600_shader_ctx *ctx, unsigned reason) > assert(0); > } > > - callstack_update_max_depth(ctx, reason); > + return callstack_update_max_depth(ctx, reason); > } > > static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp) > @@ -10367,12 +10378,25 @@ static int emit_if(struct r600_shader_ctx *ctx, int > opcode, > struct r600_bytecode_alu_src *src) > { > int alu_type = CF_OP_ALU_PUSH_BEFORE; > + bool needs_workaround = false; > + int elems = callstack_push(ctx, FC_PUSH_VPM); > + > + if (ctx->bc->chip_class == CAYMAN && ctx->bc->stack.loop > 1) > + needs_workaround = true; > + > + if (ctx->bc->chip_class == EVERGREEN && > ctx_needs_stack_workaround_8xx(ctx)) { > + unsigned dmod1 = (elems - 1) % ctx->bc->stack.entry_size; > + unsigned dmod2 = (elems) % ctx->bc->stack.entry_size; > + > + if (elems && (!dmod1 || !dmod2)) > + needs_workaround = true; > + } > > /* 
There is a hardware bug on Cayman where a BREAK/CONTINUE followed by > * LOOP_STARTxxx for nested loops may put the branch stack into a state > * such that ALU_PUSH_BEFORE doesn't work as expected. Workaround this > * by replacing the ALU_PUSH_BEFORE with a PUSH + ALU */ > - if (ctx->bc->chip_class == CAYMAN && ctx->bc->stack.loop > 1) { > + if (needs_workaround) { > r600_bytecode_add_cfinst(ctx->bc, CF_OP_PUSH); > ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2; > alu_type = CF_OP_ALU; > @@ -10384,7 +10408,6 @@ static int emit_if(struct r600_shader_ctx *ctx, int > opcode, > > fc_pushlevel(ctx, FC_IF); > > - callstack_push(ctx, FC_PUSH_VPM); > return 0; > } > > _______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
