Am 01.02.2018 um 09:21 schrieb Dave Airlie: > From: Dave Airlie <[email protected]> > > This passes the CTS and piglit tests. > > This also disable sb for helper invocations until it doesn't > mess up the VPM flags. > > Thanks to Ilia and Glenn for advice, and Roland for working > out the working evergreen path. > --- > src/gallium/drivers/r600/r600_asm.c | 7 +- > src/gallium/drivers/r600/r600_isa.c | 1 + > src/gallium/drivers/r600/r600_isa.h | 5 +- > src/gallium/drivers/r600/r600_shader.c | 113 > +++++++++++++++++++++++++++++++++ > src/gallium/drivers/r600/r600_shader.h | 1 + > src/gallium/drivers/r600/r600_sq.h | 2 + > 6 files changed, 126 insertions(+), 3 deletions(-) > > diff --git a/src/gallium/drivers/r600/r600_asm.c > b/src/gallium/drivers/r600/r600_asm.c > index 21d069d..ec2d34e 100644 > --- a/src/gallium/drivers/r600/r600_asm.c > +++ b/src/gallium/drivers/r600/r600_asm.c > @@ -2099,9 +2099,12 @@ void r600_bytecode_disasm(struct r600_bytecode *bc) > fprintf(stderr, "%04d %08X %08X %s ", id, > bc->bytecode[id], > bc->bytecode[id + 1], > cfop->name); > fprintf(stderr, "%d @%d ", cf->ndw / 4, > cf->addr); > - fprintf(stderr, "\n"); > + if (cf->vpm) > + fprintf(stderr, "VPM "); > if (cf->end_of_program) > fprintf(stderr, "EOP "); > + fprintf(stderr, "\n"); > + > } else if (cfop->flags & CF_EXP) { > int o = 0; > const char *exp_type[] = {"PIXEL", "POS ", > "PARAM"}; > @@ -2198,6 +2201,8 @@ void r600_bytecode_disasm(struct r600_bytecode *bc) > fprintf(stderr, "POP:%X ", > cf->pop_count); > if (cf->count && (cfop->flags & CF_EMIT)) > fprintf(stderr, "STREAM%d ", cf->count); > + if (cf->vpm) > + fprintf(stderr, "VPM "); > if (cf->end_of_program) > fprintf(stderr, "EOP "); > fprintf(stderr, "\n"); > diff --git a/src/gallium/drivers/r600/r600_isa.c > b/src/gallium/drivers/r600/r600_isa.c > index 2633cdc..611b370 100644 > --- a/src/gallium/drivers/r600/r600_isa.c > +++ b/src/gallium/drivers/r600/r600_isa.c > @@ -506,6 +506,7 @@ static const struct cf_op_info 
cf_op_table[] = { > {"ALU_EXT", { -1, -1, 0x0C, 0x0C }, > CF_CLAUSE | CF_ALU | CF_ALU_EXT }, > {"ALU_CONTINUE", { 0x0D, 0x0D, 0x0D, -1 }, > CF_CLAUSE | CF_ALU }, > {"ALU_BREAK", { 0x0E, 0x0E, 0x0E, -1 }, > CF_CLAUSE | CF_ALU }, > + {"ALU_VALID_PIXEL_MODE", { -1, -1, -1, 0x0E }, > CF_CLAUSE | CF_ALU }, > {"ALU_ELSE_AFTER", { 0x0F, 0x0F, 0x0F, 0x0F }, > CF_CLAUSE | CF_ALU }, > {"CF_NATIVE", { 0x00, 0x00, 0x00, 0x00 }, > 0 } > }; > diff --git a/src/gallium/drivers/r600/r600_isa.h > b/src/gallium/drivers/r600/r600_isa.h > index f6e2697..fcaf1f7 100644 > --- a/src/gallium/drivers/r600/r600_isa.h > +++ b/src/gallium/drivers/r600/r600_isa.h > @@ -646,10 +646,11 @@ struct cf_op_info > #define CF_OP_ALU_EXT 84 > #define CF_OP_ALU_CONTINUE 85 > #define CF_OP_ALU_BREAK 86 > -#define CF_OP_ALU_ELSE_AFTER 87 > +#define CF_OP_ALU_VALID_PIXEL_MODE 87 > +#define CF_OP_ALU_ELSE_AFTER 88 > > /* CF_NATIVE means that r600_bytecode_cf contains pre-encoded native data */ > -#define CF_NATIVE 88 > +#define CF_NATIVE 89 > > enum r600_chip_class { > ISA_CC_R600, > diff --git a/src/gallium/drivers/r600/r600_shader.c > b/src/gallium/drivers/r600/r600_shader.c > index a462691..9388db9 100644 > --- a/src/gallium/drivers/r600/r600_shader.c > +++ b/src/gallium/drivers/r600/r600_shader.c > @@ -197,6 +197,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx, > > use_sb &= !shader->shader.uses_atomics; > use_sb &= !shader->shader.uses_images; > + use_sb &= !shader->shader.uses_helper_invocation; > > /* Check if the bytecode has already been built. 
*/ > if (!shader->shader.bc.bytecode) { > @@ -346,6 +347,7 @@ struct r600_shader_ctx { > boolean clip_vertex_write; > unsigned cv_output; > unsigned edgeflag_output; > + int helper_invoc_reg; > int cs_block_size_reg; > int cs_grid_size_reg; > bool cs_block_size_loaded, cs_grid_size_loaded; > @@ -1295,6 +1297,93 @@ static int load_sample_position(struct r600_shader_ctx > *ctx, struct r600_shader_ > return t1; > } > > +static int eg_load_helper_invocation(struct r600_shader_ctx *ctx) > +{ > + int r; > + struct r600_bytecode_alu alu; > + > + /* do a vtx fetch with wqm set on the vtx fetch */ > + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); > + alu.op = ALU_OP1_MOV; > + alu.dst.sel = ctx->helper_invoc_reg; > + alu.dst.chan = 0; > + alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; > + alu.src[0].value = 0xffffffff; > + alu.dst.write = 1; > + alu.last = 1; > + r = r600_bytecode_add_alu(ctx->bc, &alu); > + if (r) > + return r; > + > + /* do a vtx fetch in VPM mode */ > + struct r600_bytecode_vtx vtx; > + memset(&vtx, 0, sizeof(vtx)); > + vtx.op = FETCH_OP_GET_BUFFER_RESINFO; > + vtx.buffer_id = R600_BUFFER_INFO_CONST_BUFFER; > + vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; > + vtx.src_gpr = 0; > + vtx.mega_fetch_count = 16; /* no idea here really... 
*/ > + vtx.dst_gpr = ctx->helper_invoc_reg; > + vtx.dst_sel_x = 4; > + vtx.dst_sel_y = 7; /* SEL_Y */ > + vtx.dst_sel_z = 7; /* SEL_Z */ > + vtx.dst_sel_w = 7; /* SEL_W */ > + vtx.data_format = FMT_32; > + if ((r = r600_bytecode_add_vtx_tc(ctx->bc, &vtx))) > + return r; > + ctx->bc->cf_last->vpm = 1; > + > + /* compare the result with 0 */ > + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); > + alu.op = ALU_OP3_CNDE_INT; > + alu.is_op3 = 1; > + alu.dst.sel = ctx->helper_invoc_reg; > + alu.dst.chan = 0; > + alu.dst.write = 1; > + alu.src[0].sel = ctx->helper_invoc_reg; > + alu.src[0].chan = 0; > + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; > + alu.src[1].value = 0x0; > + alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; > + alu.src[2].value = 0xffffffff; > + alu.last = 1; > + r = r600_bytecode_add_alu(ctx->bc, &alu); > + if (r) > + return r;
I only realized this later: this ALU conditional (the CNDE_INT comparing the result with 0) is completely unnecessary, so just skip it...
Other than that, Reviewed-by: Roland Scheidegger <[email protected]> > + return 0; > +} > + > +static int cm_load_helper_invocation(struct r600_shader_ctx *ctx) > +{ > + int r; > + struct r600_bytecode_alu alu; > + > + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); > + alu.op = ALU_OP1_MOV; > + alu.dst.sel = ctx->helper_invoc_reg; > + alu.dst.chan = 0; > + alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; > + alu.src[0].value = 0xffffffff; > + alu.dst.write = 1; > + alu.last = 1; > + r = r600_bytecode_add_alu(ctx->bc, &alu); > + if (r) > + return r; > + > + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); > + alu.op = ALU_OP1_MOV; > + alu.dst.sel = ctx->helper_invoc_reg; > + alu.dst.chan = 0; > + alu.src[0].sel = V_SQ_ALU_SRC_0; > + alu.dst.write = 1; > + alu.last = 1; > + r = r600_bytecode_add_alu_type(ctx->bc, &alu, > CF_OP_ALU_VALID_PIXEL_MODE); > + if (r) > + return r; > + > + return ctx->helper_invoc_reg; > +} > + > static int load_block_grid_size(struct r600_shader_ctx *ctx, bool load_block) > { > struct r600_bytecode_vtx vtx; > @@ -1458,6 +1547,12 @@ static void tgsi_src(struct r600_shader_ctx *ctx, > r600_src->sel = load_block_grid_size(ctx, false); > } else if > (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == > TGSI_SEMANTIC_BLOCK_SIZE) { > r600_src->sel = load_block_grid_size(ctx, true); > + } else if > (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == > TGSI_SEMANTIC_HELPER_INVOCATION) { > + r600_src->sel = ctx->helper_invoc_reg; > + r600_src->swizzle[0] = 0; > + r600_src->swizzle[1] = 0; > + r600_src->swizzle[2] = 0; > + r600_src->swizzle[3] = 0; > } > } else { > if (tgsi_src->Register.Indirect) > @@ -3120,6 +3215,7 @@ static int r600_shader_from_tgsi(struct r600_context > *rctx, > tgsi_scan_shader(tokens, &ctx.info); > shader->indirect_files = ctx.info.indirect_files; > > + shader->uses_helper_invocation = false; > shader->uses_doubles = ctx.info.uses_doubles; > shader->uses_atomics = 
ctx.info.file_mask[TGSI_FILE_HW_ATOMIC]; > shader->nsys_inputs = 0; > @@ -3193,6 +3289,7 @@ static int r600_shader_from_tgsi(struct r600_context > *rctx, > ctx.clip_vertex_write = 0; > ctx.thread_id_gpr_loaded = false; > > + ctx.helper_invoc_reg = -1; > ctx.cs_block_size_reg = -1; > ctx.cs_grid_size_reg = -1; > ctx.cs_block_size_loaded = false; > @@ -3238,6 +3335,13 @@ static int r600_shader_from_tgsi(struct r600_context > *rctx, > ctx.file_offset[TGSI_FILE_INPUT] = > evergreen_gpr_count(&ctx); > else > ctx.file_offset[TGSI_FILE_INPUT] = > allocate_system_value_inputs(&ctx, ctx.file_offset[TGSI_FILE_INPUT]); > + > + for (i = 0; i < PIPE_MAX_SHADER_INPUTS; i++) { > + if (ctx.info.system_value_semantic_name[i] == > TGSI_SEMANTIC_HELPER_INVOCATION) { > + ctx.helper_invoc_reg = > ctx.file_offset[TGSI_FILE_INPUT]++; > + shader->uses_helper_invocation = true; > + } > + } > } > if (ctx.type == PIPE_SHADER_GEOMETRY) { > /* FIXME 1 would be enough in some cases (3 or less input > vertices) */ > @@ -3439,6 +3543,15 @@ static int r600_shader_from_tgsi(struct r600_context > *rctx, > if (shader->fs_write_all && rscreen->b.chip_class >= EVERGREEN) > shader->nr_ps_max_color_exports = 8; > > + if (ctx.shader->uses_helper_invocation) { > + if (ctx.bc->chip_class == CAYMAN) > + r = cm_load_helper_invocation(&ctx); > + else > + r = eg_load_helper_invocation(&ctx); > + if (r) > + return r; > + > + } > if (ctx.fragcoord_input >= 0) { > if (ctx.bc->chip_class == CAYMAN) { > for (j = 0 ; j < 4; j++) { > diff --git a/src/gallium/drivers/r600/r600_shader.h > b/src/gallium/drivers/r600/r600_shader.h > index 8444907..da96688 100644 > --- a/src/gallium/drivers/r600/r600_shader.h > +++ b/src/gallium/drivers/r600/r600_shader.h > @@ -119,6 +119,7 @@ struct r600_shader { > boolean uses_doubles; > boolean uses_atomics; > boolean uses_images; > + boolean uses_helper_invocation; > uint8_t atomic_base; > uint8_t rat_base; > uint8_t image_size_const_offset; > diff --git 
a/src/gallium/drivers/r600/r600_sq.h > b/src/gallium/drivers/r600/r600_sq.h > index f51ffcf..6b07dc1 100644 > --- a/src/gallium/drivers/r600/r600_sq.h > +++ b/src/gallium/drivers/r600/r600_sq.h > @@ -198,6 +198,8 @@ > #define EG_V_SQ_ALU_SRC_LDS_DIRECT_B > 0x000000E0 > #define EG_V_SQ_ALU_SRC_TIME_HI > 0x000000E3 > #define EG_V_SQ_ALU_SRC_TIME_LO > 0x000000E4 > +#define EG_V_SQ_ALU_SRC_MASK_HI > 0x000000E5 > +#define EG_V_SQ_ALU_SRC_MASK_LO > 0x000000E6 > #define EG_V_SQ_ALU_SRC_HW_WAVE_ID > 0x000000E7 > #define EG_V_SQ_ALU_SRC_SIMD_ID > 0x000000E8 > #define EG_V_SQ_ALU_SRC_SE_ID > 0x000000E9 > _______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
