We can also say that if gl_TessLevel* is written multiple times, then one of these must be true: - there must not be a barrier between the writes - gl_TessLevel* writes aren't inside conditional blocks
I see that these games use a barrier in TCS: - Hitman - Grid Autosport - Tomb Raider All constraints are true for those games. If we do this, it should be enough: "there must not be a barrier between the writes" Marek On Thu, Sep 7, 2017 at 9:13 AM, Nicolai Hähnle <[email protected]> wrote: > On 06.09.2017 19:03, Marek Olšák wrote: >> >> From: Marek Olšák <[email protected]> >> >> The pass tries to deduce whether tess factors are always written by >> invocation 0 (at least). >> >> The implication for radeonsi is that it doesn't have to use a barrier >> near the end of TCS, and doesn't have to use LDS for passing the tess >> factors to the epilog. > > > What about the following shader snippet, which is particularly nasty on > purpose: > > gl_TessLevelInner = ...; > > barrier(); > > if (gl_InvocationID == 1) { > gl_TessLevelInner = ...; > } > > The final gl_TessLevelInner must be the one written by invocation 1. > > The point is, this would be a nice improvement, but it requires us to check > that whenever tess factors are written, then that path includes invocation > 0. > > (The written value itself might depend on the invocation ID, but in that > case, we can do whatever we like, which means we can just take the value > written by invocation 0.) > > I don't think we can determine this easily with the tools we have, and the > effort is likely not worth it. > > Although you could do a very simplistic approximation of "tess factors are > only written outside of control flow", which is probably reasonably common. > (Uniform control flow would also work -- but that would require going to the > LLVM level). 
> > Cheers, > Nicolai > > > >> --- >> src/gallium/auxiliary/tgsi/tgsi_scan.c | 188 >> +++++++++++++++++++++++++++++++++ >> src/gallium/auxiliary/tgsi/tgsi_scan.h | 11 ++ >> 2 files changed, 199 insertions(+) >> >> diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c >> b/src/gallium/auxiliary/tgsi/tgsi_scan.c >> index db87ce3..612a8c6 100644 >> --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c >> +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c >> @@ -930,10 +930,198 @@ tgsi_scan_arrays(const struct tgsi_token *tokens, >> array->writemask |= dst->Register.WriteMask; >> } >> } >> } >> } >> tgsi_parse_free(&parse); >> return; >> } >> + >> +static void >> +check_no_subroutines(const struct tgsi_full_instruction *inst) >> +{ >> + switch (inst->Instruction.Opcode) { >> + case TGSI_OPCODE_BGNSUB: >> + case TGSI_OPCODE_ENDSUB: >> + case TGSI_OPCODE_CAL: >> + unreachable("subroutines unhandled"); >> + } >> +} >> + >> +static unsigned >> +get_inst_tessfactor_writemask(const struct tgsi_shader_info *info, >> + const struct tgsi_full_instruction *inst) >> +{ >> + unsigned writemask = 0; >> + >> + for (unsigned i = 0; i < inst->Instruction.NumDstRegs; i++) { >> + const struct tgsi_full_dst_register *dst = &inst->Dst[i]; >> + >> + if (dst->Register.File == TGSI_FILE_OUTPUT && >> + !dst->Register.Indirect) { >> + unsigned name = info->output_semantic_name[dst->Register.Index]; >> + >> + if (name == TGSI_SEMANTIC_TESSINNER) >> + writemask |= dst->Register.WriteMask; >> + else if (name == TGSI_SEMANTIC_TESSOUTER) >> + writemask |= dst->Register.WriteMask << 4; >> + } >> + } >> + return writemask; >> +} >> + >> +static unsigned >> +get_block_tessfactor_writemask(const struct tgsi_shader_info *info, >> + struct tgsi_parse_context *parse, >> + unsigned end_opcode) >> +{ >> + struct tgsi_full_instruction *inst; >> + unsigned writemask = 0; >> + >> + do { >> + tgsi_parse_token(parse); >> + assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION); >> + inst = 
&parse->FullToken.FullInstruction; >> + check_no_subroutines(inst); >> + >> + /* Recursively process nested blocks. */ >> + switch (inst->Instruction.Opcode) { >> + case TGSI_OPCODE_IF: >> + case TGSI_OPCODE_UIF: >> + writemask |= >> + get_block_tessfactor_writemask(info, parse, >> TGSI_OPCODE_ENDIF); >> + continue; >> + >> + case TGSI_OPCODE_BGNLOOP: >> + writemask |= >> + get_block_tessfactor_writemask(info, parse, >> TGSI_OPCODE_ENDLOOP); >> + continue; >> + } >> + >> + writemask |= get_inst_tessfactor_writemask(info, inst); >> + } while (inst->Instruction.Opcode != end_opcode); >> + >> + return writemask; >> +} >> + >> +static void >> +get_if_block_tessfactor_writemask(const struct tgsi_shader_info *info, >> + struct tgsi_parse_context *parse, >> + unsigned *upper_block_tf_writemask, >> + unsigned *cond_block_tf_writemask) >> +{ >> + struct tgsi_full_instruction *inst; >> + unsigned then_tessfactor_writemask = 0; >> + unsigned else_tessfactor_writemask = 0; >> + bool is_then = true; >> + >> + do { >> + tgsi_parse_token(parse); >> + assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION); >> + inst = &parse->FullToken.FullInstruction; >> + check_no_subroutines(inst); >> + >> + switch (inst->Instruction.Opcode) { >> + case TGSI_OPCODE_ELSE: >> + is_then = false; >> + continue; >> + >> + /* Recursively process nested blocks. */ >> + case TGSI_OPCODE_IF: >> + case TGSI_OPCODE_UIF: >> + get_if_block_tessfactor_writemask(info, parse, >> + is_then ? >> &then_tessfactor_writemask : >> + >> &else_tessfactor_writemask, >> + cond_block_tf_writemask); >> + continue; >> + >> + case TGSI_OPCODE_BGNLOOP: >> + *cond_block_tf_writemask |= >> + get_block_tessfactor_writemask(info, parse, >> TGSI_OPCODE_ENDLOOP); >> + continue; >> + } >> + >> + /* Process an instruction in the current block. 
*/ >> + unsigned writemask = get_inst_tessfactor_writemask(info, inst); >> + >> + if (writemask) { >> + if (is_then) >> + then_tessfactor_writemask |= writemask; >> + else >> + else_tessfactor_writemask |= writemask; >> + } >> + } while (inst->Instruction.Opcode != TGSI_OPCODE_ENDIF); >> + >> + if (then_tessfactor_writemask || else_tessfactor_writemask) { >> + /* If both statements write the same tess factor channels, >> + * we can say that the upper block writes them too. */ >> + *upper_block_tf_writemask |= then_tessfactor_writemask & >> + else_tessfactor_writemask; >> + *cond_block_tf_writemask |= then_tessfactor_writemask | >> + else_tessfactor_writemask; >> + } >> +} >> + >> +void >> +tgsi_scan_tess_ctrl(const struct tgsi_token *tokens, >> + const struct tgsi_shader_info *info, >> + struct tgsi_tessctrl_info *out) >> +{ >> + memset(out, 0, sizeof(*out)); >> + >> + if (info->processor != PIPE_SHADER_TESS_CTRL) >> + return; >> + >> + struct tgsi_parse_context parse; >> + if (tgsi_parse_init(&parse, tokens) != TGSI_PARSE_OK) { >> + debug_printf("tgsi_parse_init() failed in tgsi_scan_arrays()!\n"); >> + return; >> + } >> + >> + /* The pass works as follows: >> + * If all codepaths write tess factors, we can say that all >> invocations >> + * define tess factors, therefore invocation 0 defines tess factors. >> + * >> + * Each tess factor channel is tracked separately. >> + */ >> + unsigned main_block_tf_writemask = 0; >> + unsigned cond_block_tf_writemask = 0; >> + >> + while (!tgsi_parse_end_of_tokens(&parse)) { >> + tgsi_parse_token(&parse); >> + >> + if (parse.FullToken.Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION) >> + continue; >> + >> + struct tgsi_full_instruction *inst = >> &parse.FullToken.FullInstruction; >> + check_no_subroutines(inst); >> + >> + /* Process nested blocks. 
*/ >> + switch (inst->Instruction.Opcode) { >> + case TGSI_OPCODE_IF: >> + case TGSI_OPCODE_UIF: >> + get_if_block_tessfactor_writemask(info, &parse, >> + &main_block_tf_writemask, >> + &cond_block_tf_writemask); >> + continue; >> + >> + case TGSI_OPCODE_BGNLOOP: >> + cond_block_tf_writemask |= >> + get_block_tessfactor_writemask(info, &parse, >> TGSI_OPCODE_ENDIF); >> + continue; >> + } >> + >> + main_block_tf_writemask |= get_inst_tessfactor_writemask(info, >> inst); >> + } >> + >> + /* If there is a conditional block that writes a tess factor channel >> that >> + * the main block doesn't write, we can't say (we're not sure) that >> + * invocation 0 writes it. >> + */ >> + out->invoc0_tessfactors_are_def = >> + main_block_tf_writemask && >> + !(cond_block_tf_writemask & ~main_block_tf_writemask); >> + >> + tgsi_parse_free(&parse); >> +} >> diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h >> b/src/gallium/auxiliary/tgsi/tgsi_scan.h >> index 857434f..ff64789 100644 >> --- a/src/gallium/auxiliary/tgsi/tgsi_scan.h >> +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h >> @@ -181,30 +181,41 @@ struct tgsi_array_info >> /** Whether an array with this ID was declared. */ >> bool declared; >> /** The OR of all writemasks used to write to this array. */ >> ubyte writemask; >> /** The range with which the array was declared. */ >> struct tgsi_declaration_range range; >> }; >> +struct tgsi_tessctrl_info >> +{ >> + /** Whether all codepaths of invocation 0 write tess factors. 
*/ >> + bool invoc0_tessfactors_are_def; >> +}; >> + >> extern void >> tgsi_scan_shader(const struct tgsi_token *tokens, >> struct tgsi_shader_info *info); >> void >> tgsi_scan_arrays(const struct tgsi_token *tokens, >> unsigned file, >> unsigned max_array_id, >> struct tgsi_array_info *arrays); >> +void >> +tgsi_scan_tess_ctrl(const struct tgsi_token *tokens, >> + const struct tgsi_shader_info *info, >> + struct tgsi_tessctrl_info *out); >> + >> static inline bool >> tgsi_is_bindless_image_file(unsigned file) >> { >> return file != TGSI_FILE_IMAGE && >> file != TGSI_FILE_MEMORY && >> file != TGSI_FILE_BUFFER; >> } >> #ifdef __cplusplus >> } // extern "C" >> > > > -- > Lerne, wie die Welt wirklich ist, > Aber vergiss niemals, wie sie sein sollte. _______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
