On Mon, Sep 11, 2017 at 4:12 PM, Nicolai Hähnle <[email protected]> wrote: > On 07.09.2017 20:26, Marek Olšák wrote: >> >> From: Marek Olšák <[email protected]> >> >> The pass tries to deduce whether tess factors are always written by >> all shader invocations. >> >> The implication for radeonsi is that it doesn't have to use a barrier >> near the end of TCS, and doesn't have to use LDS for passing the tess >> factors to the epilog. >> >> v2: Handle barriers and do the analysis pass for each code segment >> surrounded by barriers separately, and AND results from all >> such segments writing tess factors. The change is trivial in the main >> switch statement. >> >> Also, the result is renamed to "tessfactors_are_def_in_all_invocs" >> to make the name accurate. >> --- >> src/gallium/auxiliary/tgsi/tgsi_scan.c | 224 >> +++++++++++++++++++++++++++++++++ >> src/gallium/auxiliary/tgsi/tgsi_scan.h | 11 ++ >> 2 files changed, 235 insertions(+) >> >> diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c >> b/src/gallium/auxiliary/tgsi/tgsi_scan.c >> index db87ce3..b893289 100644 >> --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c >> +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c >> @@ -930,10 +930,234 @@ tgsi_scan_arrays(const struct tgsi_token *tokens, >> array->writemask |= dst->Register.WriteMask; >> } >> } >> } >> } >> tgsi_parse_free(&parse); >> return; >> } >> + >> +static void >> +check_no_subroutines(const struct tgsi_full_instruction *inst) >> +{ >> + switch (inst->Instruction.Opcode) { >> + case TGSI_OPCODE_BGNSUB: >> + case TGSI_OPCODE_ENDSUB: >> + case TGSI_OPCODE_CAL: >> + unreachable("subroutines unhandled"); >> + } >> +} >> + >> +static unsigned >> +get_inst_tessfactor_writemask(const struct tgsi_shader_info *info, >> + const struct tgsi_full_instruction *inst) >> +{ >> + unsigned writemask = 0; >> + >> + for (unsigned i = 0; i < inst->Instruction.NumDstRegs; i++) { >> + const struct tgsi_full_dst_register *dst = &inst->Dst[i]; >> + >> + if (dst->Register.File == 
TGSI_FILE_OUTPUT && >> + !dst->Register.Indirect) { >> + unsigned name = info->output_semantic_name[dst->Register.Index]; >> + >> + if (name == TGSI_SEMANTIC_TESSINNER) >> + writemask |= dst->Register.WriteMask; >> + else if (name == TGSI_SEMANTIC_TESSOUTER) >> + writemask |= dst->Register.WriteMask << 4; >> + } >> + } >> + return writemask; >> +} >> + >> +static unsigned >> +get_block_tessfactor_writemask(const struct tgsi_shader_info *info, >> + struct tgsi_parse_context *parse, >> + unsigned end_opcode) >> +{ >> + struct tgsi_full_instruction *inst; >> + unsigned writemask = 0; >> + >> + do { >> + tgsi_parse_token(parse); >> + assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION); >> + inst = &parse->FullToken.FullInstruction; >> + check_no_subroutines(inst); >> + >> + /* Recursively process nested blocks. */ >> + switch (inst->Instruction.Opcode) { >> + case TGSI_OPCODE_IF: >> + case TGSI_OPCODE_UIF: >> + writemask |= >> + get_block_tessfactor_writemask(info, parse, >> TGSI_OPCODE_ENDIF); >> + continue; >> + >> + case TGSI_OPCODE_BGNLOOP: >> + writemask |= >> + get_block_tessfactor_writemask(info, parse, >> TGSI_OPCODE_ENDLOOP); >> + continue; >> + >> + case TGSI_OPCODE_BARRIER: >> + unreachable("nested BARRIER is illegal"); >> + continue; >> + } >> + >> + writemask |= get_inst_tessfactor_writemask(info, inst); >> + } while (inst->Instruction.Opcode != end_opcode); >> + >> + return writemask; >> +} >> + >> +static void >> +get_if_block_tessfactor_writemask(const struct tgsi_shader_info *info, >> + struct tgsi_parse_context *parse, >> + unsigned *upper_block_tf_writemask, >> + unsigned *cond_block_tf_writemask) >> +{ >> + struct tgsi_full_instruction *inst; >> + unsigned then_tessfactor_writemask = 0; >> + unsigned else_tessfactor_writemask = 0; >> + bool is_then = true; >> + >> + do { >> + tgsi_parse_token(parse); >> + assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION); >> + inst = &parse->FullToken.FullInstruction; >> + 
check_no_subroutines(inst); >> + >> + switch (inst->Instruction.Opcode) { >> + case TGSI_OPCODE_ELSE: >> + is_then = false; >> + continue; >> + >> + /* Recursively process nested blocks. */ >> + case TGSI_OPCODE_IF: >> + case TGSI_OPCODE_UIF: >> + get_if_block_tessfactor_writemask(info, parse, >> + is_then ? >> &then_tessfactor_writemask : >> + >> &else_tessfactor_writemask, >> + cond_block_tf_writemask); >> + continue; >> + >> + case TGSI_OPCODE_BGNLOOP: >> + *cond_block_tf_writemask |= >> + get_block_tessfactor_writemask(info, parse, >> TGSI_OPCODE_ENDLOOP); >> + continue; >> + >> + case TGSI_OPCODE_BARRIER: >> + unreachable("nested BARRIER is illegal"); >> + continue; >> + } >> + >> + /* Process an instruction in the current block. */ >> + unsigned writemask = get_inst_tessfactor_writemask(info, inst); >> + >> + if (writemask) { >> + if (is_then) >> + then_tessfactor_writemask |= writemask; >> + else >> + else_tessfactor_writemask |= writemask; >> + } >> + } while (inst->Instruction.Opcode != TGSI_OPCODE_ENDIF); >> + >> + if (then_tessfactor_writemask || else_tessfactor_writemask) { >> + /* If both statements write the same tess factor channels, >> + * we can say that the upper block writes them too. 
*/ >> + *upper_block_tf_writemask |= then_tessfactor_writemask & >> + else_tessfactor_writemask; >> + *cond_block_tf_writemask |= then_tessfactor_writemask | >> + else_tessfactor_writemask; >> + } >> +} >> + >> +void >> +tgsi_scan_tess_ctrl(const struct tgsi_token *tokens, >> + const struct tgsi_shader_info *info, >> + struct tgsi_tessctrl_info *out) >> +{ >> + memset(out, 0, sizeof(*out)); >> + >> + if (info->processor != PIPE_SHADER_TESS_CTRL) >> + return; >> + >> + struct tgsi_parse_context parse; >> + if (tgsi_parse_init(&parse, tokens) != TGSI_PARSE_OK) { >> + debug_printf("tgsi_parse_init() failed in tgsi_scan_arrays()!\n"); >> + return; >> + } >> + >> + /* The pass works as follows: >> + * If all codepaths write tess factors, we can say that all >> invocations >> + * define tess factors. >> + * >> + * Each tess factor channel is tracked separately. >> + */ >> + unsigned main_block_tf_writemask = 0; /* if main block writes tess >> factors */ >> + unsigned cond_block_tf_writemask = 0; /* if cond block writes tess >> factors */ >> + >> + /* Initial value = true. Here the pass will accumulate results from >> multiple >> + * segments surrounded by barriers. If tess factors aren't written at >> all, >> + * it's a shader bug and we don't care if this will be true. >> + */ >> + out->tessfactors_are_def_in_all_invocs = true; >> + >> + while (!tgsi_parse_end_of_tokens(&parse)) { >> + tgsi_parse_token(&parse); >> + >> + if (parse.FullToken.Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION) >> + continue; >> + >> + struct tgsi_full_instruction *inst = >> &parse.FullToken.FullInstruction; >> + check_no_subroutines(inst); >> + >> + /* Process nested blocks. 
*/ >> + switch (inst->Instruction.Opcode) { >> + case TGSI_OPCODE_IF: >> + case TGSI_OPCODE_UIF: >> + get_if_block_tessfactor_writemask(info, &parse, >> + &main_block_tf_writemask, >> + &cond_block_tf_writemask); >> + continue; >> + >> + case TGSI_OPCODE_BGNLOOP: >> + cond_block_tf_writemask |= >> + get_block_tessfactor_writemask(info, &parse, >> TGSI_OPCODE_ENDIF); >> + continue; >> + >> + case TGSI_OPCODE_BARRIER: >> + /* The following case must be prevented: >> + * gl_TessLevelInner = ...; >> + * barrier(); >> + * if (gl_InvocationID == 1) >> + * gl_TessLevelInner = ...; >> + * >> + * If you consider disjoint code segments separated by barriers, >> each >> + * such segment that writes tess factor channels should write >> the same >> + * channels in all codepaths within that segment. >> + */ >> + if (main_block_tf_writemask || cond_block_tf_writemask) { >> + /* Accumulate the result: */ >> + out->tessfactors_are_def_in_all_invocs &= >> + main_block_tf_writemask && >> + !(cond_block_tf_writemask & ~main_block_tf_writemask); > > > Could this be just the following: > > out->tessfactors_are_def_in_all_invocs &= > !(cond_block_tf_writemask & ~main_block_tf_writemask); > > (And the same below after the loop)
Do you mean that "main_block_tf_writemask &&" is redundant and always evaluates the same as the other part of the condition? > > Point being, what if you have a barrier() and no assignment to tessfactors > afterwards. This point doesn't make sense - the conditional block ensures that code segments separated by barriers without assignments to tess factors are ignored. Marek _______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
