Iago Toral Quiroga <[email protected]> writes: > This is a problem when we have IR like this: > > (array_ref (var_ref temps) (swiz x (expression ivec4 bitcast_f2i > (swiz xxxx (array_ref (var_ref temps) (constant int (2)) ) )) )) ) ) > > where we are indexing an array with the result of an expression that > accesses the same array. > > In this scenario, temps will be moved to scratch space and we will need > to add scratch reads/writes for all accesses to temps, however, the > current implementation does not consider the case where a reladdr pointer > (obtained by indexing into temps trough a expression) points to a register > that is also stored in scratch space (as in this case, where the expression > used to index temps access temps[2]), and thus, requires a scratch read > before it is accessed. > > v2 (Francisco Jerez): > - Handle also recursive reladdr addressing. > - Do not memcpy dst_reg into src_reg when rewriting reladdr. > > v3 (Francisco Jerez): > - Reduce complexity by moving recursive reladdr scratch access handling > to a separate recursive function. > - Do not skip demoting reladdr index registers to scratch space if the > top level GRF has already been visited. > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89508 > --- > src/mesa/drivers/dri/i965/brw_vec4.h | 2 + > src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 116 > ++++++++++++++++++++----- > 2 files changed, 94 insertions(+), 24 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h > b/src/mesa/drivers/dri/i965/brw_vec4.h > index 33297ae..6ec00d5 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4.h > +++ b/src/mesa/drivers/dri/i965/brw_vec4.h > @@ -367,6 +367,8 @@ public: > dst_reg dst, > src_reg orig_src, > int base_offset); > + src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block, > + vec4_instruction *inst, src_reg src); > > bool try_emit_mad(ir_expression *ir); > bool try_emit_b2f_of_compare(ir_expression *ir); > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp > index 26a3b9f..8e256f9 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp > @@ -3352,6 +3352,48 @@ vec4_visitor::emit_scratch_write(bblock_t *block, > vec4_instruction *inst, > } > > /** > + * Checks if @src and/or src.reladdr require a scratch read, and if so,
I don't think @ is valid doxygen syntax for referring to a function
parameter, use \p.
> + * adds the scratch read(s) before @inst. The function also checks for
> + * recursive reladdr scratch accesses, issuing the corresponding scratch
> + * loads and rewriting reladdr references accordingly.
> + *
> + * @return @src if it did not require a scratch load, otherwise, the
> + * register holding the result of the scratch load that the caller should
> + * use to rewrite src.
> + */
> +src_reg
> +vec4_visitor::emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
> + vec4_instruction *inst, src_reg src)
> +{
> + /* First, deal with reladdr scratch accesses if needed */
> + if (src.reladdr && src.reladdr->file == GRF &&
> + scratch_loc[src.reladdr->reg] != -1) {
You just need to check src.reladdr here, the rest of the condition is
redundant.
> + /* Resolve recursive reladdr scratch access by calling ourselves
> + * with src.reladdr
> + */
> + src_reg new_reladdr = emit_resolve_reladdr(scratch_loc, block,
> + inst, *src.reladdr);
You should be able to assign the return value directly to *src.reladdr if
you make the change I suggest below.
> + /* Rewrite our reladdr with the register holding the result of
> + * our sec.reladdr scratch load
> + */
> + src.reladdr->file = new_reladdr.file;
> + src.reladdr->reg = new_reladdr.reg;
> + src.reladdr->reg_offset = new_reladdr.reg_offset;
> + src.reladdr->reladdr = NULL;
> + }
> +
> + /* Now handle scratch access on src */
> + if (src.file == GRF && scratch_loc[src.reg] != -1) {
> + dst_reg temp = dst_reg(this, glsl_type::vec4_type);
> + emit_scratch_read(block, inst, temp, src,
> + scratch_loc[src.reg]);
> + return src_reg(temp);
This should preserve the register type, flags and swizzle of src. Maybe
just modify src.reg, .reg_offset and .reladdr to point to temp as you
did elsewhere.
> + } else {
> + return src;
> + }
> +}
> +
> +/**
> * We can't generally support array access in GRF space, because a
> * single instruction's destination can only span 2 contiguous
> * registers. So, we send all GRF arrays that get variable index
> @@ -3368,20 +3410,29 @@ vec4_visitor::move_grf_array_access_to_scratch()
> * scratch.
> */
> foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
> - if (inst->dst.file == GRF && inst->dst.reladdr &&
> - scratch_loc[inst->dst.reg] == -1) {
> - scratch_loc[inst->dst.reg] = c->last_scratch;
> - c->last_scratch += this->alloc.sizes[inst->dst.reg];
> + if (inst->dst.file == GRF && inst->dst.reladdr) {
> + if (scratch_loc[inst->dst.reg] == -1) {
> + scratch_loc[inst->dst.reg] = c->last_scratch;
> + c->last_scratch += this->alloc.sizes[inst->dst.reg];
> + }
> +
> + src_reg *iter = inst->dst.reladdr;
Any reason you didn't fold this declaration into the init statement of
the for loop? It's not a big deal in this specific case but doing this
in general will extend the scope of the induction variable outside of
the loop unnecessarily.
> + for (; iter->reladdr; iter = iter->reladdr) {
> + if (iter->file == GRF && scratch_loc[iter->reg] == -1) {
> + scratch_loc[iter->reg] = c->last_scratch;
> + c->last_scratch += this->alloc.sizes[iter->reg];
> + }
> + }
> }
>
> for (int i = 0 ; i < 3; i++) {
> - src_reg *src = &inst->src[i];
> -
> - if (src->file == GRF && src->reladdr &&
> - scratch_loc[src->reg] == -1) {
> - scratch_loc[src->reg] = c->last_scratch;
> - c->last_scratch += this->alloc.sizes[src->reg];
> - }
> + src_reg *iter = &inst->src[i];
Same here.
> + for (; iter->reladdr; iter = iter->reladdr) {
> + if (iter->file == GRF && scratch_loc[iter->reg] == -1) {
> + scratch_loc[iter->reg] = c->last_scratch;
> + c->last_scratch += this->alloc.sizes[iter->reg];
> + }
> + }
> }
> }
>
> @@ -3395,23 +3446,40 @@ vec4_visitor::move_grf_array_access_to_scratch()
> base_ir = inst->ir;
> current_annotation = inst->annotation;
>
> + /* First handle scratch access on the dst. Notice we have to handle
> + * the case where the dst's reladdr also points to scratch space.
> + */
> + if (inst->dst.reladdr) {
> + src_reg temp = emit_resolve_reladdr(scratch_loc, block, inst,
> + *inst->dst.reladdr);
> + inst->dst.reladdr->file = temp.file;
> + inst->dst.reladdr->reg = temp.reg;
> + inst->dst.reladdr->reg_offset = temp.reg_offset;
> + inst->dst.reladdr->reladdr = NULL;
> + }
> +
With the change I suggested above you can cut this down to:
| if (inst->dst.reladdr)
| *inst->dst.reladdr = emit_resolve_reladdr(..., *inst->dst.reladdr);
> if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) {
> - emit_scratch_write(block, inst, scratch_loc[inst->dst.reg]);
> + /* Now that we have handled any (possibly recursive) reladdr scratch
> + * accesses for dst we can safely do the scratch write for dst
> itself
> + */
> + emit_scratch_write(block, inst, scratch_loc[inst->dst.reg]);
> }
>
> + /* Now handle scratch access on any src. In this case, since
> inst->src[i]
> + * already is a src_reg, we can just call emit_resolve_reladdr with
> + * inst->src[i] and it will take care of handling scratch loads for
> + * both src and src.reladdr (recursively).
> + */
> for (int i = 0 ; i < 3; i++) {
> - if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1)
> - continue;
> -
> - dst_reg temp = dst_reg(this, glsl_type::vec4_type);
> -
> - emit_scratch_read(block, inst, temp, inst->src[i],
> - scratch_loc[inst->src[i].reg]);
> -
> - inst->src[i].file = temp.file;
> - inst->src[i].reg = temp.reg;
> - inst->src[i].reg_offset = temp.reg_offset;
> - inst->src[i].reladdr = NULL;
> + if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1)
> + continue;
> +
This check is redundant, it's already taken care of by
emit_resolve_reladdr().
> + src_reg temp = emit_resolve_reladdr(scratch_loc, block, inst,
> + inst->src[i]);
> + inst->src[i].file = temp.file;
> + inst->src[i].reg = temp.reg;
> + inst->src[i].reg_offset = temp.reg_offset;
> + inst->src[i].reladdr = NULL;
inst->src[i] = emit_resolve_reladdr(..., inst->src[i]);
> }
> }
> }
> --
> 1.9.1
signature.asc
Description: PGP signature
_______________________________________________ mesa-dev mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/mesa-dev
