Jason Ekstrand <[email protected]> writes: > Doing instruction header setup in the generator is aweful for a number > of reasons. For one, we can't schedule the header setup at all. For > another, it means lots of implied writes which the instruction scheduler > and other passes can't properly read about. The second isn't a huge > problem for FB writes since they always happen at the end. We made a > similar change to sampler handling in ff4726077d86. > --- > src/intel/compiler/brw_fs.cpp | 103 > ++++++++++++++++++++++++++------ > src/intel/compiler/brw_fs_generator.cpp | 66 -------------------- > 2 files changed, 86 insertions(+), 83 deletions(-) > > diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp > index 3e9ccc4..5276a66 100644 > --- a/src/intel/compiler/brw_fs.cpp > +++ b/src/intel/compiler/brw_fs.cpp > @@ -3247,7 +3247,18 @@ fs_visitor::emit_repclear_shader() > write->mlen = 1; > } else { > assume(key->nr_color_regions > 0); > + > + struct brw_reg header = > + retype(brw_message_reg(base_mrf), BRW_REGISTER_TYPE_UD); > + bld.exec_all().group(16, 0) > + .MOV(header, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); > + > for (int i = 0; i < key->nr_color_regions; ++i) { > + if (i > 0) { > + bld.exec_all().group(1, 0) > + .MOV(component(header, 2), brw_imm_ud(i)); > + } > + > write = bld.emit(FS_OPCODE_REP_FB_WRITE); > write->saturate = key->clamp_fragment_color; > write->base_mrf = base_mrf; > @@ -3972,25 +3983,83 @@ lower_fb_write_logical_send(const fs_builder &bld, > fs_inst *inst, > int header_size = 2, payload_header_size; > unsigned length = 0; > > - /* From the Sandy Bridge PRM, volume 4, page 198: > - * > - * "Dispatched Pixel Enables. One bit per pixel indicating > - * which pixels were originally enabled when the thread was > - * dispatched. This field is only required for the end-of- > - * thread message and on all dual-source messages." > - */ > - if (devinfo->gen >= 6 && > - (devinfo->is_haswell || devinfo->gen >= 8 || !prog_data->uses_kill) && > - color1.file == BAD_FILE && > - key->nr_color_regions == 1) { > - header_size = 0; > - } > + if (devinfo->gen < 6) { > + /* For gen4-5, we always have a header consisting of g0 and g1. We > have > + * an implied MOV from g0,g1 to the start of the message. The MOV from > + * g0 is handled by the hardware and the MOV from g1 is provided by the > + * generator. This is required because, on gen4-5, the generator may > + * generate two write messages with different message lengths in order > + * to handle AA data properly. > + * > + * Also, since the pixel mask goes in the g0 portion of the message and > + * since render target writes are the last thing in the shader, we > write > + * the pixel mask directly into g0 and it will get copied as part of > the > + * implied write. > + */ > + if (prog_data->uses_kill) { > + bld.exec_all().group(1, 0) > + .MOV(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW), > + brw_flag_reg(0, 1)); > + } > + > + assert(length == 0); > + length = 2; > + } else if ((devinfo->gen <= 7 && !devinfo->is_haswell && > + prog_data->uses_kill) || > + color1.file != BAD_FILE || > + key->nr_color_regions > 1) { > + /* From the Sandy Bridge PRM, volume 4, page 198: > + * > + * "Dispatched Pixel Enables. One bit per pixel indicating > + * which pixels were originally enabled when the thread was > + * dispatched. This field is only required for the end-of- > + * thread message and on all dual-source messages." > + */ > + const fs_builder ubld = bld.exec_all().group(8, 0); > + > + /* The header starts off as g0 and g1 */ > + fs_reg header = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2); > + ubld.group(16, 0).MOV(header, retype(brw_vec8_grf(0, 0), > + BRW_REGISTER_TYPE_UD)); > + > + uint32_t g00_bits = 0; > + > + /* Set "Source0 Alpha Present to RenderTarget" bit in message > + * header. > + */ > + if (inst->target > 0 && key->replicate_alpha) > + g00_bits |= 1 << 11; > + > + /* Set computes stencil to render target */ > + if (prog_data->computed_stencil) > + g00_bits |= 1 << 14; > + > + if (g00_bits) { > + /* OR extra bits into g0.0 */ > + ubld.group(1, 0).OR(component(header, 0), > + retype(brw_vec1_grf(0, 0), > + BRW_REGISTER_TYPE_UD), > + brw_imm_ud(g00_bits)); > + } > + > + /* Set the render target index for choosing BLEND_STATE. */ > + if (inst->target > 0) { > + ubld.group(1, 0).MOV(component(header, 2), > brw_imm_ud(inst->target)); > + } > + > + if (prog_data->uses_kill) { > + ubld.group(1, 0).MOV(retype(component(header, 15), > + BRW_REGISTER_TYPE_UW), > + brw_flag_reg(0, 1)); > + }
The original patch from my branch implemented the header setup in a
helper function called setup_fb_write_header() which allowed it to share
the code among emit_repclear_shader() and lower_fb_write_logical_send().
I don't think there is any benefit from open-coding the header setup in
the already complex enough callers, seems like an obfuscation to me
(particularly of the emit_repclear_shader() codepath with PATCH 52 in
mind).
>
> - if (header_size != 0) {
> - assert(header_size == 2);
> - /* Allocate 2 registers for a header */
> - length += 2;
> + assert(length == 0);
> + sources[0] = header;
> + sources[1] = horiz_offset(header, 8);
> + length = 2;
> }
> + assert(length == 0 || length == 2);
> + header_size = length;
>
> if (payload.aa_dest_stencil_reg) {
> sources[length] = fs_reg(VGRF, bld.shader->alloc.allocate(1));
> diff --git a/src/intel/compiler/brw_fs_generator.cpp
> b/src/intel/compiler/brw_fs_generator.cpp
> index 8cc9d31..1a2b961 100644
> --- a/src/intel/compiler/brw_fs_generator.cpp
> +++ b/src/intel/compiler/brw_fs_generator.cpp
> @@ -307,9 +307,6 @@ fs_generator::fire_fb_write(fs_inst *inst,
> void
> fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload)
> {
> - struct brw_wm_prog_data *prog_data = brw_wm_prog_data(this->prog_data);
> - const brw_wm_prog_key * const key = (brw_wm_prog_key * const) this->key;
> -
> if (devinfo->gen < 8 && !devinfo->is_haswell) {
> brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
> }
> @@ -320,69 +317,6 @@ fs_generator::generate_fb_write(fs_inst *inst, struct
> brw_reg payload)
> if (inst->base_mrf >= 0)
> payload = brw_message_reg(inst->base_mrf);
>
> - /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied
> - * move, here's g1.
> - */
> - if (inst->header_size != 0) {
> - brw_push_insn_state(p);
> - brw_set_default_mask_control(p, BRW_MASK_DISABLE);
> - brw_set_default_exec_size(p, BRW_EXECUTE_1);
> - brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
> - brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
> - brw_set_default_flag_reg(p, 0, 0);
> -
> - /* On HSW, the GPU will use the predicate on SENDC, unless the header
> is
> - * present.
> - */
> - if (prog_data->uses_kill) {
> - struct brw_reg pixel_mask;
> -
> - if (devinfo->gen >= 6)
> - pixel_mask = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
> - else
> - pixel_mask = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
> -
> - brw_MOV(p, pixel_mask, brw_flag_reg(0, 1));
> - }
> -
> - if (devinfo->gen >= 6) {
> - brw_push_insn_state(p);
> - brw_set_default_exec_size(p, BRW_EXECUTE_16);
> - brw_set_default_compression_control(p, BRW_COMPRESSION_COMPRESSED);
> - brw_MOV(p,
> - retype(payload, BRW_REGISTER_TYPE_UD),
> - retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
> - brw_pop_insn_state(p);
> -
> - if (inst->target > 0 && key->replicate_alpha) {
> - /* Set "Source0 Alpha Present to RenderTarget" bit in message
> - * header.
> - */
> - brw_OR(p,
> - vec1(retype(payload, BRW_REGISTER_TYPE_UD)),
> - vec1(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)),
> - brw_imm_ud(0x1 << 11));
> - }
> -
> - if (inst->target > 0) {
> - /* Set the render target index for choosing BLEND_STATE. */
> - brw_MOV(p, retype(vec1(suboffset(payload, 2)),
> - BRW_REGISTER_TYPE_UD),
> - brw_imm_ud(inst->target));
> - }
> -
> - /* Set computes stencil to render target */
> - if (prog_data->computed_stencil) {
> - brw_OR(p,
> - vec1(retype(payload, BRW_REGISTER_TYPE_UD)),
> - vec1(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)),
> - brw_imm_ud(0x1 << 14));
> - }
> - }
> -
> - brw_pop_insn_state(p);
> - }
> -
> if (!runtime_check_aads_emit) {
> fire_fb_write(inst, payload, implied_header, inst->mlen);
> } else {
> --
> 2.5.0.400.gff86faf
>
> _______________________________________________
> mesa-dev mailing list
> [email protected]
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
signature.asc
Description: PGP signature
_______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
