On Wednesday, January 07, 2015 10:43:25 PM Kristian Høgsberg wrote: > SKL+ overloads the SIMD4x2 SIMD mode to mean either SIMD8D or SIMD4x2 > depending on bit 22 in the message header. If the bit is 0 or there is > no header we get SIMD8D. We always want SIMD4x2 in vec4 and for fs pull > constants, so use a message header in those cases and set bit 22 there. > > Signed-off-by: Kristian Høgsberg <k...@bitplanet.net> > --- > src/mesa/drivers/dri/i965/brw_defines.h | 5 ++++ > src/mesa/drivers/dri/i965/brw_fs.cpp | 8 ++++++ > src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 32 > +++++++++++++++++++----- > src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 15 ++++++++--- > src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 4 ++- > 5 files changed, 53 insertions(+), 11 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_defines.h > b/src/mesa/drivers/dri/i965/brw_defines.h > index 28e398d..f02a0b8 100644 > --- a/src/mesa/drivers/dri/i965/brw_defines.h > +++ b/src/mesa/drivers/dri/i965/brw_defines.h > @@ -1373,6 +1373,11 @@ enum brw_message_target { > #define BRW_SAMPLER_SIMD_MODE_SIMD16 2 > #define BRW_SAMPLER_SIMD_MODE_SIMD32_64 3 > > +/* GEN9 changes SIMD mode 0 to mean SIMD8D, but lets us get the SIMD4x2 > + * behavior by setting bit 22 of dword 2 in the message header. 
*/ > +#define GEN9_SAMPLER_SIMD_MODE_SIMD8D 0 > +#define GEN9_SAMPLER_SIMD_MODE_EXTENSION_SIMD4X2 (1 << 22) > + > #define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0 > #define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1 > #define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2 > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp > b/src/mesa/drivers/dri/i965/brw_fs.cpp > index 8c7d780..9dfb7b7 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp > @@ -2994,6 +2994,14 @@ fs_visitor::lower_uniform_pull_constant_loads() > const_offset_reg.fixed_hw_reg.dw1.ud /= 4; > fs_reg payload = fs_reg(this, glsl_type::uint_type); > > + /* We have to use a message header on Skylake to get SIMD4x2 mode. > + * Reserve space for the register. > + */ > + if (brw->gen >= 9) { > + payload.reg_offset++; > + virtual_grf_sizes[payload.reg] = 2; > + } > + > /* This is actually going to be a MOV, but since only the first > dword > * is accessed, we have a special opcode to do just that one. Note > * that this needs to be an operation that will be considered a def > diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > index c652d65..7b4ac8d 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > @@ -1017,6 +1017,26 @@ > fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst, > */ > dst.width = BRW_WIDTH_4; > > + struct brw_reg src = offset; > + bool header_present = false; > + int mlen = 1; > + > + if (brw->gen >= 9) { > + /* Skylake requires a message header in order to use SIMD4x2 mode. 
*/ > + src = retype(brw_vec8_grf(offset.nr - 1, 0), BRW_REGISTER_TYPE_UD); > + mlen = 2; > + header_present = true; > + > + brw_push_insn_state(p); > + brw_set_default_mask_control(p, BRW_MASK_DISABLE); > + brw_MOV(p, src, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); > + brw_set_default_access_mode(p, BRW_ALIGN_1); > + > + brw_MOV(p, get_element_ud(src, 2), > + brw_imm_ud(GEN9_SAMPLER_SIMD_MODE_EXTENSION_SIMD4X2)); > + brw_pop_insn_state(p); > + } > + > if (index.file == BRW_IMMEDIATE_VALUE) { > > uint32_t surf_index = index.dw1.ud; > @@ -1028,14 +1048,14 @@ > fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst, > brw_pop_insn_state(p); > > brw_set_dest(p, send, dst); > - brw_set_src0(p, send, offset); > + brw_set_src0(p, send, src); > brw_set_sampler_message(p, send, > surf_index, > 0, /* LD message ignores sampler unit */ > GEN5_SAMPLER_MESSAGE_SAMPLE_LD, > 1, /* rlen */ > - 1, /* mlen */ > - false, /* no header */ > + mlen, > + header_present, > BRW_SAMPLER_SIMD_MODE_SIMD4X2, > 0); > > @@ -1064,8 +1084,8 @@ > fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst, > 0 /* sampler */, > GEN5_SAMPLER_MESSAGE_SAMPLE_LD, > 1 /* rlen */, > - 1 /* mlen */, > - false /* header */, > + mlen, > + header_present, > BRW_SAMPLER_SIMD_MODE_SIMD4X2, > 0); > brw_inst_set_exec_size(p->brw, insn_or, BRW_EXECUTE_1); > @@ -1077,7 +1097,7 @@ > fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst, > /* dst = send(offset, a0.0) */ > brw_inst *insn_send = brw_next_insn(p, BRW_OPCODE_SEND); > brw_set_dest(p, insn_send, dst); > - brw_set_src0(p, insn_send, offset); > + brw_set_src0(p, insn_send, src); > brw_set_indirect_send_descriptor(p, insn_send, BRW_SFID_SAMPLER, addr); > > brw_pop_insn_state(p); > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > index b88a579..19e82ef 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > +++ 
b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > @@ -328,6 +328,7 @@ vec4_generator::generate_tex(vec4_instruction *inst, > } else { > struct brw_reg header = > retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD); > + uint32_t dw2 = 0; > > /* Explicitly set up the message header by copying g0 to the MRF. */ > brw_push_insn_state(p); > @@ -336,11 +337,17 @@ vec4_generator::generate_tex(vec4_instruction *inst, > > brw_set_default_access_mode(p, BRW_ALIGN_1); > > - if (inst->offset) { > + if (inst->offset) > /* Set the texel offset bits in DWord 2. */ > - brw_MOV(p, get_element_ud(header, 2), > - brw_imm_ud(inst->offset)); > - } > + dw2 = inst->offset; > + > + if (brw->gen >= 9) > + /* SKL+ overloads BRW_SAMPLER_SIMD_MODE_SIMD4X2 to also do > SIMD8D, > + * based on bit 22 in the header. */
The closing */ goes on its own line. Thanks for porting this to the new (or old, if you prefer) generator framework. Reviewed-by: Kenneth Graunke <kenn...@whitecape.org>
signature.asc
Description: This is a digitally signed message part.
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev