On 01/29/2014 02:36 PM, Kenneth Graunke wrote: > I'd neglected to port these to Broadwell. Most of this code is copy > and pasted from Gen7, but instead of using F32TO16/F16TO32, we just > use MOV with HF register types. > > Fixes fs-packHalf2x16 and fs-unpackHalf2x16 tests (both the ARB > extension and ES 3.0 variants).
In light of the redaction on patch 2, is this actually correct? > Signed-off-by: Kenneth Graunke <[email protected]> One nearly infinitesimal nit below. Assuming the commit message is correct, this patch is Reviewed-by: Ian Romanick <[email protected]> > --- > src/mesa/drivers/dri/i965/brw_fs.h | 7 +++ > src/mesa/drivers/dri/i965/gen8_fs_generator.cpp | 76 > ++++++++++++++++++++++++- > 2 files changed, 81 insertions(+), 2 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs.h > b/src/mesa/drivers/dri/i965/brw_fs.h > index 9c5c13a..5c7f2ce 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.h > +++ b/src/mesa/drivers/dri/i965/brw_fs.h > @@ -673,6 +673,13 @@ private: > void generate_set_simd4x2_offset(fs_inst *ir, > struct brw_reg dst, > struct brw_reg offset); > + void generate_pack_half_2x16_split(fs_inst *inst, > + struct brw_reg dst, > + struct brw_reg x, > + struct brw_reg y); > + void generate_unpack_half_2x16_split(fs_inst *inst, > + struct brw_reg dst, > + struct brw_reg src); > void generate_discard_jump(fs_inst *ir); > > void patch_discard_jumps_to_fb_writes(); > diff --git a/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp > b/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp > index 6793ce0..43eaa35 100644 > --- a/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp > +++ b/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp > @@ -582,6 +582,78 @@ gen8_fs_generator::generate_set_simd4x2_offset(fs_inst > *ir, > MOV_RAW(retype(brw_vec1_reg(dst.file, dst.nr, 0), value.type), value); > } > > +/** > + * Change the register's data type from UD to HF, doubling the strides in > order > + * to compensate for halving the data type width. > + */ > +static struct brw_reg > +ud_reg_to_hf(struct brw_reg r) > +{ > + assert(r.type == BRW_REGISTER_TYPE_UD); > + r.type = BRW_REGISTER_TYPE_HF; > + > + /* The BRW_*_STRIDE enums are defined so that incrementing the field > + * doubles the real stride. 
> + */ > + if (r.hstride != 0) > + ++r.hstride; > + if (r.vstride != 0) > + ++r.vstride; > + > + return r; > +} > + > +void > +gen8_fs_generator::generate_pack_half_2x16_split(fs_inst *inst, > + struct brw_reg dst, > + struct brw_reg x, > + struct brw_reg y) > +{ > + assert(dst.type == BRW_REGISTER_TYPE_UD); > + assert(x.type == BRW_REGISTER_TYPE_F); > + assert(y.type == BRW_REGISTER_TYPE_F); > + > + struct brw_reg dst_hf = ud_reg_to_hf(dst); > + > + /* Give each 32-bit channel of dst the form below , where "." means ^ extra space > + * unchanged. > + * 0x....hhhh > + */ > + MOV(dst_hf, y); > + > + /* Now the form: > + * 0xhhhh0000 > + */ > + SHL(dst, dst, brw_imm_ud(16u)); > + > + /* And, finally the form of packHalf2x16's output: > + * 0xhhhhllll > + */ > + MOV(dst_hf, x); > +} > + > +void > +gen8_fs_generator::generate_unpack_half_2x16_split(fs_inst *inst, > + struct brw_reg dst, > + struct brw_reg src) > +{ > + assert(dst.type == BRW_REGISTER_TYPE_F); > + assert(src.type == BRW_REGISTER_TYPE_UD); > + > + struct brw_reg src_hf = ud_reg_to_hf(src); > + > + /* Each channel of src has the form of unpackHalf2x16's input: 0xhhhhllll. > + * For the Y case, we wish to access only the upper word; therefore > + * a 16-bit subregister offset is needed. 
> + */ > + assert(inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X || > + inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y); > + if (inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y) > + src_hf.subnr += 2; > + > + MOV(dst, src_hf); > +} > + > void > gen8_fs_generator::generate_code(exec_list *instructions) > { > @@ -965,12 +1037,12 @@ gen8_fs_generator::generate_code(exec_list > *instructions) > break; > > case FS_OPCODE_PACK_HALF_2x16_SPLIT: > - assert(!"XXX: Missing Gen8 scalar support for > PACK_HALF_2x16_SPLIT"); > + generate_pack_half_2x16_split(ir, dst, src[0], src[1]); > break; > > case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X: > case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y: > - assert(!"XXX: Missing Gen8 scalar support for > UNPACK_HALF_2x16_SPLIT"); > + generate_unpack_half_2x16_split(ir, dst, src[0]); > break; > > case FS_OPCODE_PLACEHOLDER_HALT: _______________________________________________ mesa-dev mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/mesa-dev
