This allows quite a bit of infrastructure, such as liveness analysis, copy propagation and dead code elimination, to be kept as is.
Here one deals with the virtual register space, and this doesn't prevent packing more than one component into one hardware register later on. That is entirely a matter of the register allocator working with sub-registers. Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/compiler/brw_fs.h | 1 + src/intel/compiler/brw_fs_nir.cpp | 19 ++++++++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index d9c4f737e6..b23d2b1733 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -233,6 +233,7 @@ public: fs_reg get_nir_src_imm(const nir_src &src); fs_reg get_nir_dest(const nir_dest &dest, bool pad_components_to_full_registers = false); + fs_reg get_nir_alu_dest(const nir_alu_instr *instr); fs_reg get_nir_image_deref(const nir_deref_var *deref); fs_reg get_indirect_offset(nir_intrinsic_instr *instr); void emit_percomp(const brw::fs_builder &bld, const fs_inst &inst, diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index d3125d7dcd..cbb1c118d2 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -656,7 +656,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) struct brw_wm_prog_key *fs_key = (struct brw_wm_prog_key *) this->key; fs_inst *inst; - fs_reg result = get_nir_dest(instr->dest.dest); + fs_reg result = get_nir_alu_dest(instr); result.type = brw_type_for_nir_type(devinfo, (nir_alu_type)(nir_op_infos[instr->op].output_type | nir_dest_bit_size(instr->dest.dest))); @@ -1624,6 +1624,23 @@ fs_visitor::get_nir_dest(const nir_dest &dest, } fs_reg +fs_visitor::get_nir_alu_dest(const nir_alu_instr *instr) +{ + /* With data type size <= 16 bits one can fit two or more components + * into one register. 
In virtual register space this doesn't really add + * any value but requires things such as liveness analysis, + * copy propagation and dead code elimination to be updated to work with + * sub-register regions. + * + * Therefore instead allocate full padded registers per component. This + * doesn't prevent the final hardware register allocator from packing more than + * one component per register. + */ + const bool pad_components_to_full_register = true; + return get_nir_dest(instr->dest.dest, pad_components_to_full_register); +} + +fs_reg fs_visitor::get_nir_image_deref(const nir_deref_var *deref) { fs_reg image(UNIFORM, deref->var->data.driver_location / 4, -- 2.11.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev