Jason Ekstrand <[email protected]> writes: > On Fri, Jan 18, 2019 at 6:09 PM Francisco Jerez <[email protected]> > wrote: > >> Currently the execution type calculation will return a bogus value in >> cases like: >> >> mov_indirect(8) vgrf0:w, vgrf1:w, vgrf2:ud, 32u >> >> Which will be considered to have a 32-bit integer execution type even >> though the actual indirect move operation will be carried out with >> 16-bit precision. >> >> Similarly there's no need to apply the CHV/BXT double-precision region >> alignment restrictions to such control sources, since they aren't >> directly involved in the double-precision arithmetic operations >> emitted by these virtual instructions. Applying the CHV/BXT >> restrictions to control sources was expected to be harmless if mildly >> inefficient, but unfortunately it exposed problems at codegen level >> for virtual instructions (namely the SHUFFLE instruction used for the >> Vulkan 1.1 subgroup feature) that weren't prepared to accept control >> sources with an arbitrary strided region. >> >> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109328 >> Reported-by: Mark Janes <[email protected]> >> Fixes: efa4e4bc5fc "intel/fs: Introduce regioning lowering pass." >> --- >> src/intel/compiler/brw_fs.cpp | 54 +++++++++++++++++++ >> src/intel/compiler/brw_fs_lower_regioning.cpp | 6 +-- >> src/intel/compiler/brw_ir_fs.h | 10 +++- >> 3 files changed, 66 insertions(+), 4 deletions(-) >> >> diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp >> index 0359eb079f7..f475b617df2 100644 >> --- a/src/intel/compiler/brw_fs.cpp >> +++ b/src/intel/compiler/brw_fs.cpp >> @@ -271,6 +271,60 @@ fs_inst::is_send_from_grf() const >> } >> } >> >> +bool >> +fs_inst::is_control_source(unsigned arg) const >> +{ >> + switch (opcode) { >> + case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: >> + case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7: >> + case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4: >> + case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: >> + return arg == 0; >> + >> + case SHADER_OPCODE_BROADCAST: >> + case SHADER_OPCODE_SHUFFLE: >> + case SHADER_OPCODE_QUAD_SWIZZLE: >> + case FS_OPCODE_INTERPOLATE_AT_SAMPLE: >> + case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: >> + case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: >> + case SHADER_OPCODE_IMAGE_SIZE: >> + case SHADER_OPCODE_GET_BUFFER_SIZE: >> + return arg == 1; >> + >> + case SHADER_OPCODE_MOV_INDIRECT: >> + case SHADER_OPCODE_CLUSTER_BROADCAST: >> + case SHADER_OPCODE_TEX: >> + case FS_OPCODE_TXB: >> + case SHADER_OPCODE_TXD: >> + case SHADER_OPCODE_TXF: >> + case SHADER_OPCODE_TXF_LZ: >> + case SHADER_OPCODE_TXF_CMS: >> + case SHADER_OPCODE_TXF_CMS_W: >> + case SHADER_OPCODE_TXF_UMS: >> + case SHADER_OPCODE_TXF_MCS: >> + case SHADER_OPCODE_TXL: >> + case SHADER_OPCODE_TXL_LZ: >> + case SHADER_OPCODE_TXS: >> + case SHADER_OPCODE_LOD: >> + case SHADER_OPCODE_TG4: >> + case SHADER_OPCODE_TG4_OFFSET: >> + case SHADER_OPCODE_SAMPLEINFO: >> + case SHADER_OPCODE_UNTYPED_ATOMIC: >> + case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT: >> + case SHADER_OPCODE_UNTYPED_SURFACE_READ: >> + case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: >> + case SHADER_OPCODE_BYTE_SCATTERED_READ: >> + case SHADER_OPCODE_BYTE_SCATTERED_WRITE: >> + case SHADER_OPCODE_TYPED_ATOMIC: >> + case SHADER_OPCODE_TYPED_SURFACE_READ: >> + case SHADER_OPCODE_TYPED_SURFACE_WRITE: >> > > As of b284d222db, we are no longer using many of the opcodes in this list > (gen7 pull constant loads, [un]typed surface reads/writes, etc.) It will > need to be rebased and we need to add SHADER_OPCODE_SEND to the list. > Fortunately, the changes to add SHADER_OPCODE_SEND landed before the 19.0 > cut-off so there is no need to make two versions for backporting. >
Yes, that's roughly what I had done during one of my previous rebases of this series, see: https://cgit.freedesktop.org/~currojerez/mesa/commit/?h=jenkins&id=30f8f3ff48b02ead688705e0679a98c0d6c9c87e > Other than that, this patch seems perfectly reasonable to me > > Reviewed-by: Jason Ekstrand <[email protected]> > > If you want me to hand-review the new list of opcodes, feel free to send a > v2 and cc me. > > >> + return arg == 1 || arg == 2; >> + >> + default: >> + return false; >> + } >> +} >> + >> /** >> * Returns true if this instruction's sources and destinations cannot >> * safely be the same register. >> diff --git a/src/intel/compiler/brw_fs_lower_regioning.cpp >> b/src/intel/compiler/brw_fs_lower_regioning.cpp >> index df50993dee6..6a3c23892b4 100644 >> --- a/src/intel/compiler/brw_fs_lower_regioning.cpp >> +++ b/src/intel/compiler/brw_fs_lower_regioning.cpp >> @@ -74,7 +74,7 @@ namespace { >> unsigned stride = inst->dst.stride * type_sz(inst->dst.type); >> >> for (unsigned i = 0; i < inst->sources; i++) { >> - if (!is_uniform(inst->src[i])) >> + if (!is_uniform(inst->src[i]) && !inst->is_control_source(i)) >> stride = MAX2(stride, inst->src[i].stride * >> type_sz(inst->src[i].type)); >> } >> @@ -92,7 +92,7 @@ namespace { >> required_dst_byte_offset(const fs_inst *inst) >> { >> for (unsigned i = 0; i < inst->sources; i++) { >> - if (!is_uniform(inst->src[i])) >> + if (!is_uniform(inst->src[i]) && !inst->is_control_source(i)) >> if (reg_offset(inst->src[i]) % REG_SIZE != >> reg_offset(inst->dst) % REG_SIZE) >> return 0; >> @@ -109,7 +109,7 @@ namespace { >> has_invalid_src_region(const gen_device_info *devinfo, const fs_inst >> *inst, >> unsigned i) >> { >> - if (is_unordered(inst)) { >> + if (is_unordered(inst) || inst->is_control_source(i)) { >> return false; >> } else { >> const unsigned dst_byte_stride = inst->dst.stride * >> type_sz(inst->dst.type); >> diff --git a/src/intel/compiler/brw_ir_fs.h >> b/src/intel/compiler/brw_ir_fs.h >> index 08e3d83d910..0a0ba1d363a 100644 >> --- a/src/intel/compiler/brw_ir_fs.h >> +++ b/src/intel/compiler/brw_ir_fs.h >> @@ -358,6 +358,13 @@ public: >> bool can_change_types() const; >> bool has_source_and_destination_hazard() const; >> >> + /** >> + * Return whether \p arg is a control source of a virtual instruction >> which >> + * shouldn't contribute to the execution type and usual regioning >> + * restriction calculations of arithmetic instructions. >> + */ >> + bool is_control_source(unsigned arg) const; >> + >> /** >> * Return the subset of flag registers read by the instruction as a >> bitset >> * with byte granularity. >> @@ -462,7 +469,8 @@ get_exec_type(const fs_inst *inst) >> brw_reg_type exec_type = BRW_REGISTER_TYPE_B; >> >> for (int i = 0; i < inst->sources; i++) { >> - if (inst->src[i].file != BAD_FILE) { >> + if (inst->src[i].file != BAD_FILE && >> + !inst->is_control_source(i)) { >> const brw_reg_type t = get_exec_type(inst->src[i].type); >> if (type_sz(t) > type_sz(exec_type)) >> exec_type = t; >> -- >> 2.19.2 >> >> _______________________________________________ >> mesa-dev mailing list >> [email protected] >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev >>
signature.asc
Description: PGP signature
_______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
