This is the only non-trivial instruction manipulator. It makes it possible to use the ALIGN16 predication modes in the scalar back-end without emitting any additional instructions, by combining predication with conditional mods. exec_reduce() prepares an instruction for the desired reduction mode (a no-op except for SVEC4 instructions in the scalar back-end); subsequent instructions can then predicate on the result of the reduction by using the NORMAL predication mode on the single flag register written by the generating instruction. --- src/mesa/drivers/dri/i965/brw_ir_fs.h | 9 +++++++ src/mesa/drivers/dri/i965/brw_ir_svec4.h | 45 ++++++++++++++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_ir_vec4.h | 10 +++++++ 3 files changed, 64 insertions(+)
diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h index 7e5083c..c9d40ce 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_fs.h +++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h @@ -373,4 +373,13 @@ exec_saturate(bool saturate, fs_inst *inst) return inst; } +/** + * No-op. See the SVEC4 implementation. + */ +static inline fs_inst * +exec_reduce(brw_predicate pred, fs_inst *inst) +{ + return inst; +} + #endif diff --git a/src/mesa/drivers/dri/i965/brw_ir_svec4.h b/src/mesa/drivers/dri/i965/brw_ir_svec4.h index 508ed5e..36164a9 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_svec4.h +++ b/src/mesa/drivers/dri/i965/brw_ir_svec4.h @@ -439,6 +439,51 @@ namespace brw { return inst; } + + /** + * Perform a vector reduction on the flag result of \p inst. This allows + * using vector predication modes (the ALIGN16 ones) with SVEC4 + * instructions, even though we may not have enough flag registers + * available to hold the flag results from all components. + * + * The (largely inconsequential) limitation is that the predication mode + * has to be already known by the generating instruction (i.e. \p inst), so + * you cannot apply different ALIGN16 predication modes on the flag result + * of the same reduced instruction. 
+ */ + inline svec4_inst * + exec_reduce(brw_predicate pred, svec4_inst *inst) + { + switch (pred) { + case BRW_PREDICATE_ALIGN16_REPLICATE_X: + case BRW_PREDICATE_ALIGN16_REPLICATE_Y: + case BRW_PREDICATE_ALIGN16_REPLICATE_Z: + case BRW_PREDICATE_ALIGN16_REPLICATE_W: { + const unsigned j = pred - BRW_PREDICATE_ALIGN16_REPLICATE_X; + + for (unsigned i = 0; i < ARRAY_SIZE(inst->v); ++i) { + if (inst->v[i] && i != j) + exec_condmod(BRW_CONDITIONAL_NONE, inst->v[i]); + } + + return inst; + } + case BRW_PREDICATE_ALIGN16_ANY4H: + case BRW_PREDICATE_ALIGN16_ALL4H: { + const bool invert = (pred == BRW_PREDICATE_ALIGN16_ANY4H); + unsigned j = 0; + + for (unsigned i = 0; i < ARRAY_SIZE(inst->v); ++i) { + if (inst->v[i] && j++ > 0) + exec_predicate_inv(BRW_PREDICATE_NORMAL, invert, inst->v[i]); + } + + return inst; + } + default: + unreachable("Not reached"); + } + } } #endif diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h index a407ec4..a9a1e0b 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h @@ -374,6 +374,16 @@ exec_saturate(bool saturate, vec4_instruction *inst) inst->saturate = saturate; return inst; } + +/** + * No-op. See the SVEC4 implementation. + */ +inline vec4_instruction * +exec_reduce(brw_predicate pred, vec4_instruction *inst) +{ + return inst; +} + } /* namespace brw */ #endif -- 2.3.5 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev