This instruction calculates the index of an arbitrary channel enabled
in the current execution mask. It's expected to be used as input for
the BROADCAST opcode, but it's implemented as a separate instruction
rather than being baked into BROADCAST because FIND_LIVE_CHANNEL has
no dependencies, so it can always be CSE'ed with other instances of the
same instruction within a basic block.
---
src/mesa/drivers/dri/i965/brw_defines.h | 8 +++
src/mesa/drivers/dri/i965/brw_eu.h | 4 ++
src/mesa/drivers/dri/i965/brw_eu_emit.c | 70 ++++++++++++++++++++++++
src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 4 ++
src/mesa/drivers/dri/i965/brw_shader.cpp | 2 +
src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 4 ++
6 files changed, 92 insertions(+)
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h
b/src/mesa/drivers/dri/i965/brw_defines.h
index d4930e3..2b52fb2 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -912,6 +912,14 @@ enum opcode {
SHADER_OPCODE_URB_WRITE_SIMD8,
/**
+ * Return the index of an arbitrary live channel (i.e. one of the channels
+ * enabled in the current execution mask) and assign it to the first
+ * component of the destination. Expected to be used as input for the
+ * BROADCAST pseudo-opcode.
+ */
+ SHADER_OPCODE_FIND_LIVE_CHANNEL,
+
+ /**
* Pick the channel from its first source register given by the index
* specified as second source. Useful for variable indexing of surfaces.
*/
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h
b/src/mesa/drivers/dri/i965/brw_eu.h
index 2505480..1a8b38c 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -414,6 +414,10 @@ brw_pixel_interpolator_query(struct brw_compile *p,
unsigned response_length);
void
+brw_find_live_channel(struct brw_compile *p,
+ struct brw_reg dst);
+
+void
brw_broadcast(struct brw_compile *p,
struct brw_reg dst,
struct brw_reg src,
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c
b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index d7e3995..7899f83 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -2855,6 +2855,76 @@ brw_pixel_interpolator_query(struct brw_compile *p,
}
void
+brw_find_live_channel(struct brw_compile *p, struct brw_reg dst)
+{
+ const struct brw_context *brw = p->brw;
+ brw_inst *inst; /* last emitted instruction, kept for field overrides */
+
+ assert(brw->gen >= 7); /* only Gen7 and later are handled here */
+
+ brw_push_insn_state(p); /* paired with brw_pop_insn_state() below */
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+
+ if (brw_inst_access_mode(brw, p->current) == BRW_ALIGN_1) {
+ if (brw->gen >= 8) {
+ /* Getting the first active channel index is easy on Gen8: Just find
+ * the first bit set in the mask register. The same register exists
+ * on HSW already but it reads back as all ones when the current
+ * instruction has execution masking disabled, so it's kind of
+ * useless.
+ *
+ * Only the first component of dst is written (vec1).
+ */
+ inst = brw_FBL(p, vec1(dst),
+ retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD));
+
+ /* Quarter control has the effect of magically shifting the value of
+ * this register. Make sure it's set to zero.
+ */
+ brw_inst_set_qtr_control(brw, inst, GEN6_COMPRESSION_1Q);
+
+ } else {
+ const struct brw_reg flag = retype(brw_flag_reg(1, 0),
+ BRW_REGISTER_TYPE_UD);
+
+ brw_MOV(p, flag, brw_imm_ud(0));
+
+ /* Run a 16-wide instruction returning zero with execution masking
+ * and a conditional modifier enabled in order to get the current
+ * execution mask in f1.0.
+ *
+ * The flag register was zeroed by the MOV above, and this is the
+ * only instruction in the sequence that re-enables execution
+ * masking. The FBL that follows then finds the first bit set in
+ * the flag register and stores it in the first component of dst.
+ */
+ inst = brw_MOV(p, vec16(brw_null_reg()), brw_imm_ud(0));
+ brw_inst_set_mask_control(brw, inst, BRW_MASK_ENABLE);
+ brw_inst_set_cond_modifier(brw, inst, BRW_CONDITIONAL_Z);
+ brw_inst_set_flag_reg_nr(brw, inst, 1);
+
+ brw_FBL(p, vec1(dst), flag);
+ }
+
+ } else {
+ if (brw->gen >= 8) {
+ /* In SIMD4x2 mode the first active channel index is just the
+ * negation of the first bit of the mask register.
+ *
+ * NOTE(review): this only yields the inverted bit if negate() on
+ * an AND source acts as a bitwise NOT; an arithmetic negation
+ * would leave bit 0 unchanged. Confirm against the hardware docs.
+ */
+ inst = brw_AND(p, brw_writemask(dst, WRITEMASK_X),
+ negate(retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD)),
+ brw_imm_ud(1));
+
+ } else {
+ /* Overwrite the destination without and with execution masking to
+ * find out which of the channels is active.
+ *
+ * The first MOV is emitted under the default mask control set at
+ * the top of this function (BRW_MASK_DISABLE) and stores 1 in the
+ * X component. The second MOV stores 0 in the same component but
+ * has BRW_MASK_ENABLE set on it, so it is subject to execution
+ * masking.
+ */
+ brw_MOV(p, brw_writemask(vec4(dst), WRITEMASK_X),
+ brw_imm_ud(1));
+
+ inst = brw_MOV(p, brw_writemask(vec4(dst), WRITEMASK_X),
+ brw_imm_ud(0));
+ brw_inst_set_mask_control(brw, inst, BRW_MASK_ENABLE);
+ }
+ }
+
+ brw_pop_insn_state(p);
+}
+
+void
brw_broadcast(struct brw_compile *p,
struct brw_reg dst,
struct brw_reg src,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index b611641..554f7e0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -2015,6 +2015,10 @@ fs_generator::generate_code(const cfg_t *cfg, int
dispatch_width)
generate_set_simd4x2_offset(inst, dst, src[0]);
break;
+ case SHADER_OPCODE_FIND_LIVE_CHANNEL:
+ brw_find_live_channel(p, dst);
+ break;
+
case SHADER_OPCODE_BROADCAST:
brw_broadcast(p, dst, src[0], src[1]);
break;
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp
b/src/mesa/drivers/dri/i965/brw_shader.cpp
index bbb5532..5926f25 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -471,6 +471,8 @@ brw_instruction_name(enum opcode op)
case SHADER_OPCODE_URB_WRITE_SIMD8:
return "gen8_urb_write_simd8";
+ case SHADER_OPCODE_FIND_LIVE_CHANNEL:
+ return "find_live_channel";
case SHADER_OPCODE_BROADCAST:
return "broadcast";
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index 60384c3..649dca9 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -1508,6 +1508,10 @@ vec4_generator::generate_code(const cfg_t *cfg)
generate_untyped_surface_read(inst, dst, src[0]);
break;
+ case SHADER_OPCODE_FIND_LIVE_CHANNEL:
+ brw_find_live_channel(p, dst);
+ break;
+
case SHADER_OPCODE_BROADCAST:
brw_broadcast(p, dst, src[0], src[1]);
break;
--
2.1.3
_______________________________________________
mesa-dev mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/mesa-dev