On 03/31/2013 02:10 AM, Chris Forbes wrote:
Lowers ir_tg4 (from textureGather and textureGatherOffset builtins) to
SHADER_OPCODE_TG4.

The usual post-sampling swizzle workaround can't work for ir_tg4,
so avoid doing that:

* For R/G/B/A swizzles use the hardware channel select (lives in the
    same dword in the header as the texel offset), and then don't do
    anything afterward in the shader.
* For 0/1 swizzles blast the appropriate constant over all the output
    channels in swizzle_result().

Signed-off-by: Chris Forbes <[email protected]>
---
  src/mesa/drivers/dri/i965/brw_fs.h           |  1 +
  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 55 ++++++++++++++++++++++++++++
  2 files changed, 56 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index d9d17a2..bc93bdf 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -250,6 +250,7 @@ public:
     void visit(ir_function *ir);
     void visit(ir_function_signature *ir);

+   uint32_t gather_channel(ir_texture *ir, int sampler);
     void swizzle_result(ir_texture *ir, fs_reg orig_val, int sampler);

     bool can_do_source_mods(fs_inst *inst);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 8556b56..2b77883 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1119,6 +1119,14 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg 
dst, fs_reg coordinate,
        base_mrf--;
     }

+   if (ir->op == ir_tg4 && !header_present) {
+      /* ir_tg4 needs to place its channel select in the header,
+       * for interaction with ARB_texture_swizzle */
+      header_present = true;
+      mlen++;
+      base_mrf--;
+   }

I'm not a fan of duplicating this block. Why not just change the one above's condition to:

   if (ir->op == ir_tg4 || (ir->offset && ir->op != ir_txf)) {

Feel free to keep the comment if you like.

+
     if (ir->shadow_comparitor) {
        emit(MOV(fs_reg(MRF, base_mrf + mlen), shadow_c));
        mlen += reg_width;
@@ -1128,6 +1136,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, 
fs_reg coordinate,
     switch (ir->op) {
     case ir_tex:
     case ir_lod:
+   case ir_tg4:
        break;
     case ir_txb:
        emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
@@ -1242,6 +1251,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, 
fs_reg coordinate,
     case ir_txf_ms: inst = emit(SHADER_OPCODE_TXF_MS, dst); break;
     case ir_txs: inst = emit(SHADER_OPCODE_TXS, dst); break;
     case ir_lod: inst = emit(SHADER_OPCODE_LOD, dst); break;
+   case ir_tg4: inst = emit(SHADER_OPCODE_TG4, dst); break;
     }
     inst->base_mrf = base_mrf;
     inst->mlen = mlen;
@@ -1394,6 +1404,7 @@ fs_visitor::visit(ir_texture *ir)
     switch (ir->op) {
     case ir_tex:
     case ir_lod:
+   case ir_tg4:
        break;
     case ir_txb:
        ir->lod_info.bias->accept(this);
@@ -1416,6 +1427,8 @@ fs_visitor::visit(ir_texture *ir)
        ir->lod_info.sample_index->accept(this);
        sample_index = this->result;
        break;
+   default:
+      assert(!"Unrecognized texture opcode");
     };

     /* Writemasking doesn't eliminate channels on SIMD8 texture
@@ -1440,6 +1453,9 @@ fs_visitor::visit(ir_texture *ir)
     if (ir->offset != NULL && ir->op != ir_txf)
        inst->texture_offset = brw_texture_offset(ir->offset->as_constant());

+   if (ir->op == ir_tg4)
+      inst->texture_offset |= gather_channel(ir, sampler) << 16; // M0.2:16-17

Clever. Not a bad approach, but perhaps we should rename the field to something more general. Then again, message_header_bits isn't much better...

     inst->sampler = sampler;

     if (ir->shadow_comparitor)
@@ -1460,6 +1476,24 @@ fs_visitor::visit(ir_texture *ir)
  }

  /**
+ * Set up the gather channel based on the swizzle, for gather4.
+ */
+uint32_t
+fs_visitor::gather_channel(ir_texture *ir, int sampler)
+{
+   int swiz = GET_SWZ(c->key.tex.swizzles[sampler], 0 /* red */);
+   switch (swiz) {
+      case SWIZZLE_X: return 0;
+      case SWIZZLE_Y: return 1;
+      case SWIZZLE_Z: return 2;
+      case SWIZZLE_W: return 3;
+      default:
+         /* zero, one swizzles */
+         return 0;
+   }
+}
+
+/**
   * Swizzle the result of a texture result.  This is necessary for
   * EXT_texture_swizzle as well as DEPTH_TEXTURE_MODE for shadow comparisons.
   */
@@ -1468,9 +1502,30 @@ fs_visitor::swizzle_result(ir_texture *ir, fs_reg 
orig_val, int sampler)
  {
     this->result = orig_val;

+   /* txs isn't actually sampling the texture */
     if (ir->op == ir_txs)
        return;

+   /* tg4 does the channel select in hardware for 'real' swizzles, but can't
+    * do the degenerate ZERO/ONE cases, so we do them here:
+    *
+    * blast all the output channels with zero or one as appropriate
+    */

So, if texture swizzling selects the channel...then zero/one are stupid.

It might be better to put this in visit(ir_texture *), and short-circuit the texturing operation altogether. Why emit a gather4 only to throw away the results entirely?

+   if (ir->op == ir_tg4) {
+      int swiz = GET_SWZ(c->key.tex.swizzles[sampler], 0);
+      if (swiz != SWIZZLE_ZERO && swiz != SWIZZLE_ONE)
+         return;
+
+      fs_reg res = fs_reg(this, glsl_type::vec4_type);
+      this->result = res;
+
+      for (int i=0; i<4; i++) {
+         emit(MOV(res, fs_reg(swiz == SWIZZLE_ZERO ? 0.0f : 1.0f)));
+         res.reg_offset++;
+      }
+      return;
+   }
+
     if (ir->type == glsl_type::float_type) {
        /* Ignore DEPTH_TEXTURE_MODE swizzling. */
        assert(ir->sampler->type->sampler_shadow);
_______________________________________________
mesa-dev mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to