From: Chia-I Wu <[email protected]> Add fs_visitor::emit_dual_texture_gen7 that emulates SIMD16 sample_d with dual SIMD8 sample_d on gen7+.
No piglit quick.tests regression on Ivy Bridge and Haswell. Improved Xonotic with Ultra effects by 6.0209% +/- 0.396586% (N=11) on Haswell. v2: no change v3: reworked because of texture-from-GRF changes Signed-off-by: Chia-I Wu <[email protected]> --- src/mesa/drivers/dri/i965/brw_fs.h | 3 + src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 117 ++++++++++++++++++++++++++- 2 files changed, 118 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index c2ba351..05bf39e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -347,6 +347,9 @@ public: fs_inst *emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, fs_reg shadow_comp, fs_reg lod, fs_reg lod2, fs_reg sample_index, int sampler); + void emit_dual_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, + fs_reg shadow_comp, fs_reg lod, fs_reg lod2, + fs_reg sample_index, int sampler); fs_inst *emit_texture(ir_texture *ir, fs_reg dst, fs_reg payload, int mlen, bool header_present, int regs_written, int sampler); diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index d164b04..19e3f1e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1394,6 +1394,114 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, return emit_texture(ir, dst, payload, mlen, header_present, 4, sampler); } +/* Emulate a SIMD16 sampler message with dual SIMD8 sampler messages. For + * now, and for practical reasons, only ir_txd is supported. 
+ */ +void +fs_visitor::emit_dual_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, + fs_reg shadow_c, fs_reg lod, fs_reg lod2, + fs_reg sample_index, int sampler) +{ + /* no need to emit dual SIMD8 messages */ + if (dispatch_width != 16 || ir->op != ir_txd) { + emit_texture_gen7(ir, dst, coordinate, shadow_c, + lod, lod2, sample_index, sampler); + return; + } + + fs_reg simd8_dst = fs_reg(GRF, virtual_grf_alloc(4), + brw_type_for_base_type(ir->type)); + +#define ADVANCE_HALF(reg) \ + do { reg.reg_offset += reg.sechalf; reg.sechalf = !reg.sechalf; } while (0) + + for (int msg = 0; msg < 2; msg++) { + bool header_present = false; + fs_reg payload = fs_reg(this, glsl_type::float_type); + fs_reg next = payload; + + if (msg == 0) + push_force_uncompressed(); + else + push_force_sechalf(); + + /* only txd is supported for now */ + assert(ir->op == ir_txd); + + if (ir->offset) { + /* Need the header to put texture offsets in */ + header_present = true; + ADVANCE_HALF(next); + } + + if (ir->shadow_comparitor) { + emit(MOV(next, shadow_c)); + ADVANCE_HALF(next); + } + + /* Load dPdx and the coordinate together: + * [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z + */ + fs_reg coord = coordinate, ddx = lod, ddy = lod2; + for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { + emit(MOV(next, coord)); + coord.reg_offset++; + ADVANCE_HALF(next); + + /* For cube map array, the coordinate is (u,v,r,ai) but there are + * only derivatives for (u, v, r). + */ + if (i < ir->lod_info.grad.dPdx->type->vector_elements) { + emit(MOV(next, ddx)); + ddx.reg_offset++; + ADVANCE_HALF(next); + + emit(MOV(next, ddy)); + ddy.reg_offset++; + ADVANCE_HALF(next); + } + } + + int mlen = next.reg_offset * 2 + next.sechalf; + if (mlen > 11) { + fail("Message length >11 disallowed by hardware\n"); + break; + } + + /* Message length is mlen and response length is 4. In vgrf, that means + * (mlen + 1) / 2 registers for payload and 2 registers for writeback. 
+ */ + virtual_grf_sizes[payload.reg] = (mlen + 1) / 2; + emit_texture(ir, simd8_dst, payload, mlen, header_present, 2, sampler); + + fs_reg d = dst, s = simd8_dst; + d.sechalf = (msg == 1); + + /* swizzle the result to match SIMD16 writeback */ + for (int i = 0; i < 4; i++) { + emit(MOV(d, s)); + d.reg_offset++; + ADVANCE_HALF(s); + } + + if (msg == 0) { + pop_force_uncompressed(); + + /* prepare for the second message */ + simd8_dst.reg_offset += 2; + coordinate.sechalf = true; + shadow_c.sechalf = true; + lod.sechalf = true; + lod2.sechalf = true; + sample_index.sechalf = true; + } else { + pop_force_sechalf(); + } + } + +#undef ADVANCE_HALF +} + fs_reg fs_visitor::rescale_texcoord(ir_texture *ir, fs_reg coordinate, bool is_rect, int sampler, int texunit) @@ -1586,8 +1694,13 @@ fs_visitor::visit(ir_texture *ir) fs_reg dst = fs_reg(this, glsl_type::get_instance(ir->type->base_type, 4, 1)); if (brw->gen >= 7) { - emit_texture_gen7(ir, dst, coordinate, shadow_comparitor, - lod, lod2, sample_index, sampler); + if (dispatch_width == 16 && ir->op == ir_txd) { + emit_dual_texture_gen7(ir, dst, coordinate, shadow_comparitor, + lod, lod2, sample_index, sampler); + } else { + emit_texture_gen7(ir, dst, coordinate, shadow_comparitor, + lod, lod2, sample_index, sampler); + } } else if (brw->gen >= 5) { emit_texture_gen5(ir, dst, coordinate, shadow_comparitor, lod, lod2, sample_index, sampler); -- 1.8.3.1 _______________________________________________ mesa-dev mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/mesa-dev
