This patch is: Reviewed-by: Chris Forbes <[email protected]>
On Wed, Mar 12, 2014 at 9:28 PM, Kenneth Graunke <[email protected]> wrote: > Ideally, we'd like to never even attempt the SIMD16 compile if we could > know ahead of time that it won't succeed---it's purely a waste of time. > This is especially important for state-based recompiles, which happen at > draw time. > > The fragment shader compiler has a number of checks like: > > if (dispatch_width == 16) > fail("...some reason..."); > > This patch introduces a new no16() function which replaces the above > pattern. In the SIMD8 compile, it sets a "SIMD16 will never work" flag. > Then, brw_wm_fs_emit can check that flag, skip the SIMD16 compile, and > issue a helpful performance warning if INTEL_DEBUG=perf is set. (In > SIMD16 mode, no16() calls fail(), for safety's sake.) > > The great part is that this is not a heuristic---if the flag is set, we > know with 100% certainty that the SIMD16 compile would fail. (It might > fail anyway if we run out of registers, but it's always worth trying.) > > Signed-off-by: Kenneth Graunke <[email protected]> > --- > src/mesa/drivers/dri/i965/brw_fs.cpp | 69 > +++++++++++++++++++++++----- > src/mesa/drivers/dri/i965/brw_fs.h | 4 ++ > src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 44 +++++++++--------- > 3 files changed, 83 insertions(+), 34 deletions(-) > > I forgot to send this one out...it applies on top of the previous 7 patches. > > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp > b/src/mesa/drivers/dri/i965/brw_fs.cpp > index 62848be..9ad80c5 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp > @@ -647,9 +647,8 @@ fs_visitor::emit_shader_time_write(enum > shader_time_shader_type type, > } > > void > -fs_visitor::fail(const char *format, ...) > +fs_visitor::vfail(const char *format, va_list va) > { > - va_list va; > char *msg; > > if (failed) > @@ -657,9 +656,7 @@ fs_visitor::fail(const char *format, ...) > > failed = true; > > - va_start(va, format); > msg = ralloc_vasprintf(mem_ctx, format, va); > - va_end(va); > msg = ralloc_asprintf(mem_ctx, "FS compile failed: %s\n", msg); > > this->fail_msg = msg; > @@ -669,6 +666,49 @@ fs_visitor::fail(const char *format, ...) > } > } > > +void > +fs_visitor::fail(const char *format, ...) > +{ > + va_list va; > + > + va_start(va, format); > + vfail(format, va); > + va_end(va); > +} > + > +/** > + * Mark this program as impossible to compile in SIMD16 mode. > + * > + * During the SIMD8 compile (which happens first), we can detect and flag > + * things that are unsupported in SIMD16 mode, so the compiler can skip > + * the SIMD16 compile altogether. > + * > + * During a SIMD16 compile (if one happens anyway), this just calls fail(). > + */ > +void > +fs_visitor::no16(const char *format, ...) > +{ > + va_list va; > + > + va_start(va, format); > + > + if (dispatch_width == 16) { > + vfail(format, va); > + return; > + } > + > + simd16_unsupported = true; > + > + if (INTEL_DEBUG & DEBUG_PERF) { > + if (no16_msg) > + ralloc_vasprintf_append(&no16_msg, format, va); > + else > + no16_msg = ralloc_vasprintf(mem_ctx, format, va); > + } > + > + va_end(va); > +} > + > fs_inst * > fs_visitor::emit(enum opcode opcode) > { > @@ -1356,8 +1396,8 @@ fs_visitor::emit_math(enum opcode opcode, fs_reg dst, > fs_reg src0, fs_reg src1) > switch (opcode) { > case SHADER_OPCODE_INT_QUOTIENT: > case SHADER_OPCODE_INT_REMAINDER: > - if (brw->gen >= 7 && dispatch_width == 16) > - fail("SIMD16 INTDIV unsupported\n"); > + if (brw->gen >= 7) > + no16("SIMD16 INTDIV unsupported\n"); > break; > case SHADER_OPCODE_POW: > break; > @@ -3504,13 +3544,18 @@ brw_wm_fs_emit(struct brw_context *brw, struct > brw_wm_compile *c, > exec_list *simd16_instructions = NULL; > fs_visitor v2(brw, c, prog, fp, 16); > if (brw->gen >= 5 && likely(!(INTEL_DEBUG & DEBUG_NO16))) { > - /* Try a SIMD16 compile */ > - v2.import_uniforms(&v); > - if (!v2.run()) { > - perf_debug("SIMD16 shader failed to compile, falling back to " > - "SIMD8 at a 10-20%% performance cost: %s", v2.fail_msg); > + if (!v.simd16_unsupported) { > + /* Try a SIMD16 compile */ > + v2.import_uniforms(&v); > + if (!v2.run()) { > + perf_debug("SIMD16 shader failed to compile, falling back to " > + "SIMD8 at a 10-20%% performance cost: %s", > v2.fail_msg); > + } else { > + simd16_instructions = &v2.instructions; > + } > } else { > - simd16_instructions = &v2.instructions; > + perf_debug("SIMD16 shader unsupported, falling back to " > + "SIMD8 at a 10-20%% performance cost: %s", v.no16_msg); > } > } > > diff --git a/src/mesa/drivers/dri/i965/brw_fs.h > b/src/mesa/drivers/dri/i965/brw_fs.h > index 9de1f3a..0d064f6 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.h > +++ b/src/mesa/drivers/dri/i965/brw_fs.h > @@ -381,7 +381,9 @@ public: > void insert_gen4_send_dependency_workarounds(); > void insert_gen4_pre_send_dependency_workarounds(fs_inst *inst); > void insert_gen4_post_send_dependency_workarounds(fs_inst *inst); > + void vfail(const char *msg, va_list args); > void fail(const char *msg, ...); > + void no16(const char *msg, ...); > void lower_uniform_pull_constant_loads(); > > void push_force_uncompressed(); > @@ -541,6 +543,8 @@ public: > > bool failed; > char *fail_msg; > + bool simd16_unsupported; > + char *no16_msg; > > /* Result of last visit() method. */ > fs_reg result; > diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > index cd90e23..a832268 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > @@ -462,8 +462,8 @@ fs_visitor::visit(ir_expression *ir) > * FINISHME: Emit just the MUL if we know an operand is small > * enough. > */ > - if (brw->gen >= 7 && dispatch_width == 16) > - fail("SIMD16 explicit accumulator operands unsupported\n"); > + if (brw->gen >= 7) > + no16("SIMD16 explicit accumulator operands unsupported\n"); > > struct brw_reg acc = retype(brw_acc_reg(), this->result.type); > > @@ -475,8 +475,8 @@ fs_visitor::visit(ir_expression *ir) > } > break; > case ir_binop_imul_high: { > - if (brw->gen >= 7 && dispatch_width == 16) > - fail("SIMD16 explicit accumulator operands unsupported\n"); > + if (brw->gen >= 7) > + no16("SIMD16 explicit accumulator operands unsupported\n"); > > struct brw_reg acc = retype(brw_acc_reg(), this->result.type); > > @@ -490,8 +490,8 @@ fs_visitor::visit(ir_expression *ir) > emit_math(SHADER_OPCODE_INT_QUOTIENT, this->result, op[0], op[1]); > break; > case ir_binop_carry: { > - if (brw->gen >= 7 && dispatch_width == 16) > - fail("SIMD16 explicit accumulator operands unsupported\n"); > + if (brw->gen >= 7) > + no16("SIMD16 explicit accumulator operands unsupported\n"); > > struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_UD); > > @@ -500,8 +500,8 @@ fs_visitor::visit(ir_expression *ir) > break; > } > case ir_binop_borrow: { > - if (brw->gen >= 7 && dispatch_width == 16) > - fail("SIMD16 explicit accumulator operands unsupported\n"); > + if (brw->gen >= 7) > + no16("SIMD16 explicit accumulator operands unsupported\n"); > > struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_UD); > > @@ -1290,8 +1290,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg > dst, fs_reg coordinate, > next.reg_offset++; > break; > case ir_txd: { > - if (dispatch_width == 16) > - fail("Gen7 does not support sample_d/sample_d_c in SIMD16 mode."); > + no16("Gen7 does not support sample_d/sample_d_c in SIMD16 mode."); > > /* Load dPdx and the coordinate together: > * [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, > dPdy.z > @@ -1364,8 +1363,8 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg > dst, fs_reg coordinate, > break; > case ir_tg4: > if (has_nonconstant_offset) { > - if (ir->shadow_comparitor && dispatch_width == 16) > - fail("Gen7 does not support gather4_po_c in SIMD16 mode."); > + if (ir->shadow_comparitor) > + no16("Gen7 does not support gather4_po_c in SIMD16 mode."); > > /* More crazy intermixing */ > ir->offset->accept(this); > @@ -1464,8 +1463,8 @@ fs_visitor::rescale_texcoord(ir_texture *ir, fs_reg > coordinate, > 0 > }; > > + no16("rectangle scale uniform setup not supported on SIMD16\n"); > if (dispatch_width == 16) { > - fail("rectangle scale uniform setup not supported on SIMD16\n"); > return coordinate; > } > > @@ -2183,8 +2182,8 @@ fs_visitor::try_replace_with_sel() > void > fs_visitor::visit(ir_if *ir) > { > - if (brw->gen < 6 && dispatch_width == 16) { > - fail("Can't support (non-uniform) control flow on SIMD16\n"); > + if (brw->gen < 6) { > + no16("Can't support (non-uniform) control flow on SIMD16\n"); > } > > /* Don't point the annotation at the if statement, because then it plus > @@ -2226,8 +2225,8 @@ fs_visitor::visit(ir_if *ir) > void > fs_visitor::visit(ir_loop *ir) > { > - if (brw->gen < 6 && dispatch_width == 16) { > - fail("Can't support (non-uniform) control flow on SIMD16\n"); > + if (brw->gen < 6) { > + no16("Can't support (non-uniform) control flow on SIMD16\n"); > } > > this->base_ir = NULL; > @@ -2725,9 +2724,10 @@ fs_visitor::emit_fb_writes() > bool do_dual_src = this->dual_src_output.file != BAD_FILE; > bool src0_alpha_to_render_target = false; > > - if (dispatch_width == 16 && do_dual_src) { > - fail("GL_ARB_blend_func_extended not yet supported in SIMD16."); > - do_dual_src = false; > + if (do_dual_src) { > + no16("GL_ARB_blend_func_extended not yet supported in SIMD16."); > + if (dispatch_width == 16) > + do_dual_src = false; > } > > /* From the Sandy Bridge PRM, volume 4, page 198: > @@ -2778,13 +2778,13 @@ fs_visitor::emit_fb_writes() > nr += reg_width; > > if (c->source_depth_to_render_target) { > - if (brw->gen == 6 && dispatch_width == 16) { > + if (brw->gen == 6) { > /* For outputting oDepth on gen6, SIMD8 writes have to be > * used. This would require SIMD8 moves of each half to > * message regs, kind of like pre-gen5 SIMD16 FB writes. > * Just bail on doing so for now. > */ > - fail("Missing support for simd16 depth writes on gen6\n"); > + no16("Missing support for simd16 depth writes on gen6\n"); > } > > if (prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { > -- > 1.9.0 > > _______________________________________________ > mesa-dev mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/mesa-dev
