anyway, I think it might make sense to take a look at the shaders hurt most as I suspect there might be a way to improve the situation a little
On Sat, Jul 7, 2018 at 3:38 AM, Karol Herbst <[email protected]> wrote: > okay right, if loading those special regs is indeed more expensive > than doing the read + a few extbf then I see the point of this > optimization > > On Sat, Jul 7, 2018 at 2:46 AM, Ilia Mirkin <[email protected]> wrote: >> Are they? Fewer special reg loads = better... >> >> On Fri, Jul 6, 2018 at 8:31 PM, Karol Herbst <[email protected]> wrote: >>> somehow it doesn't really look like that it is worth the effort as the >>> generated shaders are worse in avg? >>> >>> On Fri, Jul 6, 2018 at 10:32 PM, Rhys Perry <[email protected]> >>> wrote: >>>> This patch doesn't touch NTID since it seems very difficult (or >>>> impossible) to generate. Seemingly because the state tracker or glsl >>>> compiler is turning gl_WorkGroupSize into a immediate. Such a >>>> transformation is not possible with GL_ARB_compute_variable_group_size >>>> but gl_WorkGroupSize is not available when that extension is enabled. >>>> >>>> On Kepler+, it seems to end up being lowered into constant buffer loads >>>> anyway. >>>> >>>> On Fri, Jul 6, 2018 at 9:21 PM, Rhys Perry <[email protected]> >>>> wrote: >>>>> total instructions in shared programs : 5804448 -> 5804690 (0.00%) >>>>> total gprs used in shared programs : 670065 -> 670065 (0.00%) >>>>> total shared used in shared programs : 548832 -> 548832 (0.00%) >>>>> total local used in shared programs : 21068 -> 21068 (0.00%) >>>>> >>>>> local shared gpr inst bytes >>>>> helped 0 0 0 5 5 >>>>> hurt 0 0 0 191 191 >>>>> >>>>> Signed-off-by: Rhys Perry <[email protected]> >>>>> --- >>>>> Instead of combining SV_TID reads into a SV_COMBINED_TID read, this has >>>>> SV_TID reads lowered into a SV_COMBINED_TID read which can them be >>>>> combined by CSE and then turned into a SV_TID read by AlgebraicOpt if it >>>>> doesn't create another RDSV. >>>>> >>>>> src/gallium/drivers/nouveau/codegen/nv50_ir.h | 1 + >>>>> .../drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp | 1 + >>>>> .../drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp | 1 + >>>>> .../drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp | 1 + >>>>> .../nouveau/codegen/nv50_ir_lowering_nv50.cpp | 3 ++ >>>>> .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 12 +++++++ >>>>> .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 40 >>>>> ++++++++++++++++++++++ >>>>> .../drivers/nouveau/codegen/nv50_ir_print.cpp | 1 + >>>>> .../nouveau/codegen/nv50_ir_target_nv50.cpp | 1 + >>>>> 9 files changed, 61 insertions(+) >>>>> >>>>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h >>>>> b/src/gallium/drivers/nouveau/codegen/nv50_ir.h >>>>> index f4f3c70888..0b220cc48d 100644 >>>>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h >>>>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h >>>>> @@ -453,6 +453,7 @@ enum SVSemantic >>>>> SV_TESS_INNER, >>>>> SV_TESS_COORD, >>>>> SV_TID, >>>>> + SV_COMBINED_TID, >>>>> SV_CTAID, >>>>> SV_NTID, >>>>> SV_GRIDID, >>>>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp >>>>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp >>>>> index 647d1a5d0e..2118c3153f 100644 >>>>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp >>>>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp >>>>> @@ -2297,6 +2297,7 @@ CodeEmitterGK110::getSRegEncoding(const ValueRef& >>>>> ref) >>>>> case SV_INVOCATION_ID: return 0x11; >>>>> case SV_YDIR: return 0x12; >>>>> case SV_THREAD_KILL: return 0x13; >>>>> + case SV_COMBINED_TID: return 0x20; >>>>> case SV_TID: return 0x21 + SDATA(ref).sv.index; >>>>> case SV_CTAID: return 0x25 + SDATA(ref).sv.index; >>>>> case SV_NTID: return 0x29 + SDATA(ref).sv.index; >>>>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp >>>>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp >>>>> index 26826d6360..694d1b10a3 100644 >>>>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp >>>>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp >>>>> @@ -267,6 +267,7 @@ CodeEmitterGM107::emitSYS(int pos, const Value *val) >>>>> case SV_INVOCATION_ID : id = 0x11; break; >>>>> case SV_THREAD_KILL : id = 0x13; break; >>>>> case SV_INVOCATION_INFO: id = 0x1d; break; >>>>> + case SV_COMBINED_TID : id = 0x20; break; >>>>> case SV_TID : id = 0x21 + val->reg.data.sv.index; break; >>>>> case SV_CTAID : id = 0x25 + val->reg.data.sv.index; break; >>>>> case SV_LANEMASK_EQ : id = 0x38; break; >>>>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp >>>>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp >>>>> index d85fdda56f..b6e35dd0ee 100644 >>>>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp >>>>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp >>>>> @@ -1990,6 +1990,7 @@ CodeEmitterNVC0::getSRegEncoding(const ValueRef& >>>>> ref) >>>>> case SV_INVOCATION_ID: return 0x11; >>>>> case SV_YDIR: return 0x12; >>>>> case SV_THREAD_KILL: return 0x13; >>>>> + case SV_COMBINED_TID: return 0x20; >>>>> case SV_TID: return 0x21 + SDATA(ref).sv.index; >>>>> case SV_CTAID: return 0x25 + SDATA(ref).sv.index; >>>>> case SV_NTID: return 0x29 + SDATA(ref).sv.index; >>>>> diff --git >>>>> a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp >>>>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp >>>>> index 36ab837f6e..1f0fd466a9 100644 >>>>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp >>>>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp >>>>> @@ -1201,6 +1201,9 @@ NV50LoweringPreSSA::handleRDSV(Instruction *i) >>>>> bld.mkMov(def, bld.mkImm(0)); >>>>> } >>>>> break; >>>>> + case SV_COMBINED_TID: >>>>> + bld.mkMov(def, tid); >>>>> + break; >>>>> case SV_SAMPLE_POS: { >>>>> Value *off = new_LValue(func, FILE_ADDRESS); >>>>> bld.mkOp1(OP_RDSV, TYPE_U32, def, bld.mkSysVal(SV_SAMPLE_INDEX, >>>>> 0)); >>>>> diff --git >>>>> a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp >>>>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp >>>>> index 597dcdffbe..1410cf26c8 100644 >>>>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp >>>>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp >>>>> @@ -2576,6 +2576,18 @@ NVC0LoweringPass::handleRDSV(Instruction *i) >>>>> // TGSI backend may use 4th component of TID,NTID,CTAID,NCTAID >>>>> i->op = OP_MOV; >>>>> i->setSrc(0, bld.mkImm((sv == SV_NTID || sv == SV_NCTAID) ? 1 : >>>>> 0)); >>>>> + } else >>>>> + if (sv == SV_TID) { >>>>> + // Help CSE combine TID fetches >>>>> + Value *tid = bld.mkOp1v(OP_RDSV, TYPE_U32, bld.getScratch(), >>>>> + bld.mkSysVal(SV_COMBINED_TID, 0)); >>>>> + i->op = OP_EXTBF; >>>>> + i->setSrc(0, tid); >>>>> + switch (sym->reg.data.sv.index) { >>>>> + case 0: i->setSrc(1, bld.mkImm(0x1000)); break; >>>>> + case 1: i->setSrc(1, bld.mkImm(0x0a10)); break; >>>>> + case 2: i->setSrc(1, bld.mkImm(0x061a)); break; >>>>> + } >>>>> } >>>>> if (sv == SV_VERTEX_COUNT) { >>>>> bld.setPosition(i, true); >>>>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp >>>>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp >>>>> index 39177bd044..4dd3a6cdaa 100644 >>>>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp >>>>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp >>>>> @@ -1654,6 +1654,7 @@ ModifierFolding::visit(BasicBlock *bb) >>>>> // SLCT(a, b, const) -> cc(const) ? a : b >>>>> // RCP(RCP(a)) -> a >>>>> // MUL(MUL(a, b), const) -> MUL_Xconst(a, b) >>>>> +// EXTBF(RDSV(COMBINED_TID)) -> RDSV(TID) >>>>> class AlgebraicOpt : public Pass >>>>> { >>>>> private: >>>>> @@ -1671,6 +1672,7 @@ private: >>>>> void handleCVT_EXTBF(Instruction *); >>>>> void handleSUCLAMP(Instruction *); >>>>> void handleNEG(Instruction *); >>>>> + void handleEXTBF_RDSV(Instruction *); >>>>> >>>>> BuildUtil bld; >>>>> }; >>>>> @@ -2175,6 +2177,41 @@ AlgebraicOpt::handleNEG(Instruction *i) { >>>>> } >>>>> } >>>>> >>>>> +// EXTBF(RDSV(COMBINED_TID)) -> RDSV(TID) >>>>> +void >>>>> +AlgebraicOpt::handleEXTBF_RDSV(Instruction *i) >>>>> +{ >>>>> + Instruction *rdsv = i->getSrc(0)->getUniqueInsn(); >>>>> + if (rdsv->op != OP_RDSV || >>>>> + rdsv->getSrc(0)->asSym()->reg.data.sv.sv != SV_COMBINED_TID) >>>>> + return; >>>>> + // Avoid creating more RDSV instructions >>>>> + if (rdsv->getDef(0)->refCount() > 1) >>>>> + return; >>>>> + >>>>> + ImmediateValue imm; >>>>> + if (!i->src(1).getImmediate(imm)) >>>>> + return; >>>>> + >>>>> + int index; >>>>> + if (imm.isInteger(0x1000)) >>>>> + index = 0; >>>>> + else >>>>> + if (imm.isInteger(0x0a10)) >>>>> + index = 1; >>>>> + else >>>>> + if (imm.isInteger(0x061a)) >>>>> + index = 2; >>>>> + else >>>>> + return; >>>>> + >>>>> + bld.setPosition(i, false); >>>>> + >>>>> + i->op = OP_RDSV; >>>>> + i->setSrc(0, bld.mkSysVal(SV_TID, index)); >>>>> + i->setSrc(1, NULL); >>>>> +} >>>>> + >>>>> bool >>>>> AlgebraicOpt::visit(BasicBlock *bb) >>>>> { >>>>> @@ -2215,6 +2252,9 @@ AlgebraicOpt::visit(BasicBlock *bb) >>>>> case OP_NEG: >>>>> handleNEG(i); >>>>> break; >>>>> + case OP_EXTBF: >>>>> + handleEXTBF_RDSV(i); >>>>> + break; >>>>> default: >>>>> break; >>>>> } >>>>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp >>>>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp >>>>> index cbb21f5f72..ee3506fbae 100644 >>>>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp >>>>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp >>>>> @@ -306,6 +306,7 @@ static const char *SemanticStr[SV_LAST + 1] = >>>>> "TESS_INNER", >>>>> "TESS_COORD", >>>>> "TID", >>>>> + "COMBINED_TID", >>>>> "CTAID", >>>>> "NTID", >>>>> "GRIDID", >>>>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp >>>>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp >>>>> index dc73231394..1ad3467337 100644 >>>>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp >>>>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp >>>>> @@ -257,6 +257,7 @@ TargetNV50::getSVAddress(DataFile shaderFile, const >>>>> Symbol *sym) const >>>>> case SV_NTID: >>>>> return 0x2 + 2 * sym->reg.data.sv.index; >>>>> case SV_TID: >>>>> + case SV_COMBINED_TID: >>>>> return 0; >>>>> case SV_SAMPLE_POS: >>>>> return 0; /* sample position is handled differently */ >>>>> -- >>>>> 2.14.4 >>>>> >>>> _______________________________________________ >>>> mesa-dev mailing list >>>> [email protected] >>>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev >>> _______________________________________________ >>> mesa-dev mailing list >>> [email protected] >>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
