Patches 1-2 seem OK. I'm a little concerned that this one changes functionality, since it removes the "need_flush" logic. It would be nice if you could get this patch some heavier testing before pushing it out.
On Wed, Oct 26, 2016 at 4:00 PM, Samuel Pitoiset <[email protected]> wrote: > The first goal is to reduce code duplication between 3d and > compute and increase readability of that area. > > This refactoring also tries to reduce the number of commands > send through the pushbuffer and to not invalidate all caches > when binding new textures/samplers. Although I don't see any > improvements with Elemental but this might help in some cases. > > Signed-off-by: Samuel Pitoiset <[email protected]> > --- > src/gallium/drivers/nouveau/nvc0/nvc0_compute.c | 12 +- > src/gallium/drivers/nouveau/nvc0/nvc0_context.h | 7 +- > src/gallium/drivers/nouveau/nvc0/nvc0_tex.c | 159 > ++++++++++++++---------- > src/gallium/drivers/nouveau/nvc0/nve4_compute.c | 98 ++------------- > 4 files changed, 113 insertions(+), 163 deletions(-) > > diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c > b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c > index 11635c9..041cf1c 100644 > --- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c > +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c > @@ -143,11 +143,7 @@ nvc0_screen_compute_setup(struct nvc0_screen *screen, > static void > nvc0_compute_validate_samplers(struct nvc0_context *nvc0) > { > - bool need_flush = nvc0_validate_tsc(nvc0, 5); > - if (need_flush) { > - BEGIN_NVC0(nvc0->base.pushbuf, NVC0_CP(TSC_FLUSH), 1); > - PUSH_DATA (nvc0->base.pushbuf, 0); > - } > + nvc0_validate_tsc(nvc0, 5); > > /* Invalidate all 3D samplers because they are aliased. */ > for (int s = 0; s < 5; s++) > @@ -158,11 +154,7 @@ nvc0_compute_validate_samplers(struct nvc0_context *nvc0) > static void > nvc0_compute_validate_textures(struct nvc0_context *nvc0) > { > - bool need_flush = nvc0_validate_tic(nvc0, 5); > - if (need_flush) { > - BEGIN_NVC0(nvc0->base.pushbuf, NVC0_CP(TIC_FLUSH), 1); > - PUSH_DATA (nvc0->base.pushbuf, 0); > - } > + nvc0_validate_tic(nvc0, 5); > > /* Invalidate all 3D textures because they are aliased. 
*/ > for (int s = 0; s < 5; s++) { > diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h > b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h > index 37aecae..8750edc 100644 > --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h > +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h > @@ -330,9 +330,10 @@ extern void nvc0_clear(struct pipe_context *, unsigned > buffers, > extern void nvc0_init_surface_functions(struct nvc0_context *); > > /* nvc0_tex.c */ > -bool nvc0_validate_tic(struct nvc0_context *nvc0, int s); > -bool nvc0_validate_tsc(struct nvc0_context *nvc0, int s); > -bool nve4_validate_tsc(struct nvc0_context *nvc0, int s); > +void nvc0_validate_tic(struct nvc0_context *nvc0, int s); > +void nvc0_validate_tsc(struct nvc0_context *nvc0, int s); > +void nve4_validate_tic(struct nvc0_context *nvc0, int s); > +void nve4_validate_tsc(struct nvc0_context *nvc0, int s); > void nvc0_validate_suf(struct nvc0_context *nvc0, int s); > void nvc0_validate_textures(struct nvc0_context *); > void nvc0_validate_samplers(struct nvc0_context *); > diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c > b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c > index 23c9daa..4f6788c 100644 > --- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c > +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c > @@ -24,6 +24,7 @@ > #include "nvc0/nvc0_resource.h" > #include "nvc0/gm107_texture.xml.h" > #include "nvc0/nvc0_compute.xml.h" > +#include "nvc0/nve4_compute.xml.h" > #include "nv50/g80_texture.xml.h" > #include "nv50/g80_defs.xml.h" > > @@ -468,14 +469,13 @@ nvc0_update_tic(struct nvc0_context *nvc0, struct > nv50_tic_entry *tic, > tic->tic[2] |= address >> 32; > } > > -bool > +void > nvc0_validate_tic(struct nvc0_context *nvc0, int s) > { > - uint32_t commands[32]; > struct nouveau_pushbuf *push = nvc0->base.pushbuf; > + uint32_t commands[3][16]; > + unsigned n[3] = { 0, 0, 0 }; > unsigned i; > - unsigned n = 0; > - bool need_flush = false; > > for (i = 0; i < 
nvc0->num_textures[s]; ++i) { > struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]); > @@ -484,7 +484,7 @@ nvc0_validate_tic(struct nvc0_context *nvc0, int s) > > if (!tic) { > if (dirty) > - commands[n++] = (i << 1) | 0; > + commands[0][n[0]++] = (i << 1) | 0; > continue; > } > res = nv04_resource(tic->pipe.texture); > @@ -496,15 +496,11 @@ nvc0_validate_tic(struct nvc0_context *nvc0, int s) > nvc0_m2mf_push_linear(&nvc0->base, nvc0->screen->txc, tic->id * 32, > NV_VRAM_DOMAIN(&nvc0->screen->base), 32, > tic->tic); > - need_flush = true; > + > + commands[1][n[1]++] = (tic->id << 4) | 1; > } else > if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) { > - if (unlikely(s == 5)) > - BEGIN_NVC0(push, NVC0_CP(TEX_CACHE_CTL), 1); > - else > - BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1); > - PUSH_DATA (push, (tic->id << 4) | 1); > - NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_cache_flush_count, 1); > + commands[2][n[2]++] = (tic->id << 4) | 1; > } > nvc0_screen_tic_lock(nvc0->screen, tic); > > @@ -513,7 +509,7 @@ nvc0_validate_tic(struct nvc0_context *nvc0, int s) > > if (!dirty) > continue; > - commands[n++] = (tic->id << 9) | (i << 1) | 1; > + commands[0][n[0]++] = (tic->id << 9) | (i << 1) | 1; > > if (unlikely(s == 5)) > BCTX_REFN(nvc0->bufctx_cp, CP_TEX(i), res, RD); > @@ -521,28 +517,41 @@ nvc0_validate_tic(struct nvc0_context *nvc0, int s) > BCTX_REFN(nvc0->bufctx_3d, 3D_TEX(s, i), res, RD); > } > for (; i < nvc0->state.num_textures[s]; ++i) > - commands[n++] = (i << 1) | 0; > + commands[0][n[0]++] = (i << 1) | 0; > > nvc0->state.num_textures[s] = nvc0->num_textures[s]; > > - if (n) { > + if (n[0]) { > if (unlikely(s == 5)) > - BEGIN_NIC0(push, NVC0_CP(BIND_TIC), n); > + BEGIN_NIC0(push, NVC0_CP(BIND_TIC), n[0]); > else > - BEGIN_NIC0(push, NVC0_3D(BIND_TIC(s)), n); > - PUSH_DATAp(push, commands, n); > + BEGIN_NIC0(push, NVC0_3D(BIND_TIC(s)), n[0]); > + PUSH_DATAp(push, commands[0], n[0]); > + } > + if (n[1]) { > + if (unlikely(s == 5)) > + 
BEGIN_NIC0(push, NVC0_CP(TIC_FLUSH), n[1]); > + else > + BEGIN_NIC0(push, NVC0_3D(TIC_FLUSH), n[1]); > + PUSH_DATAp(push, commands[1], n[1]); > + } > + if (n[2]) { > + if (unlikely(s == 5)) > + BEGIN_NIC0(push, NVC0_CP(TEX_CACHE_CTL), n[2]); > + else > + BEGIN_NIC0(push, NVC0_3D(TEX_CACHE_CTL), n[2]); > + PUSH_DATAp(push, commands[2], n[2]); > } > nvc0->textures_dirty[s] = 0; > - > - return need_flush; > } > > -static bool > -nve4_validate_tic(struct nvc0_context *nvc0, unsigned s) > +void > +nve4_validate_tic(struct nvc0_context *nvc0, int s) > { > struct nouveau_pushbuf *push = nvc0->base.pushbuf; > + uint32_t commands[2][PIPE_MAX_SAMPLERS]; > + unsigned n[2] = { 0, 0 }; > unsigned i; > - bool need_flush = false; > > for (i = 0; i < nvc0->num_textures[s]; ++i) { > struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]); > @@ -562,11 +571,11 @@ nve4_validate_tic(struct nvc0_context *nvc0, unsigned s) > nve4_p2mf_push_linear(&nvc0->base, nvc0->screen->txc, tic->id * 32, > NV_VRAM_DOMAIN(&nvc0->screen->base), 32, > tic->tic); > - need_flush = true; > + > + commands[0][n[0]++] = (tic->id << 4) | 1; > } else > if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) { > - BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1); > - PUSH_DATA (push, (tic->id << 4) | 1); > + commands[1][n[1]++] = (tic->id << 4) | 1; > } > nvc0_screen_tic_lock(nvc0->screen, tic); > > @@ -575,34 +584,46 @@ nve4_validate_tic(struct nvc0_context *nvc0, unsigned s) > > nvc0->tex_handles[s][i] &= ~NVE4_TIC_ENTRY_INVALID; > nvc0->tex_handles[s][i] |= tic->id; > - if (dirty) > - BCTX_REFN(nvc0->bufctx_3d, 3D_TEX(s, i), res, RD); > + > + if (dirty) { > + if (unlikely(s == 5)) > + BCTX_REFN(nvc0->bufctx_cp, CP_TEX(i), res, RD); > + else > + BCTX_REFN(nvc0->bufctx_3d, 3D_TEX(s, i), res, RD); > + } > } > for (; i < nvc0->state.num_textures[s]; ++i) { > nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID; > nvc0->textures_dirty[s] |= 1 << i; > } > > - nvc0->state.num_textures[s] = nvc0->num_textures[s]; > + 
if (n[0]) { > + if (unlikely(s == 5)) > + BEGIN_NIC0(push, NVE4_CP(TIC_FLUSH), n[0]); > + else > + BEGIN_NIC0(push, NVC0_3D(TIC_FLUSH), n[0]); > + PUSH_DATAp(push, commands[0], n[0]); > + } > + if (n[1]) { > + if (unlikely(s == 5)) > + BEGIN_NIC0(push, NVE4_CP(TEX_CACHE_CTL), n[1]); > + else > + BEGIN_NIC0(push, NVC0_3D(TEX_CACHE_CTL), n[1]); > + PUSH_DATAp(push, commands[1], n[1]); > + } > > - return need_flush; > + nvc0->state.num_textures[s] = nvc0->num_textures[s]; > } > > void nvc0_validate_textures(struct nvc0_context *nvc0) > { > - bool need_flush = false; > int i; > > for (i = 0; i < 5; i++) { > if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) > - need_flush |= nve4_validate_tic(nvc0, i); > + nve4_validate_tic(nvc0, i); > else > - need_flush |= nvc0_validate_tic(nvc0, i); > - } > - > - if (need_flush) { > - BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(TIC_FLUSH), 1); > - PUSH_DATA (nvc0->base.pushbuf, 0); > + nvc0_validate_tic(nvc0, i); > } > > /* Invalidate all CP textures because they are aliased. 
*/ > @@ -612,14 +633,13 @@ void nvc0_validate_textures(struct nvc0_context *nvc0) > nvc0->dirty_cp |= NVC0_NEW_CP_TEXTURES; > } > > -bool > +void > nvc0_validate_tsc(struct nvc0_context *nvc0, int s) > { > - uint32_t commands[16]; > struct nouveau_pushbuf *push = nvc0->base.pushbuf; > + uint32_t commands[2][16]; > + unsigned n[2] = { 0, 0 }; > unsigned i; > - unsigned n = 0; > - bool need_flush = false; > > for (i = 0; i < nvc0->num_samplers[s]; ++i) { > struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]); > @@ -627,7 +647,7 @@ nvc0_validate_tsc(struct nvc0_context *nvc0, int s) > if (!(nvc0->samplers_dirty[s] & (1 << i))) > continue; > if (!tsc) { > - commands[n++] = (i << 4) | 0; > + commands[0][n[0]++] = (i << 4) | 0; > continue; > } > nvc0->seamless_cube_map = tsc->seamless_cube_map; > @@ -637,34 +657,42 @@ nvc0_validate_tsc(struct nvc0_context *nvc0, int s) > nvc0_m2mf_push_linear(&nvc0->base, nvc0->screen->txc, > 65536 + tsc->id * 32, > NV_VRAM_DOMAIN(&nvc0->screen->base), > 32, tsc->tsc); > - need_flush = true; > + > + commands[1][n[1]++] = (tsc->id << 4) | 1; > } > nvc0_screen_tsc_lock(nvc0->screen, tsc); > > - commands[n++] = (tsc->id << 12) | (i << 4) | 1; > + commands[0][n[0]++] = (tsc->id << 12) | (i << 4) | 1; > } > for (; i < nvc0->state.num_samplers[s]; ++i) > - commands[n++] = (i << 4) | 0; > + commands[0][n[0]++] = (i << 4) | 0; > > nvc0->state.num_samplers[s] = nvc0->num_samplers[s]; > > - if (n) { > + if (n[0]) { > if (unlikely(s == 5)) > - BEGIN_NIC0(push, NVC0_CP(BIND_TSC), n); > + BEGIN_NIC0(push, NVC0_CP(BIND_TSC), n[0]); > else > - BEGIN_NIC0(push, NVC0_3D(BIND_TSC(s)), n); > - PUSH_DATAp(push, commands, n); > + BEGIN_NIC0(push, NVC0_3D(BIND_TSC(s)), n[0]); > + PUSH_DATAp(push, commands[0], n[0]); > + } > + if (n[1]) { > + if (unlikely(s == 5)) > + BEGIN_NIC0(push, NVC0_CP(TSC_FLUSH), n[1]); > + else > + BEGIN_NIC0(push, NVC0_3D(TSC_FLUSH), n[1]); > + PUSH_DATAp(push, commands[1], n[1]); > } > nvc0->samplers_dirty[s] = 0; > - > 
- return need_flush; > } > > -bool > +void > nve4_validate_tsc(struct nvc0_context *nvc0, int s) > { > + struct nouveau_pushbuf *push = nvc0->base.pushbuf; > + uint32_t commands[PIPE_MAX_SAMPLERS]; > + unsigned n = 0; > unsigned i; > - bool need_flush = false; > > for (i = 0; i < nvc0->num_samplers[s]; ++i) { > struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]); > @@ -680,7 +708,8 @@ nve4_validate_tsc(struct nvc0_context *nvc0, int s) > 65536 + tsc->id * 32, > NV_VRAM_DOMAIN(&nvc0->screen->base), > 32, tsc->tsc); > - need_flush = true; > + > + commands[n++] = (tsc->id << 4) | 1; > } > nvc0_screen_tsc_lock(nvc0->screen, tsc); > > @@ -692,26 +721,26 @@ nve4_validate_tsc(struct nvc0_context *nvc0, int s) > nvc0->samplers_dirty[s] |= 1 << i; > } > > - nvc0->state.num_samplers[s] = nvc0->num_samplers[s]; > + if (n) { > + if (unlikely(s == 5)) > + BEGIN_NIC0(push, NVE4_CP(TSC_FLUSH), n); > + else > + BEGIN_NIC0(push, NVC0_3D(TSC_FLUSH), n); > + PUSH_DATAp(push, commands, n); > + } > > - return need_flush; > + nvc0->state.num_samplers[s] = nvc0->num_samplers[s]; > } > > void nvc0_validate_samplers(struct nvc0_context *nvc0) > { > - bool need_flush = false; > int i; > > for (i = 0; i < 5; i++) { > if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) > - need_flush |= nve4_validate_tsc(nvc0, i); > + nve4_validate_tsc(nvc0, i); > else > - need_flush |= nvc0_validate_tsc(nvc0, i); > - } > - > - if (need_flush) { > - BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(TSC_FLUSH), 1); > - PUSH_DATA (nvc0->base.pushbuf, 0); > + nvc0_validate_tsc(nvc0, i); > } > > /* Invalidate all CP samplers because they are aliased. 
*/ > diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c > b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c > index 26732a1..9947b37 100644 > --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c > +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c > @@ -276,15 +276,10 @@ nve4_compute_validate_surfaces(struct nvc0_context > *nvc0) > } > } > > -/* Thankfully, textures with samplers follow the normal rules. */ > static void > nve4_compute_validate_samplers(struct nvc0_context *nvc0) > { > - bool need_flush = nve4_validate_tsc(nvc0, 5); > - if (need_flush) { > - BEGIN_NVC0(nvc0->base.pushbuf, NVE4_CP(TSC_FLUSH), 1); > - PUSH_DATA (nvc0->base.pushbuf, 0); > - } > + nve4_validate_tsc(nvc0, 5); > > /* Invalidate all 3D samplers because they are aliased. */ > for (int s = 0; s < 5; s++) > @@ -292,13 +287,18 @@ nve4_compute_validate_samplers(struct nvc0_context > *nvc0) > nvc0->dirty_3d |= NVC0_NEW_3D_SAMPLERS; > } > > -/* (Code duplicated at bottom for various non-convincing reasons. > - * E.g. we might want to use the COMPUTE subchannel to upload TIC/TSC > - * entries to avoid a subchannel switch. > - * Same for texture cache flushes. > - * Also, the bufctx differs, and more IFs in the 3D version looks ugly.) > - */ > -static void nve4_compute_validate_textures(struct nvc0_context *); > +static void > +nve4_compute_validate_textures(struct nvc0_context *nvc0) > +{ > + nve4_validate_tic(nvc0, 5); > + > + /* Invalidate all 3D textures because they are aliased. 
*/ > + for (int s = 0; s < 5; s++) { > + for (int i = 0; i < nvc0->num_textures[s]; i++) > + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(s, i)); > + nvc0->textures_dirty[s] = ~0; > + } > +} > > static void > nve4_compute_set_tex_handles(struct nvc0_context *nvc0) > @@ -660,78 +660,6 @@ out: > nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_DESC); > } > > - > -#define NVE4_TIC_ENTRY_INVALID 0x000fffff > - > -static void > -nve4_compute_validate_textures(struct nvc0_context *nvc0) > -{ > - struct nouveau_pushbuf *push = nvc0->base.pushbuf; > - const unsigned s = 5; > - unsigned i; > - uint32_t commands[2][32]; > - unsigned n[2] = { 0, 0 }; > - > - for (i = 0; i < nvc0->num_textures[s]; ++i) { > - struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]); > - struct nv04_resource *res; > - const bool dirty = !!(nvc0->textures_dirty[s] & (1 << i)); > - > - if (!tic) { > - nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID; > - continue; > - } > - res = nv04_resource(tic->pipe.texture); > - nvc0_update_tic(nvc0, tic, res); > - > - if (tic->id < 0) { > - tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic); > - > - nve4_p2mf_push_linear(&nvc0->base, nvc0->screen->txc, tic->id * 32, > - NV_VRAM_DOMAIN(&nvc0->screen->base), > - 32, tic->tic); > - > - commands[0][n[0]++] = (tic->id << 4) | 1; > - } else > - if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) { > - commands[1][n[1]++] = (tic->id << 4) | 1; > - } > - nvc0_screen_tic_lock(nvc0->screen, tic); > - > - res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING; > - res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING; > - > - nvc0->tex_handles[s][i] &= ~NVE4_TIC_ENTRY_INVALID; > - nvc0->tex_handles[s][i] |= tic->id; > - if (dirty) > - BCTX_REFN(nvc0->bufctx_cp, CP_TEX(i), res, RD); > - } > - for (; i < nvc0->state.num_textures[s]; ++i) { > - nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID; > - nvc0->textures_dirty[s] |= 1 << i; > - } > - > - if (n[0]) { > - BEGIN_NIC0(push, NVE4_CP(TIC_FLUSH), n[0]); > - 
PUSH_DATAp(push, commands[0], n[0]); > - } > - if (n[1]) { > - BEGIN_NIC0(push, NVE4_CP(TEX_CACHE_CTL), n[1]); > - PUSH_DATAp(push, commands[1], n[1]); > - } > - > - nvc0->state.num_textures[s] = nvc0->num_textures[s]; > - > - /* Invalidate all 3D textures because they are aliased. */ > - for (int s = 0; s < 5; s++) { > - for (int i = 0; i < nvc0->num_textures[s]; i++) > - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(s, i)); > - nvc0->textures_dirty[s] = ~0; > - } > - nvc0->dirty_3d |= NVC0_NEW_3D_TEXTURES; > -} > - > - > #ifdef DEBUG > static const char *nve4_cache_split_name(unsigned value) > { > -- > 2.10.1 > > _______________________________________________ > mesa-dev mailing list > [email protected] > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
