On Fri, May 27, 2016 at 2:18 PM, Marek Olšák <[email protected]> wrote: > From: Marek Olšák <[email protected]> > > R600-R700 used a bad workaround. Now only R600 has to use it. > --- > src/gallium/drivers/r600/evergreen_hw_context.c | 13 +++++++++++-- > src/gallium/drivers/r600/evergreend.h | 1 + > src/gallium/drivers/r600/r600_blit.c | 6 ------ > src/gallium/drivers/r600/r600_hw_context.c | 25 > ++++++++++++++++++++----- > src/gallium/drivers/r600/r600d.h | 1 + > 5 files changed, 33 insertions(+), 13 deletions(-) > > diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c > b/src/gallium/drivers/r600/evergreen_hw_context.c > index f456696..14877ae 100644 > --- a/src/gallium/drivers/r600/evergreen_hw_context.c > +++ b/src/gallium/drivers/r600/evergreen_hw_context.c > @@ -117,7 +117,7 @@ void evergreen_cp_dma_clear_buffer(struct r600_context > *rctx, > unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT); > unsigned reloc; > > - r600_need_cs_space(rctx, 10 + (rctx->b.flags ? > R600_MAX_FLUSH_CS_DWORDS : 0), FALSE); > + r600_need_cs_space(rctx, 2 + 10 + (rctx->b.flags ? > R600_MAX_FLUSH_CS_DWORDS : 0), FALSE); > > /* Flush the caches for the first copy only. */ > if (rctx->b.flags) { > @@ -148,9 +148,18 @@ void evergreen_cp_dma_clear_buffer(struct r600_context > *rctx, > offset += byte_count; > } > > + /* CP DMA is executed in ME, but index buffers are read by PFP. > + * This ensures that ME (CP DMA) is idle before PFP starts fetching > + * indices. If we wanted to execute CP DMA in PFP, this packet > + * should precede it. > + */ > + if (coher == R600_COHERENCY_SHADER) { > + radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0)); > + radeon_emit(cs, 0); > + }
Did you test this on R7xx? I don't think the PFP_SYNC_ME packet works on R7xx, despite what the documentation says. See this kernel commit: http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=b6c2b4faf90230ef9cf1a81f36cbccda4a606c59 I think this packet only works on Evergreen and newer. Alex > + > /* Invalidate the read caches. */ > rctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE | > R600_CONTEXT_INV_VERTEX_CACHE | > R600_CONTEXT_INV_TEX_CACHE; > } > - > diff --git a/src/gallium/drivers/r600/evergreend.h > b/src/gallium/drivers/r600/evergreend.h > index c1c6169..457152e 100644 > --- a/src/gallium/drivers/r600/evergreend.h > +++ b/src/gallium/drivers/r600/evergreend.h > @@ -88,6 +88,7 @@ > #define WAIT_REG_MEM_EQUAL 3 > #define PKT3_MEM_WRITE 0x3D > #define PKT3_INDIRECT_BUFFER 0x32 > +#define PKT3_PFP_SYNC_ME 0x42 /* r7xx+ */ > #define PKT3_SURFACE_SYNC 0x43 > #define PKT3_ME_INITIALIZE 0x44 > #define PKT3_COND_WRITE 0x45 > diff --git a/src/gallium/drivers/r600/r600_blit.c > b/src/gallium/drivers/r600/r600_blit.c > index 9230b40..9f309d8 100644 > --- a/src/gallium/drivers/r600/r600_blit.c > +++ b/src/gallium/drivers/r600/r600_blit.c > @@ -519,12 +519,6 @@ static void r600_copy_buffer(struct pipe_context *ctx, > struct pipe_resource *dst > } else { > util_resource_copy_region(ctx, dst, 0, dstx, 0, 0, src, 0, > src_box); > } > - > - /* The index buffer (VGT) doesn't seem to see the result of the > copying. > - * Can we somehow flush the index buffer cache? Starting a new IB > seems > - * to do the trick. 
*/ > - if (rctx->b.chip_class <= R700) > - rctx->b.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL); > } > > /** > diff --git a/src/gallium/drivers/r600/r600_hw_context.c > b/src/gallium/drivers/r600/r600_hw_context.c > index 1f7bed8..5d6200d 100644 > --- a/src/gallium/drivers/r600/r600_hw_context.c > +++ b/src/gallium/drivers/r600/r600_hw_context.c > @@ -403,7 +403,7 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx, > unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT); > unsigned src_reloc, dst_reloc; > > - r600_need_cs_space(rctx, 10 + (rctx->b.flags ? > R600_MAX_FLUSH_CS_DWORDS : 0), FALSE); > + r600_need_cs_space(rctx, 2 + 10 + (rctx->b.flags ? > R600_MAX_FLUSH_CS_DWORDS : 0), FALSE); > > /* Flush the caches for the first copy only. */ > if (rctx->b.flags) { > @@ -438,10 +438,25 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx, > dst_offset += byte_count; > } > > - /* Invalidate the read caches. */ > - rctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE | > - R600_CONTEXT_INV_VERTEX_CACHE | > - R600_CONTEXT_INV_TEX_CACHE; > + /* CP DMA is executed in ME, but index buffers are read by PFP. > + * This ensures that ME (CP DMA) is idle before PFP starts fetching > + * indices. If we wanted to execute CP DMA in PFP, this packet > + * should precede it. > + * > + * R6xx is out of luck, as it doesn't have the packet. > + * Starting a new IB has the same effect. > + */ > + if (rctx->b.chip_class >= R700) { > + radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0)); > + radeon_emit(cs, 0); > + > + /* Invalidate the read caches. 
*/ > + rctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE | > + R600_CONTEXT_INV_VERTEX_CACHE | > + R600_CONTEXT_INV_TEX_CACHE; > + } else { > + rctx->b.gfx.flush(rctx, RADEON_FLUSH_ASYNC, NULL); > + } > } > > void r600_dma_copy_buffer(struct r600_context *rctx, > diff --git a/src/gallium/drivers/r600/r600d.h > b/src/gallium/drivers/r600/r600d.h > index 24f599e..0b6dabd 100644 > --- a/src/gallium/drivers/r600/r600d.h > +++ b/src/gallium/drivers/r600/r600d.h > @@ -98,6 +98,7 @@ > #define WAIT_REG_MEM_EQUAL 3 > #define PKT3_MEM_WRITE 0x3D > #define PKT3_INDIRECT_BUFFER 0x32 > +#define PKT3_PFP_SYNC_ME 0x42 /* r7xx+ */ > #define PKT3_SURFACE_SYNC 0x43 > #define PKT3_ME_INITIALIZE 0x44 > #define PKT3_COND_WRITE 0x45 > -- > 2.7.4 > > _______________________________________________ > mesa-dev mailing list > [email protected] > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
