From: Nicolai Hähnle <nicolai.haeh...@amd.com>

VM faults cannot be disabled for SDMA on <= VI.

We could still use SDMA by asking the winsys about which parts of the
buffers are committed. This is left as a potential future improvement.
---
 src/gallium/drivers/radeonsi/cik_sdma.c  | 7 +++++--
 src/gallium/drivers/radeonsi/si_cp_dma.c | 1 +
 src/gallium/drivers/radeonsi/si_dma.c    | 7 +++++--
 3 files changed, 11 insertions(+), 4 deletions(-)
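Not part of the patch, for discussion only: a minimal sketch of the future
improvement mentioned above, i.e. keeping SDMA usable for sparse buffers by
asking the winsys which ranges are committed. All names below --
buffer_get_committed_ranges, sdma_emit_clear, sparse_sdma_clear -- are
hypothetical stand-ins, not existing winsys or radeonsi entry points.

#include <stdint.h>

struct committed_range {
	uint64_t start;	/* first committed byte */
	uint64_t end;	/* one past the last committed byte */
};

/* Hypothetical winsys query: report the committed regions overlapping
 * [offset, offset+size). This stub pretends the whole window is
 * committed, purely for illustration. */
static unsigned buffer_get_committed_ranges(uint64_t offset, uint64_t size,
					    struct committed_range *out,
					    unsigned max)
{
	(void)max;
	out[0].start = offset;
	out[0].end = offset + size;
	return 1;
}

/* Stand-in for the real SDMA packet emission. */
static void sdma_emit_clear(uint64_t offset, uint64_t size, unsigned value)
{
	(void)offset; (void)size; (void)value;
}

/* Clear only the committed parts of a sparse buffer, so the SDMA engine
 * never touches unmapped VM pages (which fault on <= VI). */
static void sparse_sdma_clear(uint64_t offset, uint64_t size, unsigned value)
{
	struct committed_range ranges[64];
	unsigned n = buffer_get_committed_ranges(offset, size, ranges, 64);

	for (unsigned i = 0; i < n; i++) {
		/* Clamp each committed region to the requested window. */
		uint64_t start = ranges[i].start > offset ? ranges[i].start : offset;
		uint64_t end = ranges[i].end < offset + size ? ranges[i].end
							     : offset + size;
		if (start < end)
			sdma_emit_clear(start, end - start, value);
	}
}

The same walk would work for SDMA copies, by intersecting the committed
ranges of source and destination; until then, this patch simply takes the
existing fallback paths.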
diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c
index bee35cd..90f4f21 100644
--- a/src/gallium/drivers/radeonsi/cik_sdma.c
+++ b/src/gallium/drivers/radeonsi/cik_sdma.c
@@ -73,21 +73,22 @@ static void cik_sdma_clear_buffer(struct pipe_context *ctx,
 				  struct pipe_resource *dst,
 				  uint64_t offset,
 				  uint64_t size,
 				  unsigned clear_value)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 	struct radeon_winsys_cs *cs = sctx->b.dma.cs;
 	unsigned i, ncopy, csize;
 	struct r600_resource *rdst = r600_resource(dst);
 
-	if (!cs || offset % 4 != 0 || size % 4 != 0) {
+	if (!cs || offset % 4 != 0 || size % 4 != 0 ||
+	    dst->flags & PIPE_RESOURCE_FLAG_SPARSE) {
 		ctx->clear_buffer(ctx, dst, offset, size, &clear_value, 4);
 		return;
 	}
 
 	/* Mark the buffer range of destination as valid (initialized),
 	 * so that transfer_map knows it should wait for the GPU when mapping
 	 * that range. */
 	util_range_add(&rdst->valid_buffer_range, offset, offset + size);
 
 	offset += rdst->gpu_address;
@@ -519,21 +520,23 @@ static bool cik_sdma_copy_texture(struct si_context *sctx,
 static void cik_sdma_copy(struct pipe_context *ctx,
 			  struct pipe_resource *dst,
 			  unsigned dst_level,
 			  unsigned dstx, unsigned dsty, unsigned dstz,
 			  struct pipe_resource *src,
 			  unsigned src_level,
 			  const struct pipe_box *src_box)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 
-	if (!sctx->b.dma.cs)
+	if (!sctx->b.dma.cs ||
+	    src->flags & PIPE_RESOURCE_FLAG_SPARSE ||
+	    dst->flags & PIPE_RESOURCE_FLAG_SPARSE)
 		goto fallback;
 
 	if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
 		cik_sdma_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width);
 		return;
 	}
 
 	if (cik_sdma_copy_texture(sctx, dst, dst_level, dstx, dsty, dstz,
 				  src, src_level, src_box))
 		return;
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index 812fcbc..f75ce05 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -195,20 +195,21 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
 	/* Mark the buffer range of destination as valid (initialized),
 	 * so that transfer_map knows it should wait for the GPU when mapping
 	 * that range. */
 	util_range_add(&rdst->valid_buffer_range, offset,
 		       offset + dma_clear_size);
 
 	/* dma_clear_buffer can use clear_buffer on failure. Make sure that
 	 * doesn't happen. We don't want an infinite recursion: */
 	if (sctx->b.dma.cs &&
+	    !(dst->flags & PIPE_RESOURCE_FLAG_SPARSE) &&
 	    (offset % 4 == 0) &&
 	    /* CP DMA is very slow. Always use SDMA for big clears. This
 	     * alone improves DeusEx:MD performance by 70%. */
 	    (size > 128 * 1024 ||
 	     /* Buffers not used by the GFX IB yet will be cleared by SDMA.
 	      * This happens to move most buffer clears to SDMA, including
 	      * DCC and CMASK clears, because pipe->clear clears them before
 	      * si_emit_framebuffer_state (in a draw call) adds them.
 	      * For example, DeusEx:MD has 21 buffer clears per frame and all
 	      * of them are moved to SDMA thanks to this.
 	      */
diff --git a/src/gallium/drivers/radeonsi/si_dma.c b/src/gallium/drivers/radeonsi/si_dma.c
index 9dbee3a..b236161 100644
--- a/src/gallium/drivers/radeonsi/si_dma.c
+++ b/src/gallium/drivers/radeonsi/si_dma.c
@@ -82,21 +82,22 @@ static void si_dma_clear_buffer(struct pipe_context *ctx,
 				struct pipe_resource *dst,
 				uint64_t offset,
 				uint64_t size,
 				unsigned clear_value)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 	struct radeon_winsys_cs *cs = sctx->b.dma.cs;
 	unsigned i, ncopy, csize;
 	struct r600_resource *rdst = r600_resource(dst);
 
-	if (!cs || offset % 4 != 0 || size % 4 != 0) {
+	if (!cs || offset % 4 != 0 || size % 4 != 0 ||
+	    dst->flags & PIPE_RESOURCE_FLAG_SPARSE) {
 		ctx->clear_buffer(ctx, dst, offset, size, &clear_value, 4);
 		return;
 	}
 
 	/* Mark the buffer range of destination as valid (initialized),
 	 * so that transfer_map knows it should wait for the GPU when mapping
 	 * that range. */
 	util_range_add(&rdst->valid_buffer_range, offset, offset + size);
 
 	offset += rdst->gpu_address;
@@ -226,21 +227,23 @@ static void si_dma_copy(struct pipe_context *ctx,
 			const struct pipe_box *src_box)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 	struct r600_texture *rsrc = (struct r600_texture*)src;
 	struct r600_texture *rdst = (struct r600_texture*)dst;
 	unsigned dst_pitch, src_pitch, bpp, dst_mode, src_mode;
 	unsigned src_w, dst_w;
 	unsigned src_x, src_y;
 	unsigned dst_x = dstx, dst_y = dsty, dst_z = dstz;
 
-	if (sctx->b.dma.cs == NULL) {
+	if (sctx->b.dma.cs == NULL ||
+	    src->flags & PIPE_RESOURCE_FLAG_SPARSE ||
+	    dst->flags & PIPE_RESOURCE_FLAG_SPARSE) {
 		goto fallback;
 	}
 
 	if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
 		si_dma_copy_buffer(sctx, dst, src, dst_x, src_box->x, src_box->width);
 		return;
 	}
 
 	/* XXX: Using the asynchronous DMA engine for multi-dimensional
 	 * operations seems to cause random GPU lockups for various people.
-- 
2.9.3