From: Nicolai Hähnle <nicolai.haeh...@amd.com>

VM faults cannot be disabled for SDMA on <= VI.

We could still use SDMA by asking the winsys about which parts of the
buffers are committed. This is left as a potential future improvement.
---
 src/gallium/drivers/radeonsi/cik_sdma.c  | 7 +++++--
 src/gallium/drivers/radeonsi/si_cp_dma.c | 1 +
 src/gallium/drivers/radeonsi/si_dma.c    | 7 +++++--
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c
index bee35cd..90f4f21 100644
--- a/src/gallium/drivers/radeonsi/cik_sdma.c
+++ b/src/gallium/drivers/radeonsi/cik_sdma.c
@@ -73,21 +73,22 @@ static void cik_sdma_clear_buffer(struct pipe_context *ctx,
                                  struct pipe_resource *dst,
                                  uint64_t offset,
                                  uint64_t size,
                                  unsigned clear_value)
 {
        struct si_context *sctx = (struct si_context *)ctx;
        struct radeon_winsys_cs *cs = sctx->b.dma.cs;
        unsigned i, ncopy, csize;
        struct r600_resource *rdst = r600_resource(dst);
 
-       if (!cs || offset % 4 != 0 || size % 4 != 0) {
+       if (!cs || offset % 4 != 0 || size % 4 != 0 ||
+           dst->flags & PIPE_RESOURCE_FLAG_SPARSE) {
                ctx->clear_buffer(ctx, dst, offset, size, &clear_value, 4);
                return;
        }
 
        /* Mark the buffer range of destination as valid (initialized),
         * so that transfer_map knows it should wait for the GPU when mapping
         * that range. */
        util_range_add(&rdst->valid_buffer_range, offset, offset + size);
 
        offset += rdst->gpu_address;
@@ -519,21 +520,23 @@ static bool cik_sdma_copy_texture(struct si_context *sctx,
 static void cik_sdma_copy(struct pipe_context *ctx,
                          struct pipe_resource *dst,
                          unsigned dst_level,
                          unsigned dstx, unsigned dsty, unsigned dstz,
                          struct pipe_resource *src,
                          unsigned src_level,
                          const struct pipe_box *src_box)
 {
        struct si_context *sctx = (struct si_context *)ctx;
 
-       if (!sctx->b.dma.cs)
+       if (!sctx->b.dma.cs ||
+           src->flags & PIPE_RESOURCE_FLAG_SPARSE ||
+           dst->flags & PIPE_RESOURCE_FLAG_SPARSE)
                goto fallback;
 
        if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
                cik_sdma_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width);
                return;
        }
 
        if (cik_sdma_copy_texture(sctx, dst, dst_level, dstx, dsty, dstz,
                                  src, src_level, src_box))
                return;
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index 812fcbc..f75ce05 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -195,20 +195,21 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
 
        /* Mark the buffer range of destination as valid (initialized),
         * so that transfer_map knows it should wait for the GPU when mapping
         * that range. */
        util_range_add(&rdst->valid_buffer_range, offset,
                       offset + dma_clear_size);
 
        /* dma_clear_buffer can use clear_buffer on failure. Make sure that
         * doesn't happen. We don't want an infinite recursion: */
        if (sctx->b.dma.cs &&
+           !(dst->flags & PIPE_RESOURCE_FLAG_SPARSE) &&
            (offset % 4 == 0) &&
            /* CP DMA is very slow. Always use SDMA for big clears. This
             * alone improves DeusEx:MD performance by 70%. */
            (size > 128 * 1024 ||
             /* Buffers not used by the GFX IB yet will be cleared by SDMA.
              * This happens to move most buffer clears to SDMA, including
              * DCC and CMASK clears, because pipe->clear clears them before
              * si_emit_framebuffer_state (in a draw call) adds them.
              * For example, DeusEx:MD has 21 buffer clears per frame and all
              * of them are moved to SDMA thanks to this. */
diff --git a/src/gallium/drivers/radeonsi/si_dma.c b/src/gallium/drivers/radeonsi/si_dma.c
index 9dbee3a..b236161 100644
--- a/src/gallium/drivers/radeonsi/si_dma.c
+++ b/src/gallium/drivers/radeonsi/si_dma.c
@@ -82,21 +82,22 @@ static void si_dma_clear_buffer(struct pipe_context *ctx,
                                struct pipe_resource *dst,
                                uint64_t offset,
                                uint64_t size,
                                unsigned clear_value)
 {
        struct si_context *sctx = (struct si_context *)ctx;
        struct radeon_winsys_cs *cs = sctx->b.dma.cs;
        unsigned i, ncopy, csize;
        struct r600_resource *rdst = r600_resource(dst);
 
-       if (!cs || offset % 4 != 0 || size % 4 != 0) {
+       if (!cs || offset % 4 != 0 || size % 4 != 0 ||
+           dst->flags & PIPE_RESOURCE_FLAG_SPARSE) {
                ctx->clear_buffer(ctx, dst, offset, size, &clear_value, 4);
                return;
        }
 
        /* Mark the buffer range of destination as valid (initialized),
         * so that transfer_map knows it should wait for the GPU when mapping
         * that range. */
        util_range_add(&rdst->valid_buffer_range, offset, offset + size);
 
        offset += rdst->gpu_address;
@@ -226,21 +227,23 @@ static void si_dma_copy(struct pipe_context *ctx,
                        const struct pipe_box *src_box)
 {
        struct si_context *sctx = (struct si_context *)ctx;
        struct r600_texture *rsrc = (struct r600_texture*)src;
        struct r600_texture *rdst = (struct r600_texture*)dst;
        unsigned dst_pitch, src_pitch, bpp, dst_mode, src_mode;
        unsigned src_w, dst_w;
        unsigned src_x, src_y;
        unsigned dst_x = dstx, dst_y = dsty, dst_z = dstz;
 
-       if (sctx->b.dma.cs == NULL) {
+       if (sctx->b.dma.cs == NULL ||
+           src->flags & PIPE_RESOURCE_FLAG_SPARSE ||
+           dst->flags & PIPE_RESOURCE_FLAG_SPARSE) {
                goto fallback;
        }
 
        if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
                si_dma_copy_buffer(sctx, dst, src, dst_x, src_box->x, src_box->width);
                return;
        }
 
        /* XXX: Using the asynchronous DMA engine for multi-dimensional
         * operations seems to cause random GPU lockups for various people.
-- 
2.9.3

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to