r100_copy_blit() copies BOs as 1024-pixel-wide ARGB8888 blits, so one
GPU page becomes one blit row. Large copies are split into chunks of at
most 8191 rows.

The kernel register header names the packet coordinate dwords SRC_Y_X
and DST_Y_X. In the BITBLT_MULTI description in the
R5xx_Acceleration_v1.5.pdf documentation, these correspond to
[SRC_X1 | SRC_Y1] and [DST_X1 | DST_Y1], which are signed 14-bit
coordinates in the -8192..8191 range. The old code kept
SRC/DST_PITCH_OFFSET at the BO base and used SRC_Y_X/DST_Y_X as the
chunk address, so large BO moves could exceed that coordinate range.

Compute per-chunk SRC/DST_PITCH_OFFSET bases and emit zero source and
destination coordinates. r100_copy_blit() already packs
SRC/DST_PITCH_OFFSET as pitch plus base offset, so large chunk addresses
belong there rather than in the coordinate fields.

This fixes rendering corruption in Prison Architect with 4096x4096
mipmapped textures after they are evicted to GTT under memory pressure
on RV530.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/work_items/6716
Cc: [email protected]
Signed-off-by: Pavel Ondračka <[email protected]>
---
 drivers/gpu/drm/radeon/r100.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 3ac1a79b6f13..533215d6e9cb 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -906,6 +906,7 @@ struct radeon_fence *r100_copy_blit(struct radeon_device *rdev,
 {
        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        struct radeon_fence *fence;
+       uint64_t cur_src_offset, cur_dst_offset;
        uint32_t cur_pages;
        uint32_t stride_bytes = RADEON_GPU_PAGE_SIZE;
        uint32_t pitch;
@@ -934,6 +935,10 @@ struct radeon_fence *r100_copy_blit(struct radeon_device *rdev,
                        cur_pages = 8191;
                }
                num_gpu_pages -= cur_pages;
+               cur_src_offset = src_offset +
+                       (uint64_t)num_gpu_pages * RADEON_GPU_PAGE_SIZE;
+               cur_dst_offset = dst_offset +
+                       (uint64_t)num_gpu_pages * RADEON_GPU_PAGE_SIZE;
 
                /* pages are in Y direction - height
                   page width in X direction - width */
@@ -950,13 +955,13 @@ struct radeon_fence *r100_copy_blit(struct radeon_device *rdev,
                                  RADEON_DP_SRC_SOURCE_MEMORY |
                                  RADEON_GMC_CLR_CMP_CNTL_DIS |
                                  RADEON_GMC_WR_MSK_DIS);
-               radeon_ring_write(ring, (pitch << 22) | (src_offset >> 10));
-               radeon_ring_write(ring, (pitch << 22) | (dst_offset >> 10));
+               radeon_ring_write(ring, (pitch << 22) | (cur_src_offset >> 10));
+               radeon_ring_write(ring, (pitch << 22) | (cur_dst_offset >> 10));
                radeon_ring_write(ring, (0x1fff) | (0x1fff << 16));
                radeon_ring_write(ring, 0);
                radeon_ring_write(ring, (0x1fff) | (0x1fff << 16));
-               radeon_ring_write(ring, num_gpu_pages);
-               radeon_ring_write(ring, num_gpu_pages);
+               radeon_ring_write(ring, 0);
+               radeon_ring_write(ring, 0);
                radeon_ring_write(ring, cur_pages | (stride_pixels << 16));
        }
        radeon_ring_write(ring, PACKET0(RADEON_DSTCACHE_CTLSTAT, 0));
-- 
2.52.0

Reply via email to