From: Tvrtko Ursulin <[email protected]>

     text          data     bss      dec            hex filename
 10437711        542597  188232 11168540         aa6b1c amdgpu.ko.before
 10418181        542597  188232 11149010         aa1ed2 amdgpu.ko.after

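Caching ring->wptr (and ring->buf_mask) in local variables shrinks the
generated code, as the size output above shows. The main reason seems to
be that amdgpu_ring_write() can avoid re-loading ring->wptr when called
multiple times in sequence.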

Signed-off-by: Tvrtko Ursulin <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 158238f8c06a..b57951d8c997 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -385,8 +385,10 @@ static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
 
 static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v)
 {
-       ring->ring[ring->wptr++ & ring->buf_mask] = v;
-       ring->wptr &= ring->ptr_mask;
+       u64 wptr = ring->wptr;
+
+       ring->ring[wptr++ & ring->buf_mask] = v;
+       ring->wptr = wptr & ring->ptr_mask;
        ring->count_dw--;
 }
 
@@ -394,9 +396,11 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
                                              void *src, int count_dw)
 {
        unsigned occupied, chunk1, chunk2;
+       u32 buf_mask = ring->buf_mask;
+       u64 wptr = ring->wptr;
 
-       occupied = ring->wptr & ring->buf_mask;
-       chunk1 = ring->buf_mask + 1 - occupied;
+       occupied = wptr & buf_mask;
+       chunk1 = buf_mask + 1 - occupied;
        chunk1 = (chunk1 >= count_dw) ? count_dw : chunk1;
        chunk2 = count_dw - chunk1;
        chunk1 <<= 2;
@@ -410,8 +414,8 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
                memcpy(ring->ring, src, chunk2);
        }
 
-       ring->wptr += count_dw;
-       ring->wptr &= ring->ptr_mask;
+       wptr += count_dw;
+       ring->wptr = wptr & ring->ptr_mask;
        ring->count_dw -= count_dw;
 }
 
-- 
2.47.1

Reply via email to