The busy waiting method somehow make the performance worse considering 16 VF 
and vf0 need to wait 15*time_slice when it try to access registers

BR

-----Original Message-----
From: Ding, Pixel 
Sent: 2017年10月13日 17:39
To: Liu, Monk <[email protected]>; [email protected]
Cc: Li, Bingley <[email protected]>
Subject: Re: [PATCH 4/4] drm/amdgpu: busywait KIQ register accessing

I’m afraid there’s racing issue if polling and IRQ use cases are mixed at the 
same time.

The original implementation is as your suggested. Is there any benefit to keep 
to sleepy version?
— 
Sincerely Yours,
Pixel








On 13/10/2017, 5:34 PM, "Liu, Monk" <[email protected]> wrote:

>Pixel
>
>I don't think this will work well, my suggestion is you add a new function 
>like:
>amdgpu_wreg_kiq_busy(),
>
>which will write registers through KIQ and use polling/busy wait, and the 
>original amdgu_wreg_no_kiq() can be still there.
>
>When you need to disable sleep like in IRQ CONTEXT, you can call 
>wreg_kiq_busy() or wreg_no_kiq(),
>
>But don't just change the original function 
>
>BR Monk
>
>-----Original Message-----
>From: amd-gfx [mailto:[email protected]] On Behalf Of 
>Pixel Ding
>Sent: 2017年10月13日 16:26
>To: [email protected]
>Cc: Ding, Pixel <[email protected]>; Li, Bingley <[email protected]>
>Subject: [PATCH 4/4] drm/amdgpu: busywait KIQ register accessing
>
>From: pding <[email protected]>
>
>Register accessing is performed when IRQ is disabled. Never sleep in this 
>function.
>
>Known issue: dead sleep in many use cases of index/data registers.
>
>Signed-off-by: pding <[email protected]>
>---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  1 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  8 ++---  
> drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c  |  2 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c    |  1 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h   |  1 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c   | 55 ++++++++++++++++++------------
> 6 files changed, 40 insertions(+), 28 deletions(-)
>
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
>b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>index ca212ef..f9de756 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>@@ -886,6 +886,7 @@ struct amdgpu_kiq {
>       u64                     eop_gpu_addr;
>       struct amdgpu_bo        *eop_obj;
>       struct mutex            ring_mutex;
>+      spinlock_t              ring_lock;
>       struct amdgpu_ring      ring;
>       struct amdgpu_irq_src   irq;
> };
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
>b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>index ab8f0d6..1197274 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>@@ -109,10 +109,8 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, 
>uint32_t reg,  {
>       uint32_t ret;
> 
>-      if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) {
>-              BUG_ON(in_interrupt());
>+      if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
>               return amdgpu_virt_kiq_rreg(adev, reg);
>-      }
> 
>       if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
>               ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); @@ 
> -137,10 +135,8 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t 
> reg, uint32_t v,
>               adev->last_mm_index = v;
>       }
> 
>-      if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) {
>-              BUG_ON(in_interrupt());
>+      if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
>               return amdgpu_virt_kiq_wreg(adev, reg, v);
>-      }
> 
>       if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
>               writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); diff 
> --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
>index 2044758..c6c7bf3 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
>@@ -109,7 +109,7 @@ static void amdgpu_fence_write(struct amdgpu_ring *ring, 
>u32 seq)
>  * Reads a fence value from memory (all asics).
>  * Returns the value of the fence read from memory.
>  */
>-static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
>+u32 amdgpu_fence_read(struct amdgpu_ring *ring)
> {
>       struct amdgpu_fence_driver *drv = &ring->fence_drv;
>       u32 seq = 0;
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
>b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>index 4f6c68f..46fa88c 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>@@ -186,6 +186,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
>       int r = 0;
> 
>       mutex_init(&kiq->ring_mutex);
>+      spin_lock_init(&kiq->ring_lock);
> 
>       r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
>       if (r)
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
>b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>index af8e544..a4fa923 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>@@ -89,6 +89,7 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, 
> void amdgpu_fence_driver_suspend(struct amdgpu_device *adev);  void 
>amdgpu_fence_driver_resume(struct amdgpu_device *adev);  int 
>amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence);
>+u32 amdgpu_fence_read(struct amdgpu_ring *ring);
> void amdgpu_fence_process(struct amdgpu_ring *ring);  int 
> amdgpu_fence_wait_empty(struct amdgpu_ring *ring);  unsigned 
> amdgpu_fence_count_emitted(struct amdgpu_ring *ring); diff --git 
> a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>index ab05121..9757df1 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>@@ -22,7 +22,7 @@
>  */
> 
> #include "amdgpu.h"
>-#define MAX_KIQ_REG_WAIT      100000
>+#define MAX_KIQ_REG_WAIT      100000000 /* in usecs */
> 
> int amdgpu_allocate_static_csa(struct amdgpu_device *adev)  { @@ -113,28 
> +113,32 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
> 
> uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)  {
>-      signed long r;
>-      uint32_t val;
>-      struct dma_fence *f;
>+      signed long timeout = usecs_to_jiffies(MAX_KIQ_REG_WAIT);
>+      unsigned long flags;
>+      uint32_t val, seq, wait_seq;
>       struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>       struct amdgpu_ring *ring = &kiq->ring;
> 
>       BUG_ON(!ring->funcs->emit_rreg);
> 
>-      mutex_lock(&kiq->ring_mutex);
>+      spin_lock_irqsave(&kiq->ring_lock, flags);
>       amdgpu_ring_alloc(ring, 32);
>       amdgpu_ring_emit_rreg(ring, reg);
>-      amdgpu_fence_emit(ring, &f);
>+      wait_seq = ++ring->fence_drv.sync_seq;
>+      amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
>+                             wait_seq, AMDGPU_FENCE_FLAG_INT);
>       amdgpu_ring_commit(ring);
>-      mutex_unlock(&kiq->ring_mutex);
>-
>-      r = dma_fence_wait_timeout(f, false, 
>msecs_to_jiffies(MAX_KIQ_REG_WAIT));
>-      dma_fence_put(f);
>-      if (r < 1) {
>-              DRM_ERROR("wait for kiq fence error: %ld.\n", r);
>+      spin_unlock_irqrestore(&kiq->ring_lock, flags);
>+      do {
>+              seq = amdgpu_fence_read(ring);
>+              udelay(5);
>+              timeout -= 5;
>+      } while (seq < wait_seq && timeout > 0);
>+
>+      if (timeout < 1) {
>+              DRM_ERROR("wait for kiq fence error: %ld\n", timeout);
>               return ~0;
>       }
>-
>       val = adev->wb.wb[adev->virt.reg_val_offs];
> 
>       return val;
>@@ -142,24 +146,33 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device 
>*adev, uint32_t reg)
> 
> void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t 
> v)  {
>-      signed long r;
>+      signed long timeout = usecs_to_jiffies(MAX_KIQ_REG_WAIT);
>+      unsigned long flags;
>+      uint32_t seq, wait_seq;
>       struct dma_fence *f;
>       struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>       struct amdgpu_ring *ring = &kiq->ring;
> 
>       BUG_ON(!ring->funcs->emit_wreg);
> 
>-      mutex_lock(&kiq->ring_mutex);
>+      spin_lock_irqsave(&kiq->ring_lock, flags);
>       amdgpu_ring_alloc(ring, 32);
>       amdgpu_ring_emit_wreg(ring, reg, v);
>-      amdgpu_fence_emit(ring, &f);
>+      /* amdgpu_fence_emit(ring, &f); */
>+      wait_seq = ++ring->fence_drv.sync_seq;
>+      amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
>+                             wait_seq, AMDGPU_FENCE_FLAG_INT);
>       amdgpu_ring_commit(ring);
>-      mutex_unlock(&kiq->ring_mutex);
>+      spin_unlock_irqrestore(&kiq->ring_lock, flags);
>+
>+      do {
>+              seq = amdgpu_fence_read(ring);
>+              udelay(5);
>+              timeout -= 5;
>+      } while (seq < wait_seq && timeout > 0);
> 
>-      r = dma_fence_wait_timeout(f, false, 
>msecs_to_jiffies(MAX_KIQ_REG_WAIT));
>-      if (r < 1)
>-              DRM_ERROR("wait for kiq fence error: %ld.\n", r);
>-      dma_fence_put(f);
>+      if (timeout < 1)
>+              DRM_ERROR("wait for kiq fence error: %ld\n", timeout);
> }
> 
> /**
>--
>2.9.5
>
>_______________________________________________
>amd-gfx mailing list
>[email protected]
>https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to