On 17/10/2017, 4:20 PM, "Koenig, Christian" <[email protected]> wrote:

>Am 17.10.2017 um 08:37 schrieb Pixel Ding:
>> From: pding <[email protected]>
>>
>> Register accessing is performed when IRQ is disabled. Never sleep in
>> this function.
>>
>> Known issue: dead sleep in many use cases of index/data registers.
>>
>> v2: wrap polling fence functions. don't trigger IRQ for polling in
>> case of wrongly fence signal.
>>
>> Signed-off-by: pding <[email protected]>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h               |  2 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c |  4 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  4 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c        |  8 +---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c         | 53 
>> +++++++++++++++++++++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c           |  2 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h          |  4 ++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c          | 30 ++++++-------
>>   8 files changed, 78 insertions(+), 29 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index ca212ef..cc06e62 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -885,7 +885,7 @@ struct amdgpu_mec {
>>   struct amdgpu_kiq {
>>      u64                     eop_gpu_addr;
>>      struct amdgpu_bo        *eop_obj;
>> -    struct mutex            ring_mutex;
>> +    spinlock_t              ring_lock;
>>      struct amdgpu_ring      ring;
>>      struct amdgpu_irq_src   irq;
>>   };
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
>> index 9d9965d..6d83573 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
>> @@ -788,7 +788,7 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device 
>> *adev, uint16_t pasid)
>>      struct dma_fence *f;
>>      struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
>>   
>> -    mutex_lock(&adev->gfx.kiq.ring_mutex);
>> +    spin_lock(&adev->gfx.kiq.ring_lock);
>>      amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
>>      amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
>>      amdgpu_ring_write(ring,
>> @@ -796,7 +796,7 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device 
>> *adev, uint16_t pasid)
>>                      PACKET3_INVALIDATE_TLBS_PASID(pasid));
>>      amdgpu_fence_emit(ring, &f);
>>      amdgpu_ring_commit(ring);
>> -    mutex_unlock(&adev->gfx.kiq.ring_mutex);
>> +    spin_unlock(&adev->gfx.kiq.ring_lock);
>>   
>>      r = dma_fence_wait(f, false);
>>      if (r)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
>> index edbae19..c92217f 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
>> @@ -973,7 +973,7 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device 
>> *adev, uint16_t pasid)
>>      struct dma_fence *f;
>>      struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
>>   
>> -    mutex_lock(&adev->gfx.kiq.ring_mutex);
>> +    spin_lock(&adev->gfx.kiq.ring_lock);
>>      amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
>>      amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
>>      amdgpu_ring_write(ring,
>> @@ -983,7 +983,7 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device 
>> *adev, uint16_t pasid)
>>                      PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(2));
>>      amdgpu_fence_emit(ring, &f);
>>      amdgpu_ring_commit(ring);
>> -    mutex_unlock(&adev->gfx.kiq.ring_mutex);
>> +    spin_unlock(&adev->gfx.kiq.ring_lock);
>>   
>>      r = dma_fence_wait(f, false);
>>      if (r)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index ab8f0d6..1197274 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -109,10 +109,8 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, 
>> uint32_t reg,
>>   {
>>      uint32_t ret;
>>   
>> -    if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) {
>> -            BUG_ON(in_interrupt());
>> +    if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
>>              return amdgpu_virt_kiq_rreg(adev, reg);
>> -    }
>>   
>>      if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
>>              ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
>> @@ -137,10 +135,8 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, 
>> uint32_t reg, uint32_t v,
>>              adev->last_mm_index = v;
>>      }
>>   
>> -    if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) {
>> -            BUG_ON(in_interrupt());
>> +    if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
>>              return amdgpu_virt_kiq_wreg(adev, reg, v);
>> -    }
>>   
>>      if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
>>              writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
>> index 688740e..68a5e90 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
>> @@ -169,6 +169,32 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct 
>> dma_fence **f)
>>   }
>>   
>>   /**
>> + * amdgpu_fence_emit_polling - emit a fence on the requeste ring
>> + *
>> + * @ring: ring the fence is associated with
>> + * @s: resulting sequence number
>> + *
>> + * Emits a fence command on the requested ring (all asics).
>> + * Used For polling fence.
>> + * Returns 0 on success, -ENOMEM on failure.
>> + */
>> +int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s)
>> +{
>> +    uint32_t seq;
>> +
>> +    if (!s)
>> +            return -EINVAL;
>> +
>> +    seq = ++ring->fence_drv.sync_seq;
>> +    amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
>> +                           seq, AMDGPU_FENCE_FLAG_INT);
>> +
>> +    *s = seq;
>> +
>> +    return 0;
>> +}
>> +
>> +/**
>>    * amdgpu_fence_schedule_fallback - schedule fallback check
>>    *
>>    * @ring: pointer to struct amdgpu_ring
>> @@ -281,6 +307,33 @@ int amdgpu_fence_wait_empty(struct amdgpu_ring *ring)
>>      return r;
>>   }
>>   
>
>Please add some documentation here that timeout is in usec.
[Pixel] sure.
>
>> +signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
>> +                                  uint32_t wait_seq,
>> +                                  signed long timeout)
>> +{
>> +    uint32_t seq, last_seq;
>> +    struct amdgpu_fence_driver *drv = &ring->fence_drv;
>> +
>> +    last_seq = atomic_read(&ring->fence_drv.last_seq);
>> +
>> +    do {
>> +            seq = amdgpu_fence_read(ring);
>> +
>> +            if (unlikely(seq == last_seq))
>> +                    break;
>
>That doesn't look correct to me, it will abort the busy wait as soon as 
>we see the last value we have seen.
>[pixel]you’re right. My intention is to use the “seq" as latest submitted 
>one(sync_seq), then it means all submitted seqs are notified. This condition 
>could be removed.
>> +            if (seq >= wait_seq && wait_seq >= last_seq)
>> +                    break;
>> +            if (seq <= last_seq &&
>> +                (wait_seq <= seq || wait_seq >= last_seq))
>> +                    break;
>
>Why not just "(int32_t)(wait_seq - seq) > 0" ? IIRC that should be 
>sufficient for a wrap around test.
>[pixel] seq could be larger (executed) or smaller (not yet) than wait_seq even 
>without a wrap round, I think you mean "(int32_t)(last_seq - seq) > 0”
>it actually can know there’s a wrap around, but it still doesn't know if the 
>waited seq is in the range between or not, we still need other conditions. 
>Code here is to identify cases to break as:

===last_seq=====wait_seq====seq===
===wait_seq==seq========last_seq==
==seq==========last_seq==wait_seq=

Does it make sense?

Regards.
Pixel
>
>> +            udelay(5);
>> +            timeout -= 5;
>> +    } while (timeout > 0);
>> +
>> +    atomic_cmpxchg(&drv->last_seq, last_seq, wait_seq);
>> +
>> +    return timeout;
>> +}
>>   /**
>>    * amdgpu_fence_count_emitted - get the count of emitted fences
>>    *
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> index 4f6c68f..e5a9077 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> @@ -185,7 +185,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
>>      struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>>      int r = 0;
>>   
>> -    mutex_init(&kiq->ring_mutex);
>> +    spin_lock_init(&kiq->ring_lock);
>>   
>>      r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
>>      if (r)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>> index af8e544..9de89ea 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>> @@ -89,8 +89,12 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring 
>> *ring,
>>   void amdgpu_fence_driver_suspend(struct amdgpu_device *adev);
>>   void amdgpu_fence_driver_resume(struct amdgpu_device *adev);
>>   int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence);
>> +int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s);
>>   void amdgpu_fence_process(struct amdgpu_ring *ring);
>>   int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
>> +signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
>> +                                  uint32_t wait_seq,
>> +                                  signed long timeout);
>>   unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
>>   
>>   /*
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>> index ab05121..177fe10 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>> @@ -22,7 +22,7 @@
>>    */
>>   
>>   #include "amdgpu.h"
>> -#define MAX_KIQ_REG_WAIT    100000
>> +#define MAX_KIQ_REG_WAIT    100000000 /* in usecs */
>>   
>>   int amdgpu_allocate_static_csa(struct amdgpu_device *adev)
>>   {
>> @@ -114,27 +114,24 @@ void amdgpu_virt_init_setting(struct amdgpu_device 
>> *adev)
>>   uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
>>   {
>>      signed long r;
>> -    uint32_t val;
>> -    struct dma_fence *f;
>> +    uint32_t val, seq;
>>      struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>>      struct amdgpu_ring *ring = &kiq->ring;
>>   
>>      BUG_ON(!ring->funcs->emit_rreg);
>>   
>> -    mutex_lock(&kiq->ring_mutex);
>> +    spin_lock(&kiq->ring_lock);
>>      amdgpu_ring_alloc(ring, 32);
>>      amdgpu_ring_emit_rreg(ring, reg);
>> -    amdgpu_fence_emit(ring, &f);
>> +    amdgpu_fence_emit_polling(ring, &seq);
>>      amdgpu_ring_commit(ring);
>> -    mutex_unlock(&kiq->ring_mutex);
>> +    spin_unlock(&kiq->ring_lock);
>>   
>> -    r = dma_fence_wait_timeout(f, false, 
>> msecs_to_jiffies(MAX_KIQ_REG_WAIT));
>> -    dma_fence_put(f);
>> +    r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
>>      if (r < 1) {
>> -            DRM_ERROR("wait for kiq fence error: %ld.\n", r);
>> +            DRM_ERROR("wait for kiq fence error: %ld\n", r);
>>              return ~0;
>>      }
>> -
>>      val = adev->wb.wb[adev->virt.reg_val_offs];
>>   
>>      return val;
>> @@ -143,23 +140,22 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device 
>> *adev, uint32_t reg)
>>   void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, 
>> uint32_t v)
>>   {
>>      signed long r;
>> -    struct dma_fence *f;
>> +    uint32_t seq;
>>      struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>>      struct amdgpu_ring *ring = &kiq->ring;
>>   
>>      BUG_ON(!ring->funcs->emit_wreg);
>>   
>> -    mutex_lock(&kiq->ring_mutex);
>> +    spin_lock(&kiq->ring_lock);
>>      amdgpu_ring_alloc(ring, 32);
>>      amdgpu_ring_emit_wreg(ring, reg, v);
>> -    amdgpu_fence_emit(ring, &f);
>> +    amdgpu_fence_emit_polling(ring, &seq);
>>      amdgpu_ring_commit(ring);
>> -    mutex_unlock(&kiq->ring_mutex);
>> +    spin_unlock(&kiq->ring_lock);
>>   
>> -    r = dma_fence_wait_timeout(f, false, 
>> msecs_to_jiffies(MAX_KIQ_REG_WAIT));
>> +    r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
>>      if (r < 1)
>> -            DRM_ERROR("wait for kiq fence error: %ld.\n", r);
>> -    dma_fence_put(f);
>> +            DRM_ERROR("wait for kiq fence error: %ld\n", r);
>>   }
>>   
>>   /**
>
>
_______________________________________________
amd-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to