On 17/10/2017, 4:20 PM, "Koenig, Christian" <[email protected]> wrote:
>Am 17.10.2017 um 08:37 schrieb Pixel Ding: >> From: pding <[email protected]> >> >> Register accessing is performed when IRQ is disabled. Never sleep in >> this function. >> >> Known issue: dead sleep in many use cases of index/data registers. >> >> v2: wrap polling fence functions. don't trigger IRQ for polling in >> case of wrongly fence signal. >> >> Signed-off-by: pding <[email protected]> >> --- >> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- >> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 4 +- >> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 4 +- >> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 +--- >> drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 53 >> +++++++++++++++++++++++ >> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 2 +- >> drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 4 ++ >> drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 30 ++++++------- >> 8 files changed, 78 insertions(+), 29 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> index ca212ef..cc06e62 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> @@ -885,7 +885,7 @@ struct amdgpu_mec { >> struct amdgpu_kiq { >> u64 eop_gpu_addr; >> struct amdgpu_bo *eop_obj; >> - struct mutex ring_mutex; >> + spinlock_t ring_lock; >> struct amdgpu_ring ring; >> struct amdgpu_irq_src irq; >> }; >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c >> index 9d9965d..6d83573 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c >> @@ -788,7 +788,7 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device >> *adev, uint16_t pasid) >> struct dma_fence *f; >> struct amdgpu_ring *ring = &adev->gfx.kiq.ring; >> >> - mutex_lock(&adev->gfx.kiq.ring_mutex); >> + spin_lock(&adev->gfx.kiq.ring_lock); >> amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/ >> amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); >> amdgpu_ring_write(ring, >> @@ -796,7 +796,7 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device >> *adev, uint16_t pasid) >> PACKET3_INVALIDATE_TLBS_PASID(pasid)); >> amdgpu_fence_emit(ring, &f); >> amdgpu_ring_commit(ring); >> - mutex_unlock(&adev->gfx.kiq.ring_mutex); >> + spin_unlock(&adev->gfx.kiq.ring_lock); >> >> r = dma_fence_wait(f, false); >> if (r) >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c >> index edbae19..c92217f 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c >> @@ -973,7 +973,7 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device >> *adev, uint16_t pasid) >> struct dma_fence *f; >> struct amdgpu_ring *ring = &adev->gfx.kiq.ring; >> >> - mutex_lock(&adev->gfx.kiq.ring_mutex); >> + spin_lock(&adev->gfx.kiq.ring_lock); >> amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/ >> amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); >> amdgpu_ring_write(ring, >> @@ -983,7 +983,7 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device >> *adev, uint16_t pasid) >> PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(2)); >> amdgpu_fence_emit(ring, &f); >> amdgpu_ring_commit(ring); >> - mutex_unlock(&adev->gfx.kiq.ring_mutex); >> + spin_unlock(&adev->gfx.kiq.ring_lock); >> >> r = dma_fence_wait(f, false); >> if (r) >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >> index ab8f0d6..1197274 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >> @@ -109,10 +109,8 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, >> uint32_t reg, >> { >> uint32_t ret; >> >> - if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) { >> - BUG_ON(in_interrupt()); >> + if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) >> return amdgpu_virt_kiq_rreg(adev, reg); >> - } >> >> if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX)) >> ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); >> @@ -137,10 +135,8 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, >> uint32_t reg, uint32_t v, >> adev->last_mm_index = v; >> } >> >> - if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) { >> - BUG_ON(in_interrupt()); >> + if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) >> return amdgpu_virt_kiq_wreg(adev, reg, v); >> - } >> >> if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX)) >> writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c >> index 688740e..68a5e90 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c >> @@ -169,6 +169,32 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct >> dma_fence **f) >> } >> >> /** >> + * amdgpu_fence_emit_polling - emit a fence on the requeste ring >> + * >> + * @ring: ring the fence is associated with >> + * @s: resulting sequence number >> + * >> + * Emits a fence command on the requested ring (all asics). >> + * Used For polling fence. >> + * Returns 0 on success, -ENOMEM on failure. >> + */ >> +int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s) >> +{ >> + uint32_t seq; >> + >> + if (!s) >> + return -EINVAL; >> + >> + seq = ++ring->fence_drv.sync_seq; >> + amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, >> + seq, AMDGPU_FENCE_FLAG_INT); >> + >> + *s = seq; >> + >> + return 0; >> +} >> + >> +/** >> * amdgpu_fence_schedule_fallback - schedule fallback check >> * >> * @ring: pointer to struct amdgpu_ring >> @@ -281,6 +307,33 @@ int amdgpu_fence_wait_empty(struct amdgpu_ring *ring) >> return r; >> } >> > >Please add some documentation here that timeout is in usec. [Pixel] sure. > >> +signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring, >> + uint32_t wait_seq, >> + signed long timeout) >> +{ >> + uint32_t seq, last_seq; >> + struct amdgpu_fence_driver *drv = &ring->fence_drv; >> + >> + last_seq = atomic_read(&ring->fence_drv.last_seq); >> + >> + do { >> + seq = amdgpu_fence_read(ring); >> + >> + if (unlikely(seq == last_seq)) >> + break; > >That doesn't look correct to me, it will abort the busy wait as soon as >we see the last value we have seen. >[pixel]you’re right. My intention is to use the “seq" as latest submitted >one(sync_seq), then it means all submitted seqs are notified. This condition >could be removed. >> + if (seq >= wait_seq && wait_seq >= last_seq) >> + break; >> + if (seq <= last_seq && >> + (wait_seq <= seq || wait_seq >= last_seq)) >> + break; > >Why not just "(int32_t)(wait_seq - seq) > 0" ? IIRC that should be >sufficient for a wrap around test. >[pixel] seq could be larger (executed) or smaller (not yet) than wait_seq even >without a wrap round, I think you mean "(int32_t)(last_seq - seq) > 0” >it actually can know there’s a wrap around, but it still doesn't know if the >waited seq is in the range between or not, we still need other conditions. >Code here is to identify cases to break as: ===last_seq=====wait_seq====seq=== ===wait_seq==seq========last_seq== ==seq==========last_seq==wait_seq= Does it make sense? Regards. Pixel > >> + udelay(5); >> + timeout -= 5; >> + } while (timeout > 0); >> + >> + atomic_cmpxchg(&drv->last_seq, last_seq, wait_seq); >> + >> + return timeout; >> +} >> /** >> * amdgpu_fence_count_emitted - get the count of emitted fences >> * >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c >> index 4f6c68f..e5a9077 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c >> @@ -185,7 +185,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, >> struct amdgpu_kiq *kiq = &adev->gfx.kiq; >> int r = 0; >> >> - mutex_init(&kiq->ring_mutex); >> + spin_lock_init(&kiq->ring_lock); >> >> r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs); >> if (r) >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h >> index af8e544..9de89ea 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h >> @@ -89,8 +89,12 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring >> *ring, >> void amdgpu_fence_driver_suspend(struct amdgpu_device *adev); >> void amdgpu_fence_driver_resume(struct amdgpu_device *adev); >> int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence); >> +int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s); >> void amdgpu_fence_process(struct amdgpu_ring *ring); >> int amdgpu_fence_wait_empty(struct amdgpu_ring *ring); >> +signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring, >> + uint32_t wait_seq, >> + signed long timeout); >> unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring); >> >> /* >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c >> index ab05121..177fe10 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c >> @@ -22,7 +22,7 @@ >> */ >> >> #include "amdgpu.h" >> -#define MAX_KIQ_REG_WAIT 100000 >> +#define MAX_KIQ_REG_WAIT 100000000 /* in usecs */ >> >> int amdgpu_allocate_static_csa(struct amdgpu_device *adev) >> { >> @@ -114,27 +114,24 @@ void amdgpu_virt_init_setting(struct amdgpu_device >> *adev) >> uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) >> { >> signed long r; >> - uint32_t val; >> - struct dma_fence *f; >> + uint32_t val, seq; >> struct amdgpu_kiq *kiq = &adev->gfx.kiq; >> struct amdgpu_ring *ring = &kiq->ring; >> >> BUG_ON(!ring->funcs->emit_rreg); >> >> - mutex_lock(&kiq->ring_mutex); >> + spin_lock(&kiq->ring_lock); >> amdgpu_ring_alloc(ring, 32); >> amdgpu_ring_emit_rreg(ring, reg); >> - amdgpu_fence_emit(ring, &f); >> + amdgpu_fence_emit_polling(ring, &seq); >> amdgpu_ring_commit(ring); >> - mutex_unlock(&kiq->ring_mutex); >> + spin_unlock(&kiq->ring_lock); >> >> - r = dma_fence_wait_timeout(f, false, >> msecs_to_jiffies(MAX_KIQ_REG_WAIT)); >> - dma_fence_put(f); >> + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); >> if (r < 1) { >> - DRM_ERROR("wait for kiq fence error: %ld.\n", r); >> + DRM_ERROR("wait for kiq fence error: %ld\n", r); >> return ~0; >> } >> - >> val = adev->wb.wb[adev->virt.reg_val_offs]; >> >> return val; >> @@ -143,23 +140,22 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device >> *adev, uint32_t reg) >> void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, >> uint32_t v) >> { >> signed long r; >> - struct dma_fence *f; >> + uint32_t seq; >> struct amdgpu_kiq *kiq = &adev->gfx.kiq; >> struct amdgpu_ring *ring = &kiq->ring; >> >> BUG_ON(!ring->funcs->emit_wreg); >> >> - mutex_lock(&kiq->ring_mutex); >> + spin_lock(&kiq->ring_lock); >> amdgpu_ring_alloc(ring, 32); >> amdgpu_ring_emit_wreg(ring, reg, v); >> - amdgpu_fence_emit(ring, &f); >> + amdgpu_fence_emit_polling(ring, &seq); >> amdgpu_ring_commit(ring); >> - mutex_unlock(&kiq->ring_mutex); >> + spin_unlock(&kiq->ring_lock); >> >> - r = dma_fence_wait_timeout(f, false, >> msecs_to_jiffies(MAX_KIQ_REG_WAIT)); >> + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); >> if (r < 1) >> - DRM_ERROR("wait for kiq fence error: %ld.\n", r); >> - dma_fence_put(f); >> + DRM_ERROR("wait for kiq fence error: %ld\n", r); >> } >> >> /** > > _______________________________________________ amd-gfx mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/amd-gfx
