On Fri, Jul 13, 2012 at 11:35 PM, Maarten Lankhorst
<m.b.lankhorst at gmail.com> wrote:
> A way to trigger an irq will be needed for optimus support since
> cpu-waiting isn't always viable there. This could also be nice for
> power saving, since the cpu would no longer have to spin, and
> performance might improve slightly on cpu-limited workloads.
>
> Some way to quantify these effects would be nice, even if the
> end result would be 'no performance regression'. An earlier
> version always emitted an interrupt, resulting in glxgears going
> from 8k fps to 7k. However this is no longer the case, as I'm
> using the kernel submission channel for generating irqs as
> needed now.
>
> On nv84 I'm using NOTIFY_INTR, but that might have been
> removed on fermi, so instead I'm using invalid command
> 0x0058 now as a way to signal completion.
Out of curiosity, isn't this like a handcoded version of software
methods? If so, why handcoded? Or are software methods not supported
on NVC0?
>
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst at canonical.com>
>
> ---
> drivers/gpu/drm/nouveau/nouveau_drv.h | 2 +
> drivers/gpu/drm/nouveau/nouveau_fence.c | 49
> ++++++++++++++++++++++++++++---
> drivers/gpu/drm/nouveau/nouveau_fifo.h | 1 +
> drivers/gpu/drm/nouveau/nouveau_state.c | 1 +
> drivers/gpu/drm/nouveau/nv04_fifo.c | 25 ++++++++++++++++
> drivers/gpu/drm/nouveau/nv84_fence.c | 18 +++++++++--
> drivers/gpu/drm/nouveau/nvc0_fence.c | 12 ++++++--
> drivers/gpu/drm/nouveau/nvc0_fifo.c | 3 +-
> drivers/gpu/drm/nouveau/nve0_fifo.c | 15 +++++++--
> 9 files changed, 110 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h
> b/drivers/gpu/drm/nouveau/nouveau_drv.h
> index f97a1a7..d9d274d 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_drv.h
> +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
> @@ -707,6 +707,7 @@ struct drm_nouveau_private {
> struct drm_mm heap;
> struct nouveau_bo *bo;
> } fence;
> + wait_queue_head_t fence_wq;
>
> struct {
> spinlock_t lock;
> @@ -1656,6 +1657,7 @@ nv44_graph_class(struct drm_device *dev)
> #define NV84_SUBCHAN_WRCACHE_FLUSH
> 0x00000024
> #define NV10_SUBCHAN_REF_CNT
> 0x00000050
> #define NVSW_SUBCHAN_PAGE_FLIP
> 0x00000054
> +#define NVSW_SUBCHAN_FENCE_WAKE
> 0x00000058
> #define NV11_SUBCHAN_DMA_SEMAPHORE
> 0x00000060
> #define NV11_SUBCHAN_SEMAPHORE_OFFSET
> 0x00000064
> #define NV11_SUBCHAN_SEMAPHORE_ACQUIRE
> 0x00000068
> diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c
> b/drivers/gpu/drm/nouveau/nouveau_fence.c
> index 3c18049..3ba8dee 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_fence.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
> @@ -68,7 +68,7 @@ nouveau_fence_update(struct nouveau_channel *chan)
>
> spin_lock(&fctx->lock);
> list_for_each_entry_safe(fence, fnext, &fctx->pending, head) {
> - if (priv->read(chan) < fence->sequence)
> + if (priv->read(chan) - fence->sequence >= 0x80000000U)
> break;
>
> if (fence->work)
> @@ -111,11 +111,9 @@ nouveau_fence_done(struct nouveau_fence *fence)
> return !fence->channel;
> }
>
> -int
> -nouveau_fence_wait(struct nouveau_fence *fence, bool lazy, bool intr)
> +static int nouveau_fence_wait_busy(struct nouveau_fence *fence, bool lazy,
> bool intr)
> {
> unsigned long sleep_time = NSEC_PER_MSEC / 1000;
> - ktime_t t;
> int ret = 0;
>
> while (!nouveau_fence_done(fence)) {
> @@ -127,7 +125,7 @@ nouveau_fence_wait(struct nouveau_fence *fence, bool
> lazy, bool intr)
> __set_current_state(intr ? TASK_INTERRUPTIBLE :
> TASK_UNINTERRUPTIBLE);
> if (lazy) {
> - t = ktime_set(0, sleep_time);
> + ktime_t t = ktime_set(0, sleep_time);
> schedule_hrtimeout(&t, HRTIMER_MODE_REL);
> sleep_time *= 2;
> if (sleep_time > NSEC_PER_MSEC)
> @@ -144,6 +142,47 @@ nouveau_fence_wait(struct nouveau_fence *fence, bool
> lazy, bool intr)
> return ret;
> }
>
> +static int nouveau_fence_wait_event(struct nouveau_fence *fence, bool intr)
> +{
> + struct drm_nouveau_private *dev_priv =
> fence->channel->dev->dev_private;
> + unsigned long timeout = fence->timeout;
> + int ret = 0;
> + struct nouveau_channel *chan = dev_priv->channel;
> + struct nouveau_channel *prev = fence->channel;
> + struct nouveau_fence_priv *priv = nv_engine(chan->dev,
> NVOBJ_ENGINE_FENCE);
> +
> + if (nouveau_fence_done(fence))
> + return 0;
> +
> + if (!timeout)
> + timeout = jiffies + 3 * DRM_HZ;
> +
> + if (prev != chan)
> + ret = priv->sync(fence, prev, chan);
> + if (ret)
> + goto busy;
> +
> + if (intr)
> + ret = wait_event_interruptible_timeout(dev_priv->fence_wq,
> nouveau_fence_done(fence), timeout);
> + else
> + ret = wait_event_timeout(dev_priv->fence_wq,
> nouveau_fence_done(fence), timeout);
> +
> + return ret;
> +
> +busy:
> + return nouveau_fence_wait_busy(fence, true, intr);
> +}
> +
> +int
> +nouveau_fence_wait(struct nouveau_fence *fence, bool lazy, bool intr)
> +{
> + struct drm_nouveau_private *dev_priv =
> fence->channel->dev->dev_private;
> + if (dev_priv->chipset >= 0x84 && dev_priv->channel && lazy)
> + return nouveau_fence_wait_event(fence, intr);
> + else
> + return nouveau_fence_wait_busy(fence, lazy, intr);
> +}
> +
> int
> nouveau_fence_sync(struct nouveau_fence *fence, struct nouveau_channel *chan)
> {
> diff --git a/drivers/gpu/drm/nouveau/nouveau_fifo.h
> b/drivers/gpu/drm/nouveau/nouveau_fifo.h
> index ce99cab..942e211 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_fifo.h
> +++ b/drivers/gpu/drm/nouveau/nouveau_fifo.h
> @@ -15,6 +15,7 @@ int nv04_fifo_fini(struct drm_device *, int, bool);
> int nv04_fifo_init(struct drm_device *, int);
> void nv04_fifo_isr(struct drm_device *);
> void nv04_fifo_destroy(struct drm_device *, int);
> +bool nouveau_fifo_wakeup(struct drm_device *dev, u32 chid);
>
> void nv50_fifo_playlist_update(struct drm_device *);
> void nv50_fifo_destroy(struct drm_device *, int);
> diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c
> b/drivers/gpu/drm/nouveau/nouveau_state.c
> index 19706f0..6cdfacb 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_state.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_state.c
> @@ -515,6 +515,7 @@ nouveau_card_channel_init(struct drm_device *dev)
> dev_priv->channel = chan;
> if (ret)
> return ret;
> + init_waitqueue_head(&dev_priv->fence_wq);
> mutex_unlock(&dev_priv->channel->mutex);
>
> nouveau_bo_move_init(chan);
> diff --git a/drivers/gpu/drm/nouveau/nv04_fifo.c
> b/drivers/gpu/drm/nouveau/nv04_fifo.c
> index a6295cd..1665a82 100644
> --- a/drivers/gpu/drm/nouveau/nv04_fifo.c
> +++ b/drivers/gpu/drm/nouveau/nv04_fifo.c
> @@ -307,6 +307,26 @@ out:
> return handled;
> }
>
> +bool nouveau_fifo_wakeup(struct drm_device *dev, u32 chid)
> +{
> + struct nouveau_fifo_priv *pfifo = nv_engine(dev, NVOBJ_ENGINE_FIFO);
> + struct drm_nouveau_private *dev_priv = dev->dev_private;
> + struct nouveau_channel *chan = NULL;
> + bool handled = false;
> + unsigned long flags;
> +
> + spin_lock_irqsave(&dev_priv->channels.lock, flags);
> + if (likely(chid >= 0 && chid < pfifo->channels)) {
> + chan = dev_priv->channel;
> + if (chan->id == chid) {
> + wake_up_all(&dev_priv->fence_wq);
> + handled = true;
> + }
> + }
> + spin_unlock_irqrestore(&dev_priv->channels.lock, flags);
> + return handled;
> +}
> +
> static const char *nv_dma_state_err(u32 state)
> {
> static const char * const desc[] = {
> @@ -448,6 +468,11 @@ nv04_fifo_isr(struct drm_device *dev)
> status &= ~0x00000010;
> nv_wr32(dev, 0x002100, 0x00000010);
> }
> + if (status & 0x80000000) {
> + nouveau_fifo_wakeup(dev, chid);
> + status &= ~0x80000000;
> + nv_wr32(dev, 0x002100, 0x80000000);
> + }
> }
>
> if (status) {
> diff --git a/drivers/gpu/drm/nouveau/nv84_fence.c
> b/drivers/gpu/drm/nouveau/nv84_fence.c
> index c2f889b..2b8c3e5 100644
> --- a/drivers/gpu/drm/nouveau/nv84_fence.c
> +++ b/drivers/gpu/drm/nouveau/nv84_fence.c
> @@ -42,15 +42,20 @@ static int
> nv84_fence_emit(struct nouveau_fence *fence)
> {
> struct nouveau_channel *chan = fence->channel;
> - int ret = RING_SPACE(chan, 7);
> + struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
> + bool intr = dev_priv->channel == chan;
> +
> + int ret = RING_SPACE(chan, 7 + intr);
> if (ret == 0) {
> BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 1);
> OUT_RING (chan, NvSema);
> - BEGIN_NV04(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
> + BEGIN_NV04(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4 +
> intr);
> OUT_RING (chan, upper_32_bits(chan->id * 16));
> OUT_RING (chan, lower_32_bits(chan->id * 16));
> OUT_RING (chan, fence->sequence);
> OUT_RING (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_WRITE_LONG);
> + if (intr)
> + OUT_RING (chan, 0);
> FIRE_RING (chan);
> }
> return ret;
> @@ -61,15 +66,20 @@ static int
> nv84_fence_sync(struct nouveau_fence *fence,
> struct nouveau_channel *prev, struct nouveau_channel *chan)
> {
> - int ret = RING_SPACE(chan, 7);
> + struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
> + bool intr = dev_priv->channel == chan;
> +
> + int ret = RING_SPACE(chan, 7 + intr);
> if (ret == 0) {
> BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 1);
> OUT_RING (chan, NvSema);
> - BEGIN_NV04(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
> + BEGIN_NV04(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4 +
> intr);
> OUT_RING (chan, upper_32_bits(prev->id * 16));
> OUT_RING (chan, lower_32_bits(prev->id * 16));
> OUT_RING (chan, fence->sequence);
> OUT_RING (chan,
> NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_GEQUAL);
> + if (intr)
> + OUT_RING (chan, 0);
> FIRE_RING (chan);
> }
> return ret;
> diff --git a/drivers/gpu/drm/nouveau/nvc0_fence.c
> b/drivers/gpu/drm/nouveau/nvc0_fence.c
> index 47ab388..25805ef 100644
> --- a/drivers/gpu/drm/nouveau/nvc0_fence.c
> +++ b/drivers/gpu/drm/nouveau/nvc0_fence.c
> @@ -45,15 +45,19 @@ nvc0_fence_emit(struct nouveau_fence *fence)
> struct nouveau_channel *chan = fence->channel;
> struct nvc0_fence_chan *fctx = chan->engctx[NVOBJ_ENGINE_FENCE];
> u64 addr = fctx->vma.offset + chan->id * 16;
> + struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
> + bool intr = dev_priv->channel == chan;
> int ret;
>
> - ret = RING_SPACE(chan, 5);
> + ret = RING_SPACE(chan, 5 + intr);
> if (ret == 0) {
> BEGIN_NVC0(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
> OUT_RING (chan, upper_32_bits(addr));
> OUT_RING (chan, lower_32_bits(addr));
> OUT_RING (chan, fence->sequence);
> OUT_RING (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_WRITE_LONG);
> + if (intr)
> + BEGIN_IMC0(chan, 0, 0x058, 0);
> FIRE_RING (chan);
> }
>
> @@ -64,11 +68,13 @@ static int
> nvc0_fence_sync(struct nouveau_fence *fence,
> struct nouveau_channel *prev, struct nouveau_channel *chan)
> {
> + struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
> struct nvc0_fence_chan *fctx = chan->engctx[NVOBJ_ENGINE_FENCE];
> u64 addr = fctx->vma.offset + prev->id * 16;
> + bool intr = dev_priv->channel == chan;
> int ret;
>
> - ret = RING_SPACE(chan, 5);
> + ret = RING_SPACE(chan, 5 + intr);
> if (ret == 0) {
> BEGIN_NVC0(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
> OUT_RING (chan, upper_32_bits(addr));
> @@ -76,6 +82,8 @@ nvc0_fence_sync(struct nouveau_fence *fence,
> OUT_RING (chan, fence->sequence);
> OUT_RING (chan,
> NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_GEQUAL |
> NVC0_SUBCHAN_SEMAPHORE_TRIGGER_YIELD);
> + if (intr)
> + BEGIN_IMC0(chan, 0, NVSW_SUBCHAN_FENCE_WAKE, 0);
> FIRE_RING (chan);
> }
>
> diff --git a/drivers/gpu/drm/nouveau/nvc0_fifo.c
> b/drivers/gpu/drm/nouveau/nvc0_fifo.c
> index 7d85553..b3930c7 100644
> --- a/drivers/gpu/drm/nouveau/nvc0_fifo.c
> +++ b/drivers/gpu/drm/nouveau/nvc0_fifo.c
> @@ -356,7 +356,8 @@ nvc0_fifo_isr_subfifo_intr(struct drm_device *dev, int
> unit)
> if (mthd == 0x0054) {
> if (!nvc0_fifo_page_flip(dev, chid))
> show &= ~0x00200000;
> - }
> + } else if (mthd == 0x0058 && nouveau_fifo_wakeup(dev, chid))
> + show &= ~0x00200000;
> }
>
> if (show) {
> diff --git a/drivers/gpu/drm/nouveau/nve0_fifo.c
> b/drivers/gpu/drm/nouveau/nve0_fifo.c
> index 1855ecb..33e41cd 100644
> --- a/drivers/gpu/drm/nouveau/nve0_fifo.c
> +++ b/drivers/gpu/drm/nouveau/nve0_fifo.c
> @@ -303,11 +303,18 @@ nve0_fifo_isr_subfifo_intr(struct drm_device *dev, int
> unit)
> u32 chid = nv_rd32(dev, 0x040120 + (unit * 0x2000)) & 0x7f;
> u32 subc = (addr & 0x00070000);
> u32 mthd = (addr & 0x00003ffc);
> + u32 show = stat;
>
> - NV_INFO(dev, "PSUBFIFO %d:", unit);
> - nouveau_bitfield_print(nve0_fifo_subfifo_intr, stat);
> - NV_INFO(dev, "PSUBFIFO %d: ch %d subc %d mthd 0x%04x data 0x%08x\n",
> - unit, chid, subc, mthd, data);
> + if (stat & 0x00200000 && mthd == 0x0058 &&
> + nouveau_fifo_wakeup(dev, chid))
> + show &= ~0x00200000;
> +
> + if (show) {
> + NV_INFO(dev, "PSUBFIFO %d:", unit);
> + nouveau_bitfield_print(nve0_fifo_subfifo_intr, show);
> + NV_INFO(dev, "PSUBFIFO %d: ch %d subc %d mthd 0x%04x data
> 0x%08x\n",
> + unit, chid, subc, mthd, data);
> + }
>
> nv_wr32(dev, 0x0400c0 + (unit * 0x2000), 0x80600008);
> nv_wr32(dev, 0x040108 + (unit * 0x2000), stat);
>
> _______________________________________________
> dri-devel mailing list
> dri-devel at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/dri-devel
--
Far away from the primal instinct, the song seems to fade away, the
river gets wider between your thoughts and the things we do and say.