Ben Widawsky <[email protected]> writes:

> GEN8 removes the GT FIFO which we've all come to know and love. Instead
> it offers a wider range of optimized registers which always keep a
> shadowed copy, and are fed to the GPU when it wakes.
>
> How this is implemented in hardware is still somewhat of a mystery. As
> far as I can tell, the basic design is as follows:
>
> If the register is not optimized, you must use the old forcewake
> mechanism to bring the GT out of sleep. [1]
>
> If register is in the optimized list the write will signal that the
> GT should begin to come out of whatever sleep state it is in.
>
> While the GT is coming out of sleep, the requested write will be stored
> in an intermediate shadow register.
>
> Do to the fact that the implementation details are not clear, I see
> several risks:
> 1. Order is not preserved as it is with GT FIFO. If we issue multiple
> writes to optimized registers, where order matters, we may need to
> serialize it with forcewake.
> 2. The optimized registers have only 1 shadowed slot, meaning if we
> issue multiple writes to the same register, and those values need to
> reach the GPU in order, forcewake will be required.
>
> [1] We could implement a SW queue the way the GT FIFO used to work if
> desired.
>
> NOTE: Compile tested only until we get real silicon.
>
> v2:
> - Use a default case to make future platforms also work.
> - Get rid of IS_BROADWELL since that's not yet defined, but we want to
>   MMIO as soon as possible.
>
> Signed-off-by: Ben Widawsky <[email protected]> (v1)
> Signed-off-by: Daniel Vetter <[email protected]>
> ---
>  drivers/gpu/drm/i915/intel_uncore.c | 53 
> ++++++++++++++++++++++++++++++++++++-
>  1 file changed, 52 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c 
> b/drivers/gpu/drm/i915/intel_uncore.c
> index f6fae35..fa06ce4 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -93,7 +93,7 @@ static void __gen6_gt_force_wake_mt_get(struct 
> drm_i915_private *dev_priv)
>  {
>       u32 forcewake_ack;
>  
> -     if (IS_HASWELL(dev_priv->dev))
> +     if (IS_HASWELL(dev_priv->dev) || IS_GEN8(dev_priv->dev))
>               forcewake_ack = FORCEWAKE_ACK_HSW;
>       else
>               forcewake_ack = FORCEWAKE_MT_ACK;
> @@ -459,6 +459,47 @@ hsw_write##x(struct drm_i915_private *dev_priv, off_t 
> reg, u##x val, bool trace)
>       spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); \
>  }
>  
> +static const u32 gen8_optimized_regs[] = {
> +     FORCEWAKE_MT,
> +     GEN6_RPNSWREQ,
> +     GEN6_RC_VIDEO_FREQ,
> +     RING_TAIL(RENDER_RING_BASE),
> +     RING_TAIL(GEN6_BSD_RING_BASE),
> +     RING_TAIL(VEBOX_RING_BASE),
> +     RING_TAIL(BLT_RING_BASE),
> +     /* TODO: Other registers are not yet used */
> +};
> +

s/optimized/shadowed would make more sense i think.

> +static bool is_gen8_optimized(struct drm_i915_private *dev_priv, u32 reg)
> +{
> +     int i;
> +     for (i = 0; i < ARRAY_SIZE(gen8_optimized_regs); i++)
> +             if (reg == gen8_optimized_regs[i])
> +                     return false;

The logic is reversed here with regards to function name.

> +
> +     return true;
> +}
> +
> +#define __gen8_write(x) \
> +static void \
> +gen8_write##x(struct drm_i915_private *dev_priv, off_t reg, u##x val, bool 
> trace) { \
> +     bool __needs_put = false; \
                       
This initialization is unnecessary.

> +     REG_WRITE_HEADER; \
> +     __needs_put = is_gen8_optimized(dev_priv, reg); \

This logic is backwards.
Please consider:

        __needs_put = !is_gen8_shadowed(dev_priv, reg);


-- Mika

> +     if (__needs_put) { \
> +             dev_priv->uncore.funcs.force_wake_get(dev_priv); \
> +     } \
> +     __raw_i915_write##x(dev_priv, reg, val); \
> +     if (__needs_put) { \
> +             dev_priv->uncore.funcs.force_wake_put(dev_priv); \
> +     } \
> +     spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); \
> +}
> +
> +__gen8_write(8)
> +__gen8_write(16)
> +__gen8_write(32)
> +__gen8_write(64)
>  __hsw_write(8)
>  __hsw_write(16)
>  __hsw_write(32)
> @@ -476,6 +517,7 @@ __gen4_write(16)
>  __gen4_write(32)
>  __gen4_write(64)
>  
> +#undef __gen8_write
>  #undef __hsw_write
>  #undef __gen6_write
>  #undef __gen5_write
> @@ -534,6 +576,15 @@ void intel_uncore_init(struct drm_device *dev)
>       }
>  
>       switch (INTEL_INFO(dev)->gen) {
> +     default:
> +             dev_priv->uncore.funcs.mmio_writeb  = gen8_write8;
> +             dev_priv->uncore.funcs.mmio_writew  = gen8_write16;
> +             dev_priv->uncore.funcs.mmio_writel  = gen8_write32;
> +             dev_priv->uncore.funcs.mmio_writeq  = gen8_write64;
> +             dev_priv->uncore.funcs.mmio_readb  = gen6_read8;
> +             dev_priv->uncore.funcs.mmio_readw  = gen6_read16;
> +             dev_priv->uncore.funcs.mmio_readl  = gen6_read32;
> +             dev_priv->uncore.funcs.mmio_readq  = gen6_read64;
>       case 7:
>       case 6:
>               if (IS_HASWELL(dev)) {
> -- 
> 1.8.4.2
>
> _______________________________________________
> Intel-gfx mailing list
> [email protected]
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to