On Thu, Dec 22, 2016 at 08:36:04AM +0000, Chris Wilson wrote:
> When we evict from the GTT to make room for an object, the hole we
> create is put onto the MRU stack inside the drm_mm range manager. On the
> next search pass, we can speed up a PIN_HIGH allocation by referencing
> that stack for the new hole.
> 
> v2: Pull together the 3 identical implementations (ahem, a couple were
> outdated) into a common routine for allocating a node and evicting as
> necessary.
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Reviewed-by: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>

Since it doesn't apply directly to drm-misc I'm leaving this one out for
now. I guess best to merge through drm-intel?
-Daniel

> ---
>  drivers/gpu/drm/i915/gvt/aperture_gm.c | 33 +++++-----------
>  drivers/gpu/drm/i915/i915_gem_gtt.c    | 72 ++++++++++++++++++++++++----------
>  drivers/gpu/drm/i915/i915_gem_gtt.h    |  5 +++
>  drivers/gpu/drm/i915/i915_vma.c        | 40 ++-----------------
>  4 files changed, 70 insertions(+), 80 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c
> index 7d33b607bc89..1bb7a5b80d47 100644
> --- a/drivers/gpu/drm/i915/gvt/aperture_gm.c
> +++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c
> @@ -48,47 +48,34 @@ static int alloc_gm(struct intel_vgpu *vgpu, bool high_gm)
>  {
>       struct intel_gvt *gvt = vgpu->gvt;
>       struct drm_i915_private *dev_priv = gvt->dev_priv;
> -     u32 alloc_flag, search_flag;
> +     unsigned int flags;
>       u64 start, end, size;
>       struct drm_mm_node *node;
> -     int retried = 0;
>       int ret;
>  
>       if (high_gm) {
> -             search_flag = DRM_MM_SEARCH_BELOW;
> -             alloc_flag = DRM_MM_CREATE_TOP;
>               node = &vgpu->gm.high_gm_node;
>               size = vgpu_hidden_sz(vgpu);
>               start = gvt_hidden_gmadr_base(gvt);
>               end = gvt_hidden_gmadr_end(gvt);
> +             flags = PIN_HIGH;
>       } else {
> -             search_flag = DRM_MM_SEARCH_DEFAULT;
> -             alloc_flag = DRM_MM_CREATE_DEFAULT;
>               node = &vgpu->gm.low_gm_node;
>               size = vgpu_aperture_sz(vgpu);
>               start = gvt_aperture_gmadr_base(gvt);
>               end = gvt_aperture_gmadr_end(gvt);
> +             flags = PIN_MAPPABLE;
>       }
>  
>       mutex_lock(&dev_priv->drm.struct_mutex);
> -search_again:
> -     ret = drm_mm_insert_node_in_range_generic(&dev_priv->ggtt.base.mm,
> -                                               node, size, 4096,
> -                                               I915_COLOR_UNEVICTABLE,
> -                                               start, end, search_flag,
> -                                               alloc_flag);
> -     if (ret) {
> -             ret = i915_gem_evict_something(&dev_priv->ggtt.base,
> -                                            size, 4096,
> -                                            I915_COLOR_UNEVICTABLE,
> -                                            start, end, 0);
> -             if (ret == 0 && ++retried < 3)
> -                     goto search_again;
> -
> -             gvt_err("fail to alloc %s gm space from host, retried %d\n",
> -                             high_gm ? "high" : "low", retried);
> -     }
> +     ret = i915_gem_gtt_insert(&dev_priv->ggtt.base, node,
> +                               size, 4096, I915_COLOR_UNEVICTABLE,
> +                               start, end, flags);
>       mutex_unlock(&dev_priv->drm.struct_mutex);
> +     if (ret)
> +             gvt_err("fail to alloc %s gm space from host\n",
> +                     high_gm ? "high" : "low");
> +
>       return ret;
>  }
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 6af9311f72f5..c8f1675852a7 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -2044,7 +2044,6 @@ static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
>       struct i915_address_space *vm = &ppgtt->base;
>       struct drm_i915_private *dev_priv = ppgtt->base.i915;
>       struct i915_ggtt *ggtt = &dev_priv->ggtt;
> -     bool retried = false;
>       int ret;
>  
>       /* PPGTT PDEs reside in the GGTT and consists of 512 entries. The
> @@ -2057,29 +2056,14 @@ static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
>       if (ret)
>               return ret;
>  
> -alloc:
> -     ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm, &ppgtt->node,
> -                                               GEN6_PD_SIZE, GEN6_PD_ALIGN,
> -                                               I915_COLOR_UNEVICTABLE,
> -                                               0, ggtt->base.total,
> -                                               DRM_MM_TOPDOWN);
> -     if (ret == -ENOSPC && !retried) {
> -             ret = i915_gem_evict_something(&ggtt->base,
> -                                            GEN6_PD_SIZE, GEN6_PD_ALIGN,
> -                                            I915_COLOR_UNEVICTABLE,
> -                                            0, ggtt->base.total,
> -                                            0);
> -             if (ret)
> -                     goto err_out;
> -
> -             retried = true;
> -             goto alloc;
> -     }
> -
> +     ret = i915_gem_gtt_insert(&ggtt->base, &ppgtt->node,
> +                               GEN6_PD_SIZE, GEN6_PD_ALIGN,
> +                               I915_COLOR_UNEVICTABLE,
> +                               0, ggtt->base.total,
> +                               PIN_HIGH);
>       if (ret)
>               goto err_out;
>  
> -
>       if (ppgtt->node.start < ggtt->mappable_end)
>               DRM_DEBUG("Forced to use aperture for PDEs\n");
>  
> @@ -3553,3 +3537,49 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma)
>       return ret;
>  }
>  
> +int i915_gem_gtt_insert(struct i915_address_space *vm,
> +                     struct drm_mm_node *node,
> +                     u64 size, u64 alignment, unsigned long color,
> +                     u64 start, u64 end, unsigned int flags)
> +{
> +     u32 search_flag, alloc_flag;
> +     int err;
> +
> +     lockdep_assert_held(&vm->i915->drm.struct_mutex);
> +
> +     if (flags & PIN_HIGH) {
> +             search_flag = DRM_MM_SEARCH_BELOW;
> +             alloc_flag = DRM_MM_CREATE_TOP;
> +     } else {
> +             search_flag = DRM_MM_SEARCH_DEFAULT;
> +             alloc_flag = DRM_MM_CREATE_DEFAULT;
> +     }
> +
> +     /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
> +      * so we know that we always have a minimum alignment of 4096.
> +      * The drm_mm range manager is optimised to return results
> +      * with zero alignment, so where possible use the optimal
> +      * path.
> +      */
> +     GEM_BUG_ON(size & 4095);
> +     if (alignment <= 4096)
> +             alignment = 0;
> +
> +     err = drm_mm_insert_node_in_range_generic(&vm->mm, node,
> +                                               size, alignment, color,
> +                                               start, end,
> +                                               search_flag, alloc_flag);
> +     if (err != -ENOSPC)
> +             return err;
> +
> +     err = i915_gem_evict_something(vm, size, alignment, color,
> +                                    start, end, flags);
> +     if (err)
> +             return err;
> +
> +     search_flag = DRM_MM_SEARCH_DEFAULT;
> +     return drm_mm_insert_node_in_range_generic(&vm->mm, node,
> +                                                size, alignment, color,
> +                                                start, end,
> +                                                search_flag, alloc_flag);
> +}
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index 0055b8567a43..4c7bef07e38a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -528,6 +528,11 @@ int __must_check i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
>  void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
>                              struct sg_table *pages);
>  
> +int i915_gem_gtt_insert(struct i915_address_space *vm,
> +                     struct drm_mm_node *node,
> +                     u64 size, u64 alignment, unsigned long color,
> +                     u64 start, u64 end, unsigned int flags);
> +
>  /* Flags used by pin/bind&friends. */
>  #define PIN_NONBLOCK         BIT(0)
>  #define PIN_MAPPABLE         BIT(1)
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index fd75d5704287..608008d2d999 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -415,43 +415,11 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
>                               goto err_unpin;
>               }
>       } else {
> -             u32 search_flag, alloc_flag;
> -
> -             if (flags & PIN_HIGH) {
> -                     search_flag = DRM_MM_SEARCH_BELOW;
> -                     alloc_flag = DRM_MM_CREATE_TOP;
> -             } else {
> -                     search_flag = DRM_MM_SEARCH_DEFAULT;
> -                     alloc_flag = DRM_MM_CREATE_DEFAULT;
> -             }
> -
> -             /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
> -              * so we know that we always have a minimum alignment of 4096.
> -              * The drm_mm range manager is optimised to return results
> -              * with zero alignment, so where possible use the optimal
> -              * path.
> -              */
> -             if (alignment <= 4096)
> -                     alignment = 0;
> -
> -search_free:
> -             ret = drm_mm_insert_node_in_range_generic(&vma->vm->mm,
> -                                                       &vma->node,
> -                                                       size, alignment,
> -                                                       obj->cache_level,
> -                                                       start, end,
> -                                                       search_flag,
> -                                                       alloc_flag);
> -             if (ret) {
> -                     ret = i915_gem_evict_something(vma->vm, size, alignment,
> -                                                    obj->cache_level,
> -                                                    start, end,
> -                                                    flags);
> -                     if (ret == 0)
> -                             goto search_free;
> -
> +             ret = i915_gem_gtt_insert(vma->vm, &vma->node,
> +                                       size, alignment, obj->cache_level,
> +                                       start, end, flags);
> +             if (ret)
>                       goto err_unpin;
> -             }
>  
>               GEM_BUG_ON(vma->node.start < start);
>               GEM_BUG_ON(vma->node.start + vma->node.size > end);
> -- 
> 2.11.0
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

Reply via email to