On Thu, Oct 23, 2025 at 07:37:48PM +0200, Michal Wajdeczko wrote:
> 
> 
> On 10/22/2025 12:41 AM, Michał Winiarski wrote:
> > Contiguous PF GGTT VMAs can be scarce after creating VFs.
> > Increase the GuC buffer cache size to 4M for PF so that we can fit GuC
> > migration data (which currently maxes out at just under 4M) and use the
> 
> but the code below still uses 8M
Yeah - turns out we need more than 4M (I did my math on one of the structs,
but there's actually additional data present), so let's just stick to 8M
for now.

> 
> > cache instead of allocating fresh BOs.
> > 
> > Signed-off-by: Michał Winiarski <[email protected]>
> > ---
> >  drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c | 46 ++++++-------------
> >  drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h |  3 ++
> >  drivers/gpu/drm/xe/xe_guc.c                   | 12 ++++-
> >  3 files changed, 28 insertions(+), 33 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
> > index 4e26feb9c267f..04fad3126865c 100644
> > --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
> > +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
> > @@ -11,7 +11,7 @@
> >  #include "xe_gt_sriov_pf_helpers.h"
> >  #include "xe_gt_sriov_pf_migration.h"
> >  #include "xe_gt_sriov_printk.h"
> > -#include "xe_guc.h"
> > +#include "xe_guc_buf.h"
> >  #include "xe_guc_ct.h"
> >  #include "xe_sriov.h"
> >  #include "xe_sriov_migration_data.h"
> > @@ -57,73 +57,55 @@ static int pf_send_guc_query_vf_state_size(struct xe_gt *gt, unsigned int vfid)
> >  
> >  /* Return: number of state dwords saved or a negative error code on failure */
> >  static int pf_send_guc_save_vf_state(struct xe_gt *gt, unsigned int vfid,
> > -				     void *buff, size_t size)
> > +				     void *dst, size_t size)
> >  {
> >  	const int ndwords = size / sizeof(u32);
> > -	struct xe_tile *tile = gt_to_tile(gt);
> > -	struct xe_device *xe = tile_to_xe(tile);
> >  	struct xe_guc *guc = &gt->uc.guc;
> > -	struct xe_bo *bo;
> > +	CLASS(xe_guc_buf, buf)(&guc->buf, ndwords);
> >  	int ret;
> >  
> >  	xe_gt_assert(gt, size % sizeof(u32) == 0);
> >  	xe_gt_assert(gt, size == ndwords * sizeof(u32));
> >  
> > -	bo = xe_bo_create_pin_map_novm(xe, tile,
> > -				       ALIGN(size, PAGE_SIZE),
> > -				       ttm_bo_type_kernel,
> > -				       XE_BO_FLAG_SYSTEM |
> > -				       XE_BO_FLAG_GGTT |
> > -				       XE_BO_FLAG_GGTT_INVALIDATE, false);
> > -	if (IS_ERR(bo))
> > -		return PTR_ERR(bo);
> > +	if (!xe_guc_buf_is_valid(buf))
> > +		return -ENOBUFS;
> > +
> > +	memset(xe_guc_buf_cpu_ptr(buf), 0, size);
> 
> hmm, I didn't find in the GuC spec that this buffer must be zeroed, so why
> bother?

That was found during testing, GuC actually expects the buffer to be zeroed.
I'll ping folks to update the spec.

> > 
> >  	ret = guc_action_vf_save_restore(guc, vfid, GUC_PF_OPCODE_VF_SAVE,
> > -					 xe_bo_ggtt_addr(bo), ndwords);
> > +					 xe_guc_buf_flush(buf), ndwords);
> >  	if (!ret)
> >  		ret = -ENODATA;
> >  	else if (ret > ndwords)
> >  		ret = -EPROTO;
> >  	else if (ret > 0)
> > -		xe_map_memcpy_from(xe, buff, &bo->vmap, 0, ret * sizeof(u32));
> > +		memcpy(dst, xe_guc_buf_sync_read(buf), ret * sizeof(u32));
> 
> nit: given this usage, maybe one day we should add optimized variant that
> copies directly to dst?
> 
> 	xe_guc_buf_sync_into(buf, dst, size);
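For reference, a rough, untested sketch of such a helper, assuming it just
wraps the existing xe_guc_buf_sync_read() and that struct xe_guc_buf is
passed by value like the other xe_guc_buf_*() helpers (the name is taken
from your snippet above):

	/*
	 * Sketch only: sync the GuC-written data back, then copy it straight
	 * into the caller's destination buffer. A truly optimized variant
	 * could skip the intermediate CPU copy, but that would need access
	 * to xe_guc_buf internals.
	 */
	static inline void *xe_guc_buf_sync_into(struct xe_guc_buf buf, void *dst, size_t size)
	{
		return memcpy(dst, xe_guc_buf_sync_read(buf), size);
	}
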
> > 
> > -	xe_bo_unpin_map_no_vm(bo);
> >  	return ret;
> >  }
> >  
> >  /* Return: number of state dwords restored or a negative error code on failure */
> >  static int pf_send_guc_restore_vf_state(struct xe_gt *gt, unsigned int vfid,
> > -					const void *buff, size_t size)
> > +					const void *src, size_t size)
> >  {
> >  	const int ndwords = size / sizeof(u32);
> > -	struct xe_tile *tile = gt_to_tile(gt);
> > -	struct xe_device *xe = tile_to_xe(tile);
> >  	struct xe_guc *guc = &gt->uc.guc;
> > -	struct xe_bo *bo;
> > +	CLASS(xe_guc_buf_from_data, buf)(&guc->buf, src, size);
> >  	int ret;
> >  
> >  	xe_gt_assert(gt, size % sizeof(u32) == 0);
> >  	xe_gt_assert(gt, size == ndwords * sizeof(u32));
> >  
> > -	bo = xe_bo_create_pin_map_novm(xe, tile,
> > -				       ALIGN(size, PAGE_SIZE),
> > -				       ttm_bo_type_kernel,
> > -				       XE_BO_FLAG_SYSTEM |
> > -				       XE_BO_FLAG_GGTT |
> > -				       XE_BO_FLAG_GGTT_INVALIDATE, false);
> > -	if (IS_ERR(bo))
> > -		return PTR_ERR(bo);
> > -
> > -	xe_map_memcpy_to(xe, &bo->vmap, 0, buff, size);
> > +	if (!xe_guc_buf_is_valid(buf))
> > +		return -ENOBUFS;
> >  
> >  	ret = guc_action_vf_save_restore(guc, vfid, GUC_PF_OPCODE_VF_RESTORE,
> > -					 xe_bo_ggtt_addr(bo), ndwords);
> > +					 xe_guc_buf_flush(buf), ndwords);
> >  	if (!ret)
> >  		ret = -ENODATA;
> >  	else if (ret > ndwords)
> >  		ret = -EPROTO;
> >  
> > -	xe_bo_unpin_map_no_vm(bo);
> >  	return ret;
> >  }
> >  
> > diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h
> > index e2d41750f863c..4f2f2783339c3 100644
> > --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h
> > +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h
> > @@ -11,6 +11,9 @@
> >  struct xe_gt;
> >  struct xe_sriov_migration_data;
> >  
> > +/* TODO: get this information by querying GuC in the future */
> > +#define XE_GT_SRIOV_PF_MIGRATION_GUC_DATA_MAX_SIZE SZ_8M
> 
> so it's 8M or 4M ?
> 
> maybe wrap that into function now
> 
> u32 xe_gt_sriov_pf_migration_guc_data_size(struct xe_gt *gt)
> {
> 	if (xe_sriov_pf_migration_supported(gt_to_xe))
> 		return SZ_4M; /* TODO: ... */
> 	return 0;
> }

XE_GT_SRIOV_PF_MIGRATION_GUC_DATA_MAX_SIZE disappears from this header as a
result of previous changes, so the size calculation can be kept static.
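For the next revision that would be roughly the following (just a sketch of
the intent, with the constant inlined in xe_guc.c and using the 8M value
discussed above rather than 4M):

	static u32 guc_buf_cache_size(struct xe_guc *guc)
	{
		struct xe_device *xe = guc_to_xe(guc);

		/* PF migration data is the largest object the cache must fit */
		if (IS_SRIOV_PF(xe) && xe_sriov_pf_migration_supported(xe))
			return SZ_8M;

		return XE_GUC_BUF_CACHE_DEFAULT_SIZE;
	}
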
> 
> > +
> >  int xe_gt_sriov_pf_migration_init(struct xe_gt *gt);
> >  int xe_gt_sriov_pf_migration_save_guc_state(struct xe_gt *gt, unsigned int vfid);
> >  int xe_gt_sriov_pf_migration_restore_guc_state(struct xe_gt *gt, unsigned int vfid);
> > diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
> > index 7c65528859ecb..cd6ab277a7876 100644
> > --- a/drivers/gpu/drm/xe/xe_guc.c
> > +++ b/drivers/gpu/drm/xe/xe_guc.c
> > @@ -24,6 +24,7 @@
> >  #include "xe_gt_printk.h"
> >  #include "xe_gt_sriov_vf.h"
> >  #include "xe_gt_throttle.h"
> > +#include "xe_gt_sriov_pf_migration.h"
> >  #include "xe_guc_ads.h"
> >  #include "xe_guc_buf.h"
> >  #include "xe_guc_capture.h"
> > @@ -40,6 +41,7 @@
> >  #include "xe_mmio.h"
> >  #include "xe_platform_types.h"
> >  #include "xe_sriov.h"
> > +#include "xe_sriov_pf_migration.h"
> >  #include "xe_uc.h"
> >  #include "xe_uc_fw.h"
> >  #include "xe_wa.h"
> > @@ -821,6 +823,14 @@ static int vf_guc_init_post_hwconfig(struct xe_guc *guc)
> >  	return 0;
> >  }
> >  
> > +static u32 guc_buf_cache_size(struct xe_guc *guc)
> > +{
> > +	if (IS_SRIOV_PF(guc_to_xe(guc)) && xe_sriov_pf_migration_supported(guc_to_xe(guc)))
> > +		return XE_GT_SRIOV_PF_MIGRATION_GUC_DATA_MAX_SIZE;
> 
> then
> 	u32 size = XE_GUC_BUF_CACHE_DEFAULT_SIZE;
> 
> 	if (IS_SRIOV_PF(guc_to_xe(guc)))
> 		size += xe_gt_sriov_pf_migration_guc_data_size(guc_to_gt(guc));
> 
> 	return size;

As the cache gets reused, we don't need to add anything to the default (we
should just replace the size with the new requirement for the largest
object size).

Thanks,
-Michał

> 
> > +	else
> > +		return XE_GUC_BUF_CACHE_DEFAULT_SIZE;
> > +}
> > +
> >  /**
> >   * xe_guc_init_post_hwconfig - initialize GuC post hwconfig load
> >   * @guc: The GuC object
> > @@ -860,7 +870,7 @@ int xe_guc_init_post_hwconfig(struct xe_guc *guc)
> >  	if (ret)
> >  		return ret;
> >  
> > -	ret = xe_guc_buf_cache_init(&guc->buf, XE_GUC_BUF_CACHE_DEFAULT_SIZE);
> > +	ret = xe_guc_buf_cache_init(&guc->buf, guc_buf_cache_size(guc));
> >  	if (ret)
> >  		return ret;
> > 
> 
