On 10/30/2025 9:31 PM, Michał Winiarski wrote:
> Contiguous PF GGTT VMAs can be scarce after creating VFs.
> Increase the GuC buffer cache size to 8M for PF so that we can fit GuC
> migration data (which currently maxes out at just under 4M) and use the
> cache instead of allocating fresh BOs.
> 
> Signed-off-by: Michał Winiarski <[email protected]>
> ---
>  drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c | 46 ++++++-------------
>  drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h |  3 ++
>  drivers/gpu/drm/xe/xe_guc.c                   | 12 ++++-
>  3 files changed, 28 insertions(+), 33 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c 
> b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
> index 30f0e51da49ae..a2db127982638 100644
> --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
> +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
> @@ -11,7 +11,7 @@
>  #include "xe_gt_sriov_pf_helpers.h"
>  #include "xe_gt_sriov_pf_migration.h"
>  #include "xe_gt_sriov_printk.h"
> -#include "xe_guc.h"
> +#include "xe_guc_buf.h"
>  #include "xe_guc_ct.h"
>  #include "xe_sriov.h"
>  #include "xe_sriov_migration_data.h"
> @@ -57,73 +57,55 @@ static int pf_send_guc_query_vf_state_size(struct xe_gt 
> *gt, unsigned int vfid)
>  
>  /* Return: number of state dwords saved or a negative error code on failure 
> */
>  static int pf_send_guc_save_vf_state(struct xe_gt *gt, unsigned int vfid,
> -                                  void *buff, size_t size)
> +                                  void *dst, size_t size)
>  {
>       const int ndwords = size / sizeof(u32);
> -     struct xe_tile *tile = gt_to_tile(gt);
> -     struct xe_device *xe = tile_to_xe(tile);
>       struct xe_guc *guc = &gt->uc.guc;
> -     struct xe_bo *bo;
> +     CLASS(xe_guc_buf, buf)(&guc->buf, ndwords);
>       int ret;
>  
>       xe_gt_assert(gt, size % sizeof(u32) == 0);
>       xe_gt_assert(gt, size == ndwords * sizeof(u32));
>  
> -     bo = xe_bo_create_pin_map_novm(xe, tile,
> -                                    ALIGN(size, PAGE_SIZE),
> -                                    ttm_bo_type_kernel,
> -                                    XE_BO_FLAG_SYSTEM |
> -                                    XE_BO_FLAG_GGTT |
> -                                    XE_BO_FLAG_GGTT_INVALIDATE, false);
> -     if (IS_ERR(bo))
> -             return PTR_ERR(bo);
> +     if (!xe_guc_buf_is_valid(buf))
> +             return -ENOBUFS;
> +

nit: I would still add the comment that this to WA some known FW limitation

> +     memset(xe_guc_buf_cpu_ptr(buf), 0, size);
>  
>       ret = guc_action_vf_save_restore(guc, vfid, GUC_PF_OPCODE_VF_SAVE,
> -                                      xe_bo_ggtt_addr(bo), ndwords);
> +                                      xe_guc_buf_flush(buf), ndwords);
>       if (!ret)
>               ret = -ENODATA;
>       else if (ret > ndwords)
>               ret = -EPROTO;
>       else if (ret > 0)
> -             xe_map_memcpy_from(xe, buff, &bo->vmap, 0, ret * sizeof(u32));
> +             memcpy(dst, xe_guc_buf_sync_read(buf), ret * sizeof(u32));
>  
> -     xe_bo_unpin_map_no_vm(bo);
>       return ret;
>  }
>  
>  /* Return: number of state dwords restored or a negative error code on 
> failure */
>  static int pf_send_guc_restore_vf_state(struct xe_gt *gt, unsigned int vfid,
> -                                     const void *buff, size_t size)
> +                                     const void *src, size_t size)
>  {
>       const int ndwords = size / sizeof(u32);
> -     struct xe_tile *tile = gt_to_tile(gt);
> -     struct xe_device *xe = tile_to_xe(tile);
>       struct xe_guc *guc = &gt->uc.guc;
> -     struct xe_bo *bo;
> +     CLASS(xe_guc_buf_from_data, buf)(&guc->buf, src, size);
>       int ret;
>  
>       xe_gt_assert(gt, size % sizeof(u32) == 0);
>       xe_gt_assert(gt, size == ndwords * sizeof(u32));
>  
> -     bo = xe_bo_create_pin_map_novm(xe, tile,
> -                                    ALIGN(size, PAGE_SIZE),
> -                                    ttm_bo_type_kernel,
> -                                    XE_BO_FLAG_SYSTEM |
> -                                    XE_BO_FLAG_GGTT |
> -                                    XE_BO_FLAG_GGTT_INVALIDATE, false);
> -     if (IS_ERR(bo))
> -             return PTR_ERR(bo);
> -
> -     xe_map_memcpy_to(xe, &bo->vmap, 0, buff, size);
> +     if (!xe_guc_buf_is_valid(buf))
> +             return -ENOBUFS;
>  
>       ret = guc_action_vf_save_restore(guc, vfid, GUC_PF_OPCODE_VF_RESTORE,
> -                                      xe_bo_ggtt_addr(bo), ndwords);
> +                                      xe_guc_buf_flush(buf), ndwords);
>       if (!ret)
>               ret = -ENODATA;
>       else if (ret > ndwords)
>               ret = -EPROTO;
>  
> -     xe_bo_unpin_map_no_vm(bo);
>       return ret;
>  }
>  
> diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h 
> b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h
> index e2d41750f863c..4f2f2783339c3 100644
> --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h
> +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h
> @@ -11,6 +11,9 @@
>  struct xe_gt;
>  struct xe_sriov_migration_data;
>  
> +/* TODO: get this information by querying GuC in the future */
> +#define XE_GT_SRIOV_PF_MIGRATION_GUC_DATA_MAX_SIZE SZ_8M
> +
>  int xe_gt_sriov_pf_migration_init(struct xe_gt *gt);
>  int xe_gt_sriov_pf_migration_save_guc_state(struct xe_gt *gt, unsigned int 
> vfid);
>  int xe_gt_sriov_pf_migration_restore_guc_state(struct xe_gt *gt, unsigned 
> int vfid);
> diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
> index ecc3e091b89e6..badae9a26220e 100644
> --- a/drivers/gpu/drm/xe/xe_guc.c
> +++ b/drivers/gpu/drm/xe/xe_guc.c
> @@ -24,6 +24,7 @@
>  #include "xe_gt_printk.h"
>  #include "xe_gt_sriov_vf.h"
>  #include "xe_gt_throttle.h"
> +#include "xe_gt_sriov_pf_migration.h"
>  #include "xe_guc_ads.h"
>  #include "xe_guc_buf.h"
>  #include "xe_guc_capture.h"
> @@ -40,6 +41,7 @@
>  #include "xe_mmio.h"
>  #include "xe_platform_types.h"
>  #include "xe_sriov.h"
> +#include "xe_sriov_pf_migration.h"
>  #include "xe_uc.h"
>  #include "xe_uc_fw.h"
>  #include "xe_wa.h"
> @@ -821,6 +823,14 @@ static int vf_guc_init_post_hwconfig(struct xe_guc *guc)
>       return 0;
>  }
>  
> +static u32 guc_additional_cache_size(struct xe_device *xe)
> +{
> +     if (IS_SRIOV_PF(xe) && xe_sriov_pf_migration_supported(xe))
> +             return XE_GT_SRIOV_PF_MIGRATION_GUC_DATA_MAX_SIZE;
> +     else
> +             return 0; /* Fallback to default size */
> +}
> +
>  /**
>   * xe_guc_init_post_hwconfig - initialize GuC post hwconfig load
>   * @guc: The GuC object
> @@ -860,7 +870,7 @@ int xe_guc_init_post_hwconfig(struct xe_guc *guc)
>       if (ret)
>               return ret;
>  
> -     ret = xe_guc_buf_cache_init(&guc->buf);
> +     ret = xe_guc_buf_cache_init_with_size(&guc->buf, 
> guc_additional_cache_size(guc_to_xe(guc)));

nit: quite long line ... either split, or choose shorter name for the helper
>       if (ret)
>               return ret;
>  

just nits, otherwise LGTM,

Reviewed-by: Michal Wajdeczko <[email protected]>

Reply via email to