On Thu, Oct 23, 2025 at 07:37:48PM +0200, Michal Wajdeczko wrote:
> 
> 
> On 10/22/2025 12:41 AM, Michał Winiarski wrote:
> > Contiguous PF GGTT VMAs can be scarce after creating VFs.
> > Increase the GuC buffer cache size to 4M for PF so that we can fit GuC
> > migration data (which currently maxes out at just under 4M) and use the
> 
> but the code below still uses 8M
Yeah - turns out we need more than 4M (I did my math on one of the structs,
but there's actually additional data present), so let's just stick to 8M
for now.

> 
> > cache instead of allocating fresh BOs.
> > 
> > Signed-off-by: Michał Winiarski <[email protected]>
> > ---
> >  drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c | 46 ++++++-------------
> >  drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h |  3 ++
> >  drivers/gpu/drm/xe/xe_guc.c                   | 12 ++++-
> >  3 files changed, 28 insertions(+), 33 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
> > index 4e26feb9c267f..04fad3126865c 100644
> > --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
> > +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
> > @@ -11,7 +11,7 @@
> >  #include "xe_gt_sriov_pf_helpers.h"
> >  #include "xe_gt_sriov_pf_migration.h"
> >  #include "xe_gt_sriov_printk.h"
> > -#include "xe_guc.h"
> > +#include "xe_guc_buf.h"
> >  #include "xe_guc_ct.h"
> >  #include "xe_sriov.h"
> >  #include "xe_sriov_migration_data.h"
> > @@ -57,73 +57,55 @@ static int pf_send_guc_query_vf_state_size(struct xe_gt *gt, unsigned int vfid)
> >  
> >  /* Return: number of state dwords saved or a negative error code on failure */
> >  static int pf_send_guc_save_vf_state(struct xe_gt *gt, unsigned int vfid,
> > -				     void *buff, size_t size)
> > +				     void *dst, size_t size)
> >  {
> >  	const int ndwords = size / sizeof(u32);
> > -	struct xe_tile *tile = gt_to_tile(gt);
> > -	struct xe_device *xe = tile_to_xe(tile);
> >  	struct xe_guc *guc = &gt->uc.guc;
> > -	struct xe_bo *bo;
> > +	CLASS(xe_guc_buf, buf)(&guc->buf, ndwords);
> >  	int ret;
> >  
> >  	xe_gt_assert(gt, size % sizeof(u32) == 0);
> >  	xe_gt_assert(gt, size == ndwords * sizeof(u32));
> >  
> > -	bo = xe_bo_create_pin_map_novm(xe, tile,
> > -				       ALIGN(size, PAGE_SIZE),
> > -				       ttm_bo_type_kernel,
> > -				       XE_BO_FLAG_SYSTEM |
> > -				       XE_BO_FLAG_GGTT |
> > -				       XE_BO_FLAG_GGTT_INVALIDATE, false);
> > -	if (IS_ERR(bo))
> > -		return PTR_ERR(bo);
> > +	if (!xe_guc_buf_is_valid(buf))
> > +		return -ENOBUFS;
> > +
> > +	memset(xe_guc_buf_cpu_ptr(buf), 0, size);
> 
> hmm, I didn't find in the GuC spec that this buffer must be zeroed, so why
> bother?

That was found during testing, GuC actually expects the buffer to be zeroed.
I'll ping folks to update the spec.

> > 
> >  	ret = guc_action_vf_save_restore(guc, vfid, GUC_PF_OPCODE_VF_SAVE,
> > -					 xe_bo_ggtt_addr(bo), ndwords);
> > +					 xe_guc_buf_flush(buf), ndwords);
> >  	if (!ret)
> >  		ret = -ENODATA;
> >  	else if (ret > ndwords)
> >  		ret = -EPROTO;
> >  	else if (ret > 0)
> > -		xe_map_memcpy_from(xe, buff, &bo->vmap, 0, ret * sizeof(u32));
> > +		memcpy(dst, xe_guc_buf_sync_read(buf), ret * sizeof(u32));
> 
> nit: given this usage, maybe one day we should add optimized variant that
> copies directly to dst?
> 
> 	xe_guc_buf_sync_into(buf, dst, size);
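For reference, a rough, untested sketch of such a helper, assuming it just
wraps the existing xe_guc_buf_sync_read() and that struct xe_guc_buf is
passed by value like the other xe_guc_buf_*() helpers (the name is taken
from your snippet above):

	/*
	 * Sketch only: sync the GuC-written data back, then copy it straight
	 * into the caller's destination buffer. A truly optimized variant
	 * could skip the intermediate CPU copy, but that would need access
	 * to xe_guc_buf internals.
	 */
	static inline void *xe_guc_buf_sync_into(struct xe_guc_buf buf, void *dst, size_t size)
	{
		return memcpy(dst, xe_guc_buf_sync_read(buf), size);
	}
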
> > 
> > -	xe_bo_unpin_map_no_vm(bo);
> >  	return ret;
> >  }
> >  
> >  /* Return: number of state dwords restored or a negative error code on failure */
> >  static int pf_send_guc_restore_vf_state(struct xe_gt *gt, unsigned int vfid,
> > -					const void *buff, size_t size)
> > +					const void *src, size_t size)
> >  {
> >  	const int ndwords = size / sizeof(u32);
> > -	struct xe_tile *tile = gt_to_tile(gt);
> > -	struct xe_device *xe = tile_to_xe(tile);
> >  	struct xe_guc *guc = &gt->uc.guc;
> > -	struct xe_bo *bo;
> > +	CLASS(xe_guc_buf_from_data, buf)(&guc->buf, src, size);
> >  	int ret;
> >  
> >  	xe_gt_assert(gt, size % sizeof(u32) == 0);
> >  	xe_gt_assert(gt, size == ndwords * sizeof(u32));
> >  
> > -	bo = xe_bo_create_pin_map_novm(xe, tile,
> > -				       ALIGN(size, PAGE_SIZE),
> > -				       ttm_bo_type_kernel,
> > -				       XE_BO_FLAG_SYSTEM |
> > -				       XE_BO_FLAG_GGTT |
> > -				       XE_BO_FLAG_GGTT_INVALIDATE, false);
> > -	if (IS_ERR(bo))
> > -		return PTR_ERR(bo);
> > -
> > -	xe_map_memcpy_to(xe, &bo->vmap, 0, buff, size);
> > +	if (!xe_guc_buf_is_valid(buf))
> > +		return -ENOBUFS;
> >  
> >  	ret = guc_action_vf_save_restore(guc, vfid, GUC_PF_OPCODE_VF_RESTORE,
> > -					 xe_bo_ggtt_addr(bo), ndwords);
> > +					 xe_guc_buf_flush(buf), ndwords);
> >  	if (!ret)
> >  		ret = -ENODATA;
> >  	else if (ret > ndwords)
> >  		ret = -EPROTO;
> >  
> > -	xe_bo_unpin_map_no_vm(bo);
> >  	return ret;
> >  }
> >  
> > diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h
> > index e2d41750f863c..4f2f2783339c3 100644
> > --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h
> > +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h
> > @@ -11,6 +11,9 @@
> >  struct xe_gt;
> >  struct xe_sriov_migration_data;
> >  
> > +/* TODO: get this information by querying GuC in the future */
> > +#define XE_GT_SRIOV_PF_MIGRATION_GUC_DATA_MAX_SIZE SZ_8M
> 
> so it's 8M or 4M ?
> 
> maybe wrap that into function now
> 
> u32 xe_gt_sriov_pf_migration_guc_data_size(struct xe_gt *gt)
> {
> 	if (xe_sriov_pf_migration_supported(gt_to_xe))
> 		return SZ_4M; /* TODO: ... */
> 	return 0;
> }

XE_GT_SRIOV_PF_MIGRATION_GUC_DATA_MAX_SIZE disappears from this header as a
result of previous changes, so the size calculation can be kept static.
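For the next revision that would be roughly the following (just a sketch of
the intent, with the constant inlined in xe_guc.c and using the 8M value
discussed above rather than 4M):

	static u32 guc_buf_cache_size(struct xe_guc *guc)
	{
		struct xe_device *xe = guc_to_xe(guc);

		/* PF migration data is the largest object the cache must fit */
		if (IS_SRIOV_PF(xe) && xe_sriov_pf_migration_supported(xe))
			return SZ_8M;

		return XE_GUC_BUF_CACHE_DEFAULT_SIZE;
	}
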
> 
> > +
> >  int xe_gt_sriov_pf_migration_init(struct xe_gt *gt);
> >  int xe_gt_sriov_pf_migration_save_guc_state(struct xe_gt *gt, unsigned int vfid);
> >  int xe_gt_sriov_pf_migration_restore_guc_state(struct xe_gt *gt, unsigned int vfid);
> > diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
> > index 7c65528859ecb..cd6ab277a7876 100644
> > --- a/drivers/gpu/drm/xe/xe_guc.c
> > +++ b/drivers/gpu/drm/xe/xe_guc.c
> > @@ -24,6 +24,7 @@
> >  #include "xe_gt_printk.h"
> >  #include "xe_gt_sriov_vf.h"
> >  #include "xe_gt_throttle.h"
> > +#include "xe_gt_sriov_pf_migration.h"
> >  #include "xe_guc_ads.h"
> >  #include "xe_guc_buf.h"
> >  #include "xe_guc_capture.h"
> > @@ -40,6 +41,7 @@
> >  #include "xe_mmio.h"
> >  #include "xe_platform_types.h"
> >  #include "xe_sriov.h"
> > +#include "xe_sriov_pf_migration.h"
> >  #include "xe_uc.h"
> >  #include "xe_uc_fw.h"
> >  #include "xe_wa.h"
> > @@ -821,6 +823,14 @@ static int vf_guc_init_post_hwconfig(struct xe_guc *guc)
> >  	return 0;
> >  }
> >  
> > +static u32 guc_buf_cache_size(struct xe_guc *guc)
> > +{
> > +	if (IS_SRIOV_PF(guc_to_xe(guc)) && xe_sriov_pf_migration_supported(guc_to_xe(guc)))
> > +		return XE_GT_SRIOV_PF_MIGRATION_GUC_DATA_MAX_SIZE;
> 
> then
> 	u32 size = XE_GUC_BUF_CACHE_DEFAULT_SIZE;
> 
> 	if (IS_SRIOV_PF(guc_to_xe(guc)))
> 		size += xe_gt_sriov_pf_migration_guc_data_size(guc_to_gt(guc));
> 
> 	return size;

As the cache gets reused, we don't need to add anything to the default (we
should just replace the size with the new requirement for the largest
object size).

Thanks,
-Michał

> 
> > +	else
> > +		return XE_GUC_BUF_CACHE_DEFAULT_SIZE;
> > +}
> > +
> >  /**
> >   * xe_guc_init_post_hwconfig - initialize GuC post hwconfig load
> >   * @guc: The GuC object
> > @@ -860,7 +870,7 @@ int xe_guc_init_post_hwconfig(struct xe_guc *guc)
> >  	if (ret)
> >  		return ret;
> >  
> > -	ret = xe_guc_buf_cache_init(&guc->buf, XE_GUC_BUF_CACHE_DEFAULT_SIZE);
> > +	ret = xe_guc_buf_cache_init(&guc->buf, guc_buf_cache_size(guc));
> >  	if (ret)
> >  		return ret;
> > 
> 
