On Wed, Nov 05, 2025 at 04:10:20PM +0100, Michał Winiarski wrote:
> Connect the helpers to allow save and restore of VRAM migration data in
> stop_copy / resume device state.
>
> Co-developed-by: Lukasz Laguna <[email protected]>
> Signed-off-by: Lukasz Laguna <[email protected]>
> Signed-off-by: Michał Winiarski <[email protected]>
Reviewed-by: Matthew Brost <[email protected]> > --- > drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c | 17 ++ > drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c | 211 ++++++++++++++++++ > drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h | 3 + > .../drm/xe/xe_gt_sriov_pf_migration_types.h | 2 + > drivers/gpu/drm/xe/xe_sriov_pf_control.c | 3 + > 5 files changed, 236 insertions(+) > > diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c > b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c > index abc2bd09288ea..aae0c98657408 100644 > --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c > +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c > @@ -891,6 +891,20 @@ static int pf_handle_vf_save_data(struct xe_gt *gt, > unsigned int vfid) > return -EAGAIN; > } > > + if (xe_gt_sriov_pf_migration_save_data_pending(gt, vfid, > + > XE_SRIOV_PACKET_TYPE_VRAM)) { > + ret = xe_gt_sriov_pf_migration_vram_save(gt, vfid); > + if (ret == -EAGAIN) > + return -EAGAIN; > + else if (ret) > + return ret; > + > + xe_gt_sriov_pf_migration_save_data_complete(gt, vfid, > + > XE_SRIOV_PACKET_TYPE_VRAM); > + > + return -EAGAIN; > + } > + > return 0; > } > > @@ -1129,6 +1143,9 @@ static int pf_handle_vf_restore_data(struct xe_gt *gt, > unsigned int vfid) > case XE_SRIOV_PACKET_TYPE_GUC: > ret = xe_gt_sriov_pf_migration_guc_restore(gt, vfid, data); > break; > + case XE_SRIOV_PACKET_TYPE_VRAM: > + ret = xe_gt_sriov_pf_migration_vram_restore(gt, vfid, data); > + break; > default: > xe_gt_sriov_notice(gt, "Skipping VF%u unknown data type: %d\n", > vfid, data->type); > break; > diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c > b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c > index 22f471f269cfa..c62bb67c20a6b 100644 > --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c > +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c > @@ -19,6 +19,7 @@ > #include "xe_gt_sriov_printk.h" > #include "xe_guc_buf.h" > #include "xe_guc_ct.h" > +#include "xe_migrate.h" > #include "xe_mmio.h" > #include "xe_sriov.h" > 
#include "xe_sriov_packet.h" > @@ -501,6 +502,205 @@ int xe_gt_sriov_pf_migration_mmio_restore(struct xe_gt > *gt, unsigned int vfid, > return pf_restore_vf_mmio_mig_data(gt, vfid, data); > } > > +static ssize_t pf_migration_vram_size(struct xe_gt *gt, unsigned int vfid) > +{ > + if (!xe_gt_is_main_type(gt)) > + return 0; > + > + return xe_gt_sriov_pf_config_get_lmem(gt, vfid); > +} > + > +static struct dma_fence *__pf_save_restore_vram(struct xe_gt *gt, unsigned > int vfid, > + struct xe_bo *vram, u64 > vram_offset, > + struct xe_bo *sysmem, u64 > sysmem_offset, > + size_t size, bool save) > +{ > + struct dma_fence *ret = NULL; > + struct drm_exec exec; > + int err; > + > + drm_exec_init(&exec, 0, 0); > + drm_exec_until_all_locked(&exec) { > + err = drm_exec_lock_obj(&exec, &vram->ttm.base); > + drm_exec_retry_on_contention(&exec); > + if (err) { > + ret = ERR_PTR(err); > + goto err; > + } > + > + err = drm_exec_lock_obj(&exec, &sysmem->ttm.base); > + drm_exec_retry_on_contention(&exec); > + if (err) { > + ret = ERR_PTR(err); > + goto err; > + } > + } > + > + ret = xe_migrate_vram_copy_chunk(vram, vram_offset, sysmem, > sysmem_offset, size, > + save ? 
XE_MIGRATE_COPY_TO_SRAM : > XE_MIGRATE_COPY_TO_VRAM); > + > +err: > + drm_exec_fini(&exec); > + > + return ret; > +} > + > +#define PF_VRAM_SAVE_RESTORE_TIMEOUT (5 * HZ) > +static int pf_save_vram_chunk(struct xe_gt *gt, unsigned int vfid, > + struct xe_bo *src_vram, u64 src_vram_offset, > + size_t size) > +{ > + struct xe_sriov_packet *data; > + struct dma_fence *fence; > + int ret; > + > + data = xe_sriov_packet_alloc(gt_to_xe(gt)); > + if (!data) > + return -ENOMEM; > + > + ret = xe_sriov_packet_init(data, gt->tile->id, gt->info.id, > + XE_SRIOV_PACKET_TYPE_VRAM, src_vram_offset, > + size); > + if (ret) > + goto fail; > + > + fence = __pf_save_restore_vram(gt, vfid, > + src_vram, src_vram_offset, > + data->bo, 0, size, true); > + > + ret = dma_fence_wait_timeout(fence, false, > PF_VRAM_SAVE_RESTORE_TIMEOUT); > + dma_fence_put(fence); > + if (!ret) { > + ret = -ETIME; > + goto fail; > + } > + > + pf_dump_mig_data(gt, vfid, data, "VRAM data save"); > + > + ret = xe_gt_sriov_pf_migration_save_produce(gt, vfid, data); > + if (ret) > + goto fail; > + > + return 0; > + > +fail: > + xe_sriov_packet_free(data); > + return ret; > +} > + > +#define VF_VRAM_STATE_CHUNK_MAX_SIZE SZ_512M > +static int pf_save_vf_vram_mig_data(struct xe_gt *gt, unsigned int vfid) > +{ > + struct xe_gt_sriov_migration_data *migration = pf_pick_gt_migration(gt, > vfid); > + loff_t *offset = &migration->save.vram_offset; > + struct xe_bo *vram; > + size_t vram_size, chunk_size; > + int ret; > + > + vram = xe_gt_sriov_pf_config_get_lmem_obj(gt, vfid); > + if (!vram) > + return -ENXIO; > + > + vram_size = xe_bo_size(vram); > + > + xe_gt_assert(gt, *offset < vram_size); > + > + chunk_size = min(vram_size - *offset, VF_VRAM_STATE_CHUNK_MAX_SIZE); > + > + ret = pf_save_vram_chunk(gt, vfid, vram, *offset, chunk_size); > + if (ret) > + goto fail; > + > + *offset += chunk_size; > + > + xe_bo_put(vram); > + > + if (*offset < vram_size) > + return -EAGAIN; > + > + return 0; > + > +fail: > + 
xe_bo_put(vram); > + xe_gt_sriov_err(gt, "Failed to save VF%u VRAM data (%pe)\n", vfid, > ERR_PTR(ret)); > + return ret; > +} > + > +static int pf_restore_vf_vram_mig_data(struct xe_gt *gt, unsigned int vfid, > + struct xe_sriov_packet *data) > +{ > + u64 end = data->hdr.offset + data->hdr.size; > + struct dma_fence *fence; > + struct xe_bo *vram; > + size_t size; > + int ret = 0; > + > + vram = xe_gt_sriov_pf_config_get_lmem_obj(gt, vfid); > + if (!vram) > + return -ENXIO; > + > + size = xe_bo_size(vram); > + > + if (end > size || end < data->hdr.size) { > + ret = -EINVAL; > + goto err; > + } > + > + pf_dump_mig_data(gt, vfid, data, "VRAM data restore"); > + > + fence = __pf_save_restore_vram(gt, vfid, vram, data->hdr.offset, > + data->bo, 0, data->hdr.size, false); > + ret = dma_fence_wait_timeout(fence, false, > PF_VRAM_SAVE_RESTORE_TIMEOUT); > + dma_fence_put(fence); > + if (!ret) { > + ret = -ETIME; > + goto err; > + } > + > + return 0; > +err: > + xe_bo_put(vram); > + xe_gt_sriov_err(gt, "Failed to restore VF%u VRAM data (%pe)\n", vfid, > ERR_PTR(ret)); > + return ret; > +} > + > +/** > + * xe_gt_sriov_pf_migration_vram_save() - Save VF VRAM migration data. > + * @gt: the &xe_gt > + * @vfid: the VF identifier (can't be 0) > + * > + * This function is for PF only. > + * > + * Return: 0 on success or a negative error code on failure. > + */ > +int xe_gt_sriov_pf_migration_vram_save(struct xe_gt *gt, unsigned int vfid) > +{ > + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); > + xe_gt_assert(gt, vfid != PFID); > + xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); > + > + return pf_save_vf_vram_mig_data(gt, vfid); > +} > + > +/** > + * xe_gt_sriov_pf_migration_vram_restore() - Restore VF VRAM migration data. > + * @gt: the &xe_gt > + * @vfid: the VF identifier (can't be 0) > + * > + * This function is for PF only. > + * > + * Return: 0 on success or a negative error code on failure. 
> + */ > +int xe_gt_sriov_pf_migration_vram_restore(struct xe_gt *gt, unsigned int > vfid, > + struct xe_sriov_packet *data) > +{ > + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); > + xe_gt_assert(gt, vfid != PFID); > + xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); > + > + return pf_restore_vf_vram_mig_data(gt, vfid, data); > +} > + > /** > * xe_gt_sriov_pf_migration_size() - Total size of migration data from all > components within a GT. > * @gt: the &xe_gt > @@ -540,6 +740,13 @@ ssize_t xe_gt_sriov_pf_migration_size(struct xe_gt *gt, > unsigned int vfid) > size += sizeof(struct xe_sriov_pf_migration_hdr); > total += size; > > + size = pf_migration_vram_size(gt, vfid); > + if (size < 0) > + return size; > + if (size > 0) > + size += sizeof(struct xe_sriov_pf_migration_hdr); > + total += size; > + > return total; > } > > @@ -602,6 +809,7 @@ void xe_gt_sriov_pf_migration_save_init(struct xe_gt *gt, > unsigned int vfid) > struct xe_gt_sriov_migration_data *migration = pf_pick_gt_migration(gt, > vfid); > > migration->save.data_remaining = 0; > + migration->save.vram_offset = 0; > > xe_gt_assert(gt, pf_migration_guc_size(gt, vfid) > 0); > pf_migration_save_data_todo(gt, vfid, XE_SRIOV_PACKET_TYPE_GUC); > @@ -611,6 +819,9 @@ void xe_gt_sriov_pf_migration_save_init(struct xe_gt *gt, > unsigned int vfid) > > xe_gt_assert(gt, pf_migration_mmio_size(gt, vfid) > 0); > pf_migration_save_data_todo(gt, vfid, XE_SRIOV_PACKET_TYPE_MMIO); > + > + if (pf_migration_vram_size(gt, vfid) > 0) > + pf_migration_save_data_todo(gt, vfid, > XE_SRIOV_PACKET_TYPE_VRAM); > } > > /** > diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h > b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h > index 04b3ed0d2aa23..181207a637b93 100644 > --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h > +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h > @@ -25,6 +25,9 @@ int xe_gt_sriov_pf_migration_ggtt_restore(struct xe_gt *gt, > unsigned int vfid, > int 
xe_gt_sriov_pf_migration_mmio_save(struct xe_gt *gt, unsigned int vfid); > int xe_gt_sriov_pf_migration_mmio_restore(struct xe_gt *gt, unsigned int > vfid, > struct xe_sriov_packet *data); > +int xe_gt_sriov_pf_migration_vram_save(struct xe_gt *gt, unsigned int vfid); > +int xe_gt_sriov_pf_migration_vram_restore(struct xe_gt *gt, unsigned int > vfid, > + struct xe_sriov_packet *data); > > ssize_t xe_gt_sriov_pf_migration_size(struct xe_gt *gt, unsigned int vfid); > > diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h > b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h > index 9f24878690d9c..f50c64241e9c0 100644 > --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h > +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h > @@ -20,6 +20,8 @@ struct xe_gt_sriov_migration_data { > struct { > /** @save.data_remaining: bitmap of migration types that need > to be saved */ > unsigned long data_remaining; > + /** @save.vram_offset: last saved offset within VRAM, used for > chunked VRAM save */ > + loff_t vram_offset; > } save; > }; > > diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_control.c > b/drivers/gpu/drm/xe/xe_sriov_pf_control.c > index 87205f0505ad0..eec218c710278 100644 > --- a/drivers/gpu/drm/xe/xe_sriov_pf_control.c > +++ b/drivers/gpu/drm/xe/xe_sriov_pf_control.c > @@ -5,6 +5,7 @@ > > #include "xe_device.h" > #include "xe_gt_sriov_pf_control.h" > +#include "xe_gt_sriov_pf_migration.h" > #include "xe_sriov_packet.h" > #include "xe_sriov_pf_control.h" > #include "xe_sriov_printk.h" > @@ -171,6 +172,8 @@ int xe_sriov_pf_control_trigger_save_vf(struct xe_device > *xe, unsigned int vfid) > return ret; > > for_each_gt(gt, xe, id) { > + xe_gt_sriov_pf_migration_save_init(gt, vfid); > + > ret = xe_gt_sriov_pf_control_trigger_save_vf(gt, vfid); > if (ret) > return ret; > -- > 2.51.2 >
