Connect the helpers to allow save and restore of VRAM migration data in the stop_copy / resume device states.
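VRAM is processed in chunks (at most VF_VRAM_STATE_CHUNK_MAX_SIZE per pass), with the per-VF vram_save_offset tracking progress and the control state machine re-entering the SAVE_DATA_VRAM step for as long as -EAGAIN is returned.

For illustration only (not part of the change): a minimal, self-contained user-space C sketch of that -EAGAIN-driven chunking pattern. The names save_chunk(), save_step() and CHUNK_MAX are hypothetical stand-ins for pf_save_vram_chunk(), pf_save_vf_vram_mig_data() and VF_VRAM_STATE_CHUNK_MAX_SIZE.

#include <errno.h>
#include <stddef.h>
#include <stdio.h>

#define CHUNK_MAX (512UL * 1024 * 1024)	/* stand-in for VF_VRAM_STATE_CHUNK_MAX_SIZE */

/* Hypothetical stand-in for pf_save_vram_chunk(): pretend every copy succeeds. */
static int save_chunk(size_t offset, size_t size)
{
	printf("saving %zu bytes at offset %zu\n", size, offset);
	return 0;
}

/* Mirrors the shape of pf_save_vf_vram_mig_data(): one chunk per call, -EAGAIN until done. */
static int save_step(size_t *offset, size_t vram_size)
{
	size_t chunk = vram_size - *offset;
	int ret;

	if (chunk > CHUNK_MAX)
		chunk = CHUNK_MAX;

	ret = save_chunk(*offset, chunk);
	if (ret)
		return ret;

	*offset += chunk;
	return *offset < vram_size ? -EAGAIN : 0;
}

int main(void)
{
	size_t offset = 0;	/* plays the role of migration->vram_save_offset */
	size_t vram_size = 3 * CHUNK_MAX / 2;
	int ret;

	/* The control state machine re-enters SAVE_DATA_VRAM while -EAGAIN is returned. */
	do {
		ret = save_step(&offset, vram_size);
	} while (ret == -EAGAIN);

	return ret ? 1 : 0;
}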
Co-developed-by: Lukasz Laguna <[email protected]>
Signed-off-by: Lukasz Laguna <[email protected]>
Signed-off-by: Michał Winiarski <[email protected]>
---
 drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c   |  18 ++
 .../gpu/drm/xe/xe_gt_sriov_pf_control_types.h |   2 +
 drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c | 222 ++++++++++++++++++
 drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h |   6 +
 .../drm/xe/xe_gt_sriov_pf_migration_types.h   |   3 +
 drivers/gpu/drm/xe/xe_sriov_pf_control.c      |   3 +
 6 files changed, 254 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
index e7156ad3d1839..680f2de44144b 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
@@ -191,6 +191,7 @@ static const char *control_bit_to_string(enum xe_gt_sriov_control_bits bit)
 	CASE2STR(SAVE_DATA_GUC);
 	CASE2STR(SAVE_DATA_GGTT);
 	CASE2STR(SAVE_DATA_MMIO);
+	CASE2STR(SAVE_DATA_VRAM);
 	CASE2STR(SAVE_DATA_DONE);
 	CASE2STR(SAVE_FAILED);
 	CASE2STR(SAVED);
@@ -832,6 +833,7 @@ static void pf_exit_vf_save_wip(struct xe_gt *gt, unsigned int vfid)
 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_DATA_GGTT);
 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_DATA_MMIO);
 		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_DATA_DONE);
+		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_DATA_VRAM);
 	}
 }
 
@@ -885,6 +887,19 @@ static int pf_handle_vf_save_data(struct xe_gt *gt, unsigned int vfid)
 		ret = xe_gt_sriov_pf_migration_mmio_save(gt, vfid);
 		if (ret)
 			return ret;
+
+		pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_DATA_VRAM);
+		return -EAGAIN;
+	}
+
+	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_DATA_VRAM)) {
+		if (xe_gt_sriov_pf_migration_vram_size(gt, vfid) > 0) {
+			ret = xe_gt_sriov_pf_migration_vram_save(gt, vfid);
+			if (ret == -EAGAIN)
+				pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_DATA_VRAM);
+			if (ret)
+				return ret;
+		}
 	}
 
 	return 0;
@@ -1100,6 +1115,9 @@ pf_handle_vf_restore_data(struct xe_gt *gt, unsigned int vfid)
 	case XE_SRIOV_MIGRATION_DATA_TYPE_GUC:
 		ret = xe_gt_sriov_pf_migration_guc_restore(gt, vfid, data);
 		break;
+	case XE_SRIOV_MIGRATION_DATA_TYPE_VRAM:
+		ret = xe_gt_sriov_pf_migration_vram_restore(gt, vfid, data);
+		break;
 	default:
 		xe_gt_sriov_notice(gt, "Skipping VF%u unknown data type: %d\n", vfid, data->type);
 		break;
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h
index 9dfcebd5078ac..fba10136f7cc7 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h
@@ -36,6 +36,7 @@
  * @XE_GT_SRIOV_STATE_SAVE_DATA_GUC: indicates PF needs to save VF GuC migration data.
  * @XE_GT_SRIOV_STATE_SAVE_DATA_GGTT: indicates PF needs to save VF GGTT migration data.
  * @XE_GT_SRIOV_STATE_SAVE_DATA_MMIO: indicates PF needs to save VF MMIO migration data.
+ * @XE_GT_SRIOV_STATE_SAVE_DATA_VRAM: indicates PF needs to save VF VRAM migration data.
  * @XE_GT_SRIOV_STATE_SAVE_DATA_DONE: indicates that all migration data was produced by Xe.
  * @XE_GT_SRIOV_STATE_SAVE_FAILED: indicates that VF save operation has failed.
  * @XE_GT_SRIOV_STATE_SAVED: indicates that VF data is saved.
@@ -82,6 +83,7 @@ enum xe_gt_sriov_control_bits {
 	XE_GT_SRIOV_STATE_SAVE_DATA_GUC,
 	XE_GT_SRIOV_STATE_SAVE_DATA_GGTT,
 	XE_GT_SRIOV_STATE_SAVE_DATA_MMIO,
+	XE_GT_SRIOV_STATE_SAVE_DATA_VRAM,
 	XE_GT_SRIOV_STATE_SAVE_DATA_DONE,
 	XE_GT_SRIOV_STATE_SAVE_FAILED,
 	XE_GT_SRIOV_STATE_SAVED,
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
index 41335b15ffdbe..2c6a86d98ee31 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
@@ -17,6 +17,7 @@
 #include "xe_gt_sriov_printk.h"
 #include "xe_guc_buf.h"
 #include "xe_guc_ct.h"
+#include "xe_migrate.h"
 #include "xe_sriov.h"
 #include "xe_sriov_migration_data.h"
 #include "xe_sriov_pf_migration.h"
@@ -485,6 +486,220 @@ int xe_gt_sriov_pf_migration_mmio_restore(struct xe_gt *gt, unsigned int vfid,
 	return pf_restore_vf_mmio_mig_data(gt, vfid, data);
 }
 
+/**
+ * xe_gt_sriov_pf_migration_vram_size() - Get the size of VF VRAM migration data.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function is for PF only.
+ *
+ * Return: size in bytes or a negative error code on failure.
+ */
+ssize_t xe_gt_sriov_pf_migration_vram_size(struct xe_gt *gt, unsigned int vfid)
+{
+	if (gt != xe_root_mmio_gt(gt_to_xe(gt)))
+		return 0;
+
+	return xe_gt_sriov_pf_config_get_lmem(gt, vfid);
+}
+
+static struct dma_fence *__pf_save_restore_vram(struct xe_gt *gt, unsigned int vfid,
+						struct xe_bo *vram, u64 vram_offset,
+						struct xe_bo *sysmem, u64 sysmem_offset,
+						size_t size, bool save)
+{
+	struct dma_fence *ret = NULL;
+	struct drm_exec exec;
+	int err;
+
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+	drm_exec_until_all_locked(&exec) {
+		err = drm_exec_lock_obj(&exec, &vram->ttm.base);
+		drm_exec_retry_on_contention(&exec);
+		if (err) {
+			ret = ERR_PTR(err);
+			goto err;
+		}
+
+		err = drm_exec_lock_obj(&exec, &sysmem->ttm.base);
+		drm_exec_retry_on_contention(&exec);
+		if (err) {
+			ret = ERR_PTR(err);
+			goto err;
+		}
+	}
+
+	ret = xe_migrate_vram_copy_chunk(vram, vram_offset, sysmem, sysmem_offset, size,
+					 save ? XE_MIGRATE_COPY_TO_SRAM : XE_MIGRATE_COPY_TO_VRAM);
+
+err:
+	drm_exec_fini(&exec);
+
+	return ret;
+}
+
+static int pf_save_vram_chunk(struct xe_gt *gt, unsigned int vfid,
+			      struct xe_bo *src_vram, u64 src_vram_offset,
+			      size_t size)
+{
+	struct xe_sriov_migration_data *data;
+	struct dma_fence *fence;
+	int ret;
+
+	data = xe_sriov_migration_data_alloc(gt_to_xe(gt));
+	if (!data)
+		return -ENOMEM;
+
+	ret = xe_sriov_migration_data_init(data, gt->tile->id, gt->info.id,
+					   XE_SRIOV_MIGRATION_DATA_TYPE_VRAM,
+					   src_vram_offset, size);
+	if (ret)
+		goto fail;
+
+	fence = __pf_save_restore_vram(gt, vfid,
+				       src_vram, src_vram_offset,
+				       data->bo, 0, size, true);
+
+	ret = dma_fence_wait_timeout(fence, false, 5 * HZ);
+	dma_fence_put(fence);
+	if (!ret) {
+		ret = -ETIME;
+		goto fail;
+	}
+
+	pf_dump_mig_data(gt, vfid, data);
+
+	ret = xe_gt_sriov_pf_migration_save_produce(gt, vfid, data);
+	if (ret)
+		goto fail;
+
+	return 0;
+
+fail:
+	xe_sriov_migration_data_free(data);
+	return ret;
+}
+
+#define VF_VRAM_STATE_CHUNK_MAX_SIZE SZ_512M
+static int pf_save_vf_vram_mig_data(struct xe_gt *gt, unsigned int vfid)
+{
+	struct xe_gt_sriov_migration_data *migration = pf_pick_gt_migration(gt, vfid);
+	loff_t *offset = &migration->vram_save_offset;
+	struct xe_bo *vram;
+	size_t vram_size, chunk_size;
+	int ret;
+
+	vram = xe_gt_sriov_pf_config_get_lmem_obj(gt, vfid);
+	if (!vram)
+		return -ENXIO;
+
+	vram_size = xe_bo_size(vram);
+	chunk_size = min(vram_size - *offset, VF_VRAM_STATE_CHUNK_MAX_SIZE);
+
+	ret = pf_save_vram_chunk(gt, vfid, vram, *offset, chunk_size);
+	if (ret)
+		goto fail;
+
+	*offset += chunk_size;
+
+	xe_bo_put(vram);
+
+	if (*offset < vram_size)
+		return -EAGAIN;
+
+	return 0;
+
+fail:
+	xe_bo_put(vram);
+	xe_gt_sriov_err(gt, "Failed to save VF%u VRAM data (%pe)\n", vfid, ERR_PTR(ret));
+	return ret;
+}
+
+static int pf_restore_vf_vram_mig_data(struct xe_gt *gt, unsigned int vfid,
+				       struct xe_sriov_migration_data *data)
+{
+	u64 end = data->hdr.offset + data->hdr.size;
+	struct dma_fence *fence;
+	struct xe_bo *vram;
+	size_t size;
+	int ret = 0;
+
+	vram = xe_gt_sriov_pf_config_get_lmem_obj(gt, vfid);
+	if (!vram)
+		return -ENXIO;
+
+	size = xe_bo_size(vram);
+
+	if (end > size || end < data->hdr.size) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	pf_dump_mig_data(gt, vfid, data);
+
+	fence = __pf_save_restore_vram(gt, vfid, vram, data->hdr.offset,
+				       data->bo, 0, data->hdr.size, false);
+	ret = dma_fence_wait_timeout(fence, false, 5 * HZ);
+	dma_fence_put(fence);
+	if (!ret) {
+		ret = -ETIME;
+		goto err;
+	}
+
+	return 0;
+err:
+	xe_bo_put(vram);
+	xe_gt_sriov_err(gt, "Failed to restore VF%u VRAM data (%pe)\n", vfid, ERR_PTR(ret));
+	return ret;
+}
+
+/**
+ * xe_gt_sriov_pf_migration_vram_save() - Save VF VRAM migration data.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier (can't be 0)
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_migration_vram_save(struct xe_gt *gt, unsigned int vfid)
+{
+	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+	xe_gt_assert(gt, vfid != PFID);
+	xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt)));
+
+	return pf_save_vf_vram_mig_data(gt, vfid);
+}
+
+/**
+ * xe_gt_sriov_pf_migration_vram_restore() - Restore VF VRAM migration data.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier (can't be 0)
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_migration_vram_restore(struct xe_gt *gt, unsigned int vfid,
+					  struct xe_sriov_migration_data *data)
+{
+	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+	xe_gt_assert(gt, vfid != PFID);
+	xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt)));
+
+	return pf_restore_vf_vram_mig_data(gt, vfid, data);
+}
+
+/**
+ * xe_gt_sriov_pf_migration_save_init() - Initialize per-GT migration related data.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier (can't be 0)
+ */
+void xe_gt_sriov_pf_migration_save_init(struct xe_gt *gt, unsigned int vfid)
+{
+	pf_pick_gt_migration(gt, vfid)->vram_save_offset = 0;
+}
+
 /**
  * xe_gt_sriov_pf_migration_size() - Total size of migration data from all components within a GT.
  * @gt: the &xe_gt
@@ -522,6 +737,13 @@ ssize_t xe_gt_sriov_pf_migration_size(struct xe_gt *gt, unsigned int vfid)
 	size += sizeof(struct xe_sriov_pf_migration_hdr);
 	total += size;
 
+	size = xe_gt_sriov_pf_migration_vram_size(gt, vfid);
+	if (size < 0)
+		return size;
+	else if (size > 0)
+		size += sizeof(struct xe_sriov_pf_migration_hdr);
+	total += size;
+
 	return total;
 }
 
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h
index 24a233c4cd0bb..ca518eda5429f 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h
@@ -27,6 +27,12 @@ ssize_t xe_gt_sriov_pf_migration_mmio_size(struct xe_gt *gt, unsigned int vfid);
 int xe_gt_sriov_pf_migration_mmio_save(struct xe_gt *gt, unsigned int vfid);
 int xe_gt_sriov_pf_migration_mmio_restore(struct xe_gt *gt, unsigned int vfid,
 					  struct xe_sriov_migration_data *data);
+ssize_t xe_gt_sriov_pf_migration_vram_size(struct xe_gt *gt, unsigned int vfid);
+int xe_gt_sriov_pf_migration_vram_save(struct xe_gt *gt, unsigned int vfid);
+int xe_gt_sriov_pf_migration_vram_restore(struct xe_gt *gt, unsigned int vfid,
+					  struct xe_sriov_migration_data *data);
+
+void xe_gt_sriov_pf_migration_save_init(struct xe_gt *gt, unsigned int vfid);
 
 ssize_t xe_gt_sriov_pf_migration_size(struct xe_gt *gt, unsigned int vfid);
 
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h
index 75d8b94cbbefb..39a940c9b0a4b 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h
@@ -16,6 +16,9 @@
 struct xe_gt_sriov_migration_data {
 	/** @ring: queue containing VF save / restore migration data */
 	struct ptr_ring ring;
+
+	/** @vram_save_offset: offset within VRAM, used for chunked VRAM save */
+	loff_t vram_save_offset;
 };
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_sriov_pf_control.c
index c2768848daba1..aac8ecb861545 100644
--- a/drivers/gpu/drm/xe/xe_sriov_pf_control.c
+++ b/drivers/gpu/drm/xe/xe_sriov_pf_control.c
@@ -5,6 +5,7 @@
 
 #include "xe_device.h"
 #include "xe_gt_sriov_pf_control.h"
+#include "xe_gt_sriov_pf_migration.h"
 #include "xe_sriov_migration_data.h"
 #include "xe_sriov_pf_control.h"
 #include "xe_sriov_printk.h"
@@ -171,6 +172,8 @@ int xe_sriov_pf_control_trigger_save_vf(struct xe_device *xe, unsigned int vfid)
 		return ret;
 
 	for_each_gt(gt, xe, id) {
+		xe_gt_sriov_pf_migration_save_init(gt, vfid);
+
 		ret = xe_gt_sriov_pf_control_trigger_save_vf(gt, vfid);
 		if (ret)
 			return ret;
-- 
2.50.1
