[AMD Official Use Only - AMD Internal Distribution Only]
> -----Original Message-----
> From: Kuehling, Felix <[email protected]>
> Sent: Monday, July 28, 2025 12:12 PM
> To: Yat Sin, David <[email protected]>; [email protected]
> Cc: Bhardwaj, Rajneesh <[email protected]>
> Subject: Re: [PATCH v2 2/2] drm/amdkfd: Fix checkpoint-restore on multi-xcc
>
>
> On 2025-07-22 13:47, David Yat Sin wrote:
> > GPUs with multi-xcc have multiple MQDs per queue. This patch saves and
> > restores all the MQDs within the partition.
> >
> > Signed-off-by: David Yat Sin <[email protected]>
> > ---
> > .../drm/amd/amdkfd/kfd_device_queue_manager.c | 2 +-
> > .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 61 ++++++++++++++++---
> > .../amd/amdkfd/kfd_process_queue_manager.c | 20 ++++--
> > 3 files changed, 67 insertions(+), 16 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> > b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> > index 2d91027e2a74..6c5c7c1bf5ed 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> > @@ -2725,7 +2725,7 @@ static void get_queue_checkpoint_info(struct
> > device_queue_manager *dqm,
> >
> > dqm_lock(dqm);
> > mqd_mgr = dqm->mqd_mgrs[mqd_type];
> > - *mqd_size = mqd_mgr->mqd_size;
> > + *mqd_size = mqd_mgr->mqd_size * NUM_XCC(mqd_mgr->dev-
> >xcc_mask);
> > *ctl_stack_size = 0;
> >
> > if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE &&
> > mqd_mgr->get_checkpoint_info) diff --git
> > a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
> > b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
> > index 97933d2a3803..f2dee320fada 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
> > @@ -373,7 +373,7 @@ static void get_checkpoint_info(struct mqd_manager
> *mm, void *mqd, u32 *ctl_stac
> > {
> > struct v9_mqd *m = get_mqd(mqd);
> >
> > - *ctl_stack_size = m->cp_hqd_cntl_stack_size;
> > + *ctl_stack_size = m->cp_hqd_cntl_stack_size *
> > +NUM_XCC(mm->dev->xcc_mask);
> > }
> >
> > static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void
> > *mqd_dst, void *ctl_stack_dst) @@ -388,6 +388,24 @@ static void
> checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, voi
> > memcpy(ctl_stack_dst, ctl_stack, m->cp_hqd_cntl_stack_size);
> > }
> >
> > +static void checkpoint_mqd_v9_4_3(struct mqd_manager *mm,
> > + void *mqd,
> > + void *mqd_dst,
> > + void
> > *ctl_stack_dst)
> > +{
> > + struct v9_mqd *m;
> > + int xcc;
> > + uint64_t size = get_mqd(mqd)->cp_mqd_stride_size;
> > +
> > + for (xcc = 0; xcc < NUM_XCC(mm->dev->xcc_mask); xcc++) {
> > + m = get_mqd(mqd + size * xcc);
> > +
> > + checkpoint_mqd(mm, m,
> > + (uint8_t *)mqd_dst + sizeof(*m) * xcc,
> > + (uint8_t *)ctl_stack_dst +
> > m->cp_hqd_cntl_stack_size
> * xcc);
> > + }
> > +}
> > +
> > static void restore_mqd(struct mqd_manager *mm, void **mqd,
> > struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
> > struct queue_properties *qp,
> > @@ -764,13 +782,35 @@ static void restore_mqd_v9_4_3(struct mqd_manager
> *mm, void **mqd,
> > const void *mqd_src,
> > const void *ctl_stack_src, u32 ctl_stack_size)
> > {
> > - restore_mqd(mm, mqd, mqd_mem_obj, gart_addr, qp, mqd_src,
> ctl_stack_src, ctl_stack_size);
> > - if (amdgpu_sriov_multi_vf_mode(mm->dev->adev)) {
> > - struct v9_mqd *m;
> > + struct kfd_mem_obj xcc_mqd_mem_obj;
> > + u32 mqd_ctl_stack_size;
> > + struct v9_mqd *m;
> > + u32 num_xcc;
> > + int xcc;
> >
> > - m = (struct v9_mqd *) mqd_mem_obj->cpu_ptr;
> > - m->cp_hqd_pq_doorbell_control |= 1 <<
> > -
> CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_MODE__SHIFT;
>
> Is this special handling for SRIOV no longer needed? Maybe it's enough that
> it's
> handled during queue initialization. Then checkpoint and restore just
> preserves the
> updated doorbell mode. Other than that, this patch looks good to me.
>
> Reviewed-by: Felix Kuehling <[email protected]>
Thank you for the review.
Yes, It looks the doorbell mode would be preserved with the checkpoint and
restore. We are still waiting for validation on SRIOV. In case we need to set
m->cp_hqd_pq_doorbell_control in restore_mqd_v9_4_3, I will add it via another
patch later.
~David
>
>
> > + uint64_t offset = mm->mqd_stride(mm, qp);
> > +
> > + mm->dev->dqm->current_logical_xcc_start++;
> > +
> > + num_xcc = NUM_XCC(mm->dev->xcc_mask);
> > + mqd_ctl_stack_size = ctl_stack_size / num_xcc;
> > +
> > + memset(&xcc_mqd_mem_obj, 0x0, sizeof(struct kfd_mem_obj));
> > +
> > + /* Set the MQD pointer and gart address to XCC0 MQD */
> > + *mqd = mqd_mem_obj->cpu_ptr;
> > + if (gart_addr)
> > + *gart_addr = mqd_mem_obj->gpu_addr;
> > +
> > + for (xcc = 0; xcc < num_xcc; xcc++) {
> > + get_xcc_mqd(mqd_mem_obj, &xcc_mqd_mem_obj, offset * xcc);
> > + restore_mqd(mm, (void **)&m,
> > + &xcc_mqd_mem_obj,
> > + NULL,
> > + qp,
> > + (uint8_t *)mqd_src + xcc * sizeof(*m),
> > + (uint8_t *)ctl_stack_src + xcc *
> mqd_ctl_stack_size,
> > + mqd_ctl_stack_size);
> > }
> > }
> > static int destroy_mqd_v9_4_3(struct mqd_manager *mm, void *mqd, @@
> > -906,7 +946,6 @@ struct mqd_manager *mqd_manager_init_v9(enum
> KFD_MQD_TYPE type,
> > mqd->free_mqd = kfd_free_mqd_cp;
> > mqd->is_occupied = kfd_is_occupied_cp;
> > mqd->get_checkpoint_info = get_checkpoint_info;
> > - mqd->checkpoint_mqd = checkpoint_mqd;
> > mqd->mqd_size = sizeof(struct v9_mqd);
> > mqd->mqd_stride = mqd_stride_v9;
> > #if defined(CONFIG_DEBUG_FS)
> > @@ -918,16 +957,18 @@ struct mqd_manager *mqd_manager_init_v9(enum
> KFD_MQD_TYPE type,
> > mqd->init_mqd = init_mqd_v9_4_3;
> > mqd->load_mqd = load_mqd_v9_4_3;
> > mqd->update_mqd = update_mqd_v9_4_3;
> > - mqd->restore_mqd = restore_mqd_v9_4_3;
> > mqd->destroy_mqd = destroy_mqd_v9_4_3;
> > mqd->get_wave_state = get_wave_state_v9_4_3;
> > + mqd->checkpoint_mqd = checkpoint_mqd_v9_4_3;
> > + mqd->restore_mqd = restore_mqd_v9_4_3;
> > } else {
> > mqd->init_mqd = init_mqd;
> > mqd->load_mqd = load_mqd;
> > mqd->update_mqd = update_mqd;
> > - mqd->restore_mqd = restore_mqd;
> > mqd->destroy_mqd = kfd_destroy_mqd_cp;
> > mqd->get_wave_state = get_wave_state;
> > + mqd->checkpoint_mqd = checkpoint_mqd;
> > + mqd->restore_mqd = restore_mqd;
> > }
> > break;
> > case KFD_MQD_TYPE_HIQ:
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> > b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> > index fe4c48930aad..bae200035ff4 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> > @@ -922,7 +922,10 @@ static int criu_checkpoint_queues_device(struct
> > kfd_process_device *pdd,
> >
> > q_data = (struct kfd_criu_queue_priv_data *)q_private_data;
> >
> > - /* data stored in this order: priv_data, mqd, ctl_stack */
> > + /*
> > + * data stored in this order:
> > + * priv_data, mqd[xcc0], mqd[xcc1],..., ctl_stack[xcc0],
> ctl_stack[xcc1]...
> > + */
> > q_data->mqd_size = mqd_size;
> > q_data->ctl_stack_size = ctl_stack_size;
> >
> > @@ -971,7 +974,7 @@ int kfd_criu_checkpoint_queues(struct kfd_process *p,
> > }
> >
> > static void set_queue_properties_from_criu(struct queue_properties *qp,
> > - struct kfd_criu_queue_priv_data
> > *q_data)
> > + struct kfd_criu_queue_priv_data
> > *q_data,
> uint32_t num_xcc)
> > {
> > qp->is_interop = false;
> > qp->queue_percent = q_data->q_percent; @@ -985,7 +988,11 @@ static
> > void set_queue_properties_from_criu(struct queue_properties *qp,
> > qp->eop_ring_buffer_size = q_data->eop_ring_buffer_size;
> > qp->ctx_save_restore_area_address = q_data-
> >ctx_save_restore_area_address;
> > qp->ctx_save_restore_area_size = q_data->ctx_save_restore_area_size;
> > - qp->ctl_stack_size = q_data->ctl_stack_size;
> > + if (q_data->type == KFD_QUEUE_TYPE_COMPUTE)
> > + qp->ctl_stack_size = q_data->ctl_stack_size / num_xcc;
> > + else
> > + qp->ctl_stack_size = q_data->ctl_stack_size;
> > +
> > qp->type = q_data->type;
> > qp->format = q_data->format;
> > }
> > @@ -1045,12 +1052,15 @@ int kfd_criu_restore_queue(struct kfd_process *p,
> > goto exit;
> > }
> >
> > - /* data stored in this order: mqd, ctl_stack */
> > + /*
> > + * data stored in this order:
> > + * mqd[xcc0], mqd[xcc1],..., ctl_stack[xcc0], ctl_stack[xcc1]...
> > + */
> > mqd = q_extra_data;
> > ctl_stack = mqd + q_data->mqd_size;
> >
> > memset(&qp, 0, sizeof(qp));
> > - set_queue_properties_from_criu(&qp, q_data);
> > + set_queue_properties_from_criu(&qp, q_data,
> > +NUM_XCC(pdd->dev->adev->gfx.xcc_mask));
> >
> > print_queue_properties(&qp);
> >