Am 2021-08-19 um 9:37 a.m. schrieb David Yat Sin:
> Add support to existing CRIU ioctl's to save and restore events during
> criu checkpoint and restore.
>
> Signed-off-by: David Yat Sin <[email protected]>
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 130 +++++++-----
>  drivers/gpu/drm/amd/amdkfd/kfd_events.c  | 253 ++++++++++++++++++++---
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |  25 ++-
>  3 files changed, 329 insertions(+), 79 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index 19f16e3dd769..c8f523d8ab81 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -1008,51 +1008,11 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
>        * through the event_page_offset field.
>        */
>       if (args->event_page_offset) {
> -             struct kfd_dev *kfd;
> -             struct kfd_process_device *pdd;
> -             void *mem, *kern_addr;
> -             uint64_t size;
> -
> -             if (p->signal_page) {
> -                     pr_err("Event page is already set\n");
> -                     return -EINVAL;
> -             }
> -
> -             kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset));
> -             if (!kfd) {
> -                     pr_err("Getting device by id failed in %s\n", __func__);
> -                     return -EINVAL;
> -             }
> -
>               mutex_lock(&p->mutex);
> -             pdd = kfd_bind_process_to_device(kfd, p);
> -             if (IS_ERR(pdd)) {
> -                     err = PTR_ERR(pdd);
> -                     goto out_unlock;
> -             }
> -
> -             mem = kfd_process_device_translate_handle(pdd,
> -                             GET_IDR_HANDLE(args->event_page_offset));
> -             if (!mem) {
> -                     pr_err("Can't find BO, offset is 0x%llx\n",
> -                            args->event_page_offset);
> -                     err = -EINVAL;
> -                     goto out_unlock;
> -             }
> +             err = kfd_kmap_event_page(p, args->event_page_offset);
>               mutex_unlock(&p->mutex);
> -
> -             err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->kgd,
> -                                             mem, &kern_addr, &size);
> -             if (err) {
> -                     pr_err("Failed to map event page to kernel\n");
> -                     return err;
> -             }
> -
> -             err = kfd_event_page_set(p, kern_addr, size);
> -             if (err) {
> -                     pr_err("Failed to set event page\n");
> +             if (err)
>                       return err;
> -             }
>       }
>  
>       err = kfd_event_create(filp, p, args->event_type,
> @@ -1061,10 +1021,7 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
>                               &args->event_page_offset,
>                               &args->event_slot_index);
>  
> -     return err;
> -
> -out_unlock:
> -     mutex_unlock(&p->mutex);
> +     pr_debug("Created event (id:0x%08x) (%s)\n", args->event_id, __func__);
>       return err;
>  }
>  
> @@ -2208,6 +2165,41 @@ static int criu_dump_queues(struct kfd_process *p, struct kfd_ioctl_criu_dumper_
>       return ret;
>  }
>  
> +static int criu_dump_events(struct kfd_process *p, struct kfd_ioctl_criu_dumper_args *args)
> +{
> +     struct kfd_criu_event_bucket *ev_buckets;
> +     uint32_t num_events;
> +     int ret =  0;
> +
> +     num_events = kfd_get_num_events(p);
> +     if (args->num_objects != num_events) {
> +             pr_err("Mismatch with number of events (current:%d user:%lld)\n",
> +                                                     num_events, args->num_objects);
> +
> +     }
> +
> +     if (args->objects_size != args->num_objects *
> +                               (sizeof(*ev_buckets) + sizeof(struct kfd_criu_event_priv_data))) {
> +             pr_err("Invalid objects size for events\n");
> +             return -EINVAL;
> +     }
> +
> +     ev_buckets = kvzalloc(args->objects_size, GFP_KERNEL);
> +     if (!ev_buckets)
> +             return -ENOMEM;
> +
> +     ret = kfd_event_dump(p, ev_buckets, args->num_objects);
> +     if (!ret) {
> +             ret = copy_to_user((void __user *)args->objects, ev_buckets, args->objects_size);
> +             if (ret) {
> +                     pr_err("Failed to copy events information to user\n");
> +                     ret = -EFAULT;
> +             }
> +     }
> +     kvfree(ev_buckets);
> +     return ret;
> +}
> +
>  static int kfd_ioctl_criu_dumper(struct file *filep,
>                               struct kfd_process *p, void *data)
>  {
> @@ -2246,6 +2238,8 @@ static int kfd_ioctl_criu_dumper(struct file *filep,
>               ret = criu_dump_queues(p, args);
>               break;
>       case KFD_CRIU_OBJECT_TYPE_EVENT:
> +             ret = criu_dump_events(p, args);
> +             break;
>       case KFD_CRIU_OBJECT_TYPE_DEVICE:
>       case KFD_CRIU_OBJECT_TYPE_SVM_RANGE:
>       default:
> @@ -2676,6 +2670,40 @@ static int criu_restore_queues(struct kfd_process *p,
>       return ret;
>  }
>  
> +static int criu_restore_events(struct file *filp, struct kfd_process *p,
> +                     struct kfd_ioctl_criu_restorer_args *args)
> +{
> +     int ret = 0, i;
> +     uint8_t *objects, *private_data;
> +     struct kfd_criu_event_bucket *ev_buckets;
> +
> +     objects = kvzalloc(args->objects_size, GFP_KERNEL);
> +     if (!objects)
> +             return -ENOMEM;
> +
> +     ret = copy_from_user(objects, (void __user *)args->objects, args->objects_size);
> +     if (ret) {
> +             pr_err("Failed to copy event information from user\n");
> +             ret = -EFAULT;
> +             goto exit;
> +     }
> +
> +     ev_buckets = (struct kfd_criu_event_bucket *) objects;
> +     private_data = (void *)(ev_buckets + args->num_objects);
> +
> +     for (i = 0; i < args->num_objects; i++) {
> +             ret = kfd_event_restore(filp, p, &ev_buckets[i], private_data);
> +             if (ret) {
> +                     pr_err("Failed to restore event (%d)\n", ret);
> +                     goto exit;
> +             }
> +     }
> +
> +exit:
> +     kvfree(ev_buckets);
> +     return ret;
> +}
> +
>  static int kfd_ioctl_criu_restorer(struct file *filep,
>                               struct kfd_process *p, void *data)
>  {
> @@ -2698,6 +2726,8 @@ static int kfd_ioctl_criu_restorer(struct file *filep,
>               ret = criu_restore_queues(p, args);
>               break;
>       case KFD_CRIU_OBJECT_TYPE_EVENT:
> +             ret = criu_restore_events(filep, p, args);
> +             break;
>       case KFD_CRIU_OBJECT_TYPE_DEVICE:
>       case KFD_CRIU_OBJECT_TYPE_SVM_RANGE:
>       default:
> @@ -2799,9 +2829,13 @@ static int kfd_ioctl_criu_process_info(struct file *filep,
>       args->queues_priv_data_size = queues_extra_data_size +
>                               (args->total_queues * sizeof(struct kfd_criu_queue_priv_data));
>  
> -     dev_dbg(kfd_device, "Num of bos:%llu queues:%u\n",
> +     args->total_events = kfd_get_num_events(p);
> +     args->events_priv_data_size = args->total_events * sizeof(struct kfd_criu_event_priv_data);
> +
> +     dev_dbg(kfd_device, "Num of bos:%llu queues:%u events:%u\n",
>                               args->total_bos,
> -                             args->total_queues);
> +                             args->total_queues,
> +                             args->total_events);
>  err_unlock:
>       mutex_unlock(&p->mutex);
>       return ret;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> index ba2c2ce0c55a..18362478e351 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> @@ -53,9 +53,9 @@ struct kfd_signal_page {
>       uint64_t *kernel_address;
>       uint64_t __user *user_address;
>       bool need_to_free_pages;
> +     uint64_t user_handle; /* Needed for CRIU dumped and restore */
>  };
>  
> -
>  static uint64_t *page_slots(struct kfd_signal_page *page)
>  {
>       return page->kernel_address;
> @@ -92,7 +92,8 @@ static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p)
>  }
>  
>  static int allocate_event_notification_slot(struct kfd_process *p,
> -                                         struct kfd_event *ev)
> +                                         struct kfd_event *ev,
> +                                         const int *restore_id)
>  {
>       int id;
>  
> @@ -104,14 +105,19 @@ static int allocate_event_notification_slot(struct kfd_process *p,
>               p->signal_mapped_size = 256*8;
>       }
>  
> -     /*
> -      * Compatibility with old user mode: Only use signal slots
> -      * user mode has mapped, may be less than
> -      * KFD_SIGNAL_EVENT_LIMIT. This also allows future increase
> -      * of the event limit without breaking user mode.
> -      */
> -     id = idr_alloc(&p->event_idr, ev, 0, p->signal_mapped_size / 8,
> -                    GFP_KERNEL);
> +     if (restore_id) {
> +             id = idr_alloc(&p->event_idr, ev, *restore_id, *restore_id + 1,
> +                             GFP_KERNEL);
> +     } else {
> +             /*
> +              * Compatibility with old user mode: Only use signal slots
> +              * user mode has mapped, may be less than
> +              * KFD_SIGNAL_EVENT_LIMIT. This also allows future increase
> +              * of the event limit without breaking user mode.
> +              */
> +             id = idr_alloc(&p->event_idr, ev, 0, p->signal_mapped_size / 8,
> +                             GFP_KERNEL);
> +     }
>       if (id < 0)
>               return id;
>  
> @@ -178,9 +184,8 @@ static struct kfd_event *lookup_signaled_event_by_partial_id(
>       return ev;
>  }
>  
> -static int create_signal_event(struct file *devkfd,
> -                             struct kfd_process *p,
> -                             struct kfd_event *ev)
> +static int create_signal_event(struct file *devkfd, struct kfd_process *p,
> +                             struct kfd_event *ev, const int *restore_id)
>  {
>       int ret;
>  
> @@ -193,7 +198,7 @@ static int create_signal_event(struct file *devkfd,
>               return -ENOSPC;
>       }
>  
> -     ret = allocate_event_notification_slot(p, ev);
> +     ret = allocate_event_notification_slot(p, ev, restore_id);
>       if (ret) {
>               pr_warn("Signal event wasn't created because out of kernel memory\n");
>               return ret;
> @@ -209,16 +214,22 @@ static int create_signal_event(struct file *devkfd,
>       return 0;
>  }
>  
> -static int create_other_event(struct kfd_process *p, struct kfd_event *ev)
> +static int create_other_event(struct kfd_process *p, struct kfd_event *ev, const int *restore_id)
>  {
> -     /* Cast KFD_LAST_NONSIGNAL_EVENT to uint32_t. This allows an
> -      * intentional integer overflow to -1 without a compiler
> -      * warning. idr_alloc treats a negative value as "maximum
> -      * signed integer".
> -      */
> -     int id = idr_alloc(&p->event_idr, ev, KFD_FIRST_NONSIGNAL_EVENT_ID,
> -                        (uint32_t)KFD_LAST_NONSIGNAL_EVENT_ID + 1,
> -                        GFP_KERNEL);
> +     int id;
> +
> +     if (restore_id)
> +             id = idr_alloc(&p->event_idr, ev, *restore_id, *restore_id + 1,
> +                     GFP_KERNEL);
> +     else
> +             /* Cast KFD_LAST_NONSIGNAL_EVENT to uint32_t. This allows an
> +              * intentional integer overflow to -1 without a compiler
> +              * warning. idr_alloc treats a negative value as "maximum
> +              * signed integer".
> +              */
> +             id = idr_alloc(&p->event_idr, ev, KFD_FIRST_NONSIGNAL_EVENT_ID,
> +                             (uint32_t)KFD_LAST_NONSIGNAL_EVENT_ID + 1,
> +                             GFP_KERNEL);
>  
>       if (id < 0)
>               return id;
> @@ -295,8 +306,8 @@ static bool event_can_be_cpu_signaled(const struct kfd_event *ev)
>       return ev->type == KFD_EVENT_TYPE_SIGNAL;
>  }
>  
> -int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
> -                    uint64_t size)
> +static int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
> +                    uint64_t size, uint64_t user_handle)
>  {
>       struct kfd_signal_page *page;
>  
> @@ -315,10 +326,55 @@ int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
>  
>       p->signal_page = page;
>       p->signal_mapped_size = size;
> -
> +     p->signal_page->user_handle = user_handle;
>       return 0;
>  }
>  
> +int kfd_kmap_event_page(struct kfd_process *p, uint64_t event_page_offset)

kfd_kmap_event_page should be static. I also think that this function,
criu_dump_events and criu_restore_events could be moved into kfd_events.c.

Regards,
  Felix


> +{
> +     struct kfd_dev *kfd;
> +     struct kfd_process_device *pdd;
> +     void *mem, *kern_addr;
> +     uint64_t size;
> +     int err = 0;
> +
> +     if (p->signal_page) {
> +             pr_err("Event page is already set\n");
> +             return -EINVAL;
> +     }
> +
> +     kfd = kfd_device_by_id(GET_GPU_ID(event_page_offset));
> +     if (!kfd) {
> +             pr_err("Getting device by id failed in %s\n", __func__);
> +             return -EINVAL;
> +     }
> +
> +     pdd = kfd_bind_process_to_device(kfd, p);
> +     if (IS_ERR(pdd))
> +             return PTR_ERR(pdd);
> +
> +     mem = kfd_process_device_translate_handle(pdd,
> +                     GET_IDR_HANDLE(event_page_offset));
> +     if (!mem) {
> +             pr_err("Can't find BO, offset is 0x%llx\n", event_page_offset);
> +             return -EINVAL;
> +     }
> +
> +     err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->kgd,
> +                                     mem, &kern_addr, &size);
> +     if (err) {
> +             pr_err("Failed to map event page to kernel\n");
> +             return err;
> +     }
> +
> +     err = kfd_event_page_set(p, kern_addr, size, event_page_offset);
> +     if (err) {
> +             pr_err("Failed to set event page\n");
> +             return err;
> +     }
> +     return err;
> +}
> +
>  int kfd_event_create(struct file *devkfd, struct kfd_process *p,
>                    uint32_t event_type, bool auto_reset, uint32_t node_id,
>                    uint32_t *event_id, uint32_t *event_trigger_data,
> @@ -343,14 +399,14 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
>       switch (event_type) {
>       case KFD_EVENT_TYPE_SIGNAL:
>       case KFD_EVENT_TYPE_DEBUG:
> -             ret = create_signal_event(devkfd, p, ev);
> +             ret = create_signal_event(devkfd, p, ev, NULL);
>               if (!ret) {
>                       *event_page_offset = KFD_MMAP_TYPE_EVENTS;
>                       *event_slot_index = ev->event_id;
>               }
>               break;
>       default:
> -             ret = create_other_event(p, ev);
> +             ret = create_other_event(p, ev, NULL);
>               break;
>       }
>  
> @@ -366,6 +422,147 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
>       return ret;
>  }
>  
> +int kfd_event_restore(struct file *devkfd, struct kfd_process *p,
> +                   struct kfd_criu_event_bucket *ev_bucket,
> +                   uint8_t *priv_datas)
> +{
> +     int ret = 0;
> +     struct kfd_criu_event_priv_data *ev_priv;
> +     struct kfd_event *ev;
> +
> +     ev_priv = (struct kfd_criu_event_priv_data *)(priv_datas + ev_bucket->priv_data_offset);
> +
> +     if (ev_priv->user_handle) {
> +             ret = kfd_kmap_event_page(p, ev_priv->user_handle);
> +             if (ret)
> +                     return ret;
> +     }
> +
> +     ev = kzalloc(sizeof(*ev), GFP_KERNEL);
> +     if (!ev)
> +             return -ENOMEM;
> +
> +     ev->type = ev_priv->type;
> +     ev->auto_reset = ev_priv->auto_reset;
> +     ev->signaled = ev_priv->signaled;
> +
> +     init_waitqueue_head(&ev->wq);
> +
> +     mutex_lock(&p->event_mutex);
> +     switch (ev->type) {
> +     case KFD_EVENT_TYPE_SIGNAL:
> +     case KFD_EVENT_TYPE_DEBUG:
> +             ret = create_signal_event(devkfd, p, ev, &ev_priv->event_id);
> +             break;
> +     case KFD_EVENT_TYPE_MEMORY:
> +             memcpy(&ev->memory_exception_data,
> +                     &ev_priv->memory_exception_data,
> +                     sizeof(struct kfd_hsa_memory_exception_data));
> +
> +             ev->memory_exception_data.gpu_id = ev_bucket->gpu_id;
> +             ret = create_other_event(p, ev, &ev_priv->event_id);
> +             break;
> +     case KFD_EVENT_TYPE_HW_EXCEPTION:
> +             memcpy(&ev->hw_exception_data,
> +                     &ev_priv->hw_exception_data,
> +                     sizeof(struct kfd_hsa_hw_exception_data));
> +
> +             ev->hw_exception_data.gpu_id = ev_bucket->gpu_id;
> +             ret = create_other_event(p, ev, &ev_priv->event_id);
> +             break;
> +     }
> +
> +     if (ret)
> +             kfree(ev);
> +
> +     mutex_unlock(&p->event_mutex);
> +
> +     return ret;
> +}
> +
> +int kfd_event_dump(struct kfd_process *p,
> +                struct kfd_criu_event_bucket *ev_buckets,
> +                uint32_t num_events)
> +{
> +     struct kfd_event *ev;
> +     struct kfd_criu_event_priv_data *ev_privs;
> +     uint32_t ev_id;
> +     int i = 0;
> +
> +     /* Private data for first event starts after all ev_buckets */
> +     ev_privs = (struct kfd_criu_event_priv_data *)((uint8_t *)ev_buckets +
> +                                                (num_events * (sizeof(*ev_buckets))));
> +
> +
> +     idr_for_each_entry(&p->event_idr, ev, ev_id) {
> +             struct kfd_criu_event_bucket *ev_bucket;
> +             struct kfd_criu_event_priv_data *ev_priv;
> +
> +             if (i >= num_events) {
> +                     pr_err("Number of events exceeds number allocated\n");
> +                     return -ENOMEM;
> +             }
> +
> +             ev_bucket = &ev_buckets[i];
> +
> +             /* Currently, all events have same size of private_data, but the current ioctl's
> +              * and CRIU plugin supports private_data of variable sizes
> +              */
> +             ev_priv = &ev_privs[i];
> +
> +             ev_bucket->priv_data_offset = i * sizeof(*ev_priv);
> +             ev_bucket->priv_data_size = sizeof(*ev_priv);
> +
> +             /* We store the user_handle with the first event */
> +             if (i == 0 && p->signal_page)
> +                     ev_priv->user_handle = p->signal_page->user_handle;
> +
> +             ev_priv->event_id = ev->event_id;
> +             ev_priv->auto_reset = ev->auto_reset;
> +             ev_priv->type = ev->type;
> +             ev_priv->signaled = ev->signaled;
> +
> +             /* We store the gpu_id in the bucket section so that the userspace CRIU plugin can
> +              * modify it if needed.
> +              */
> +             if (ev_priv->type == KFD_EVENT_TYPE_MEMORY) {
> +                     memcpy(&ev_priv->memory_exception_data,
> +                             &ev->memory_exception_data,
> +                             sizeof(struct kfd_hsa_memory_exception_data));
> +
> +                     ev_bucket->gpu_id = ev_priv->memory_exception_data.gpu_id;
> +             } else if (ev_priv->type == KFD_EVENT_TYPE_HW_EXCEPTION) {
> +                     memcpy(&ev_priv->hw_exception_data,
> +                             &ev->hw_exception_data,
> +                             sizeof(struct kfd_hsa_hw_exception_data));
> +
> +                     ev_bucket->gpu_id = ev_priv->hw_exception_data.gpu_id;
> +             } else
> +                     ev_bucket->gpu_id = 0;
> +
> +             pr_debug("Dumped event[%d] id = 0x%08x auto_reset = %x type = %x signaled = %x\n",
> +                       i,
> +                       ev_priv->event_id,
> +                       ev_priv->auto_reset,
> +                       ev_priv->type,
> +                       ev_priv->signaled);
> +             i++;
> +     }
> +     return 0;
> +}
> +
> +int kfd_get_num_events(struct kfd_process *p)
> +{
> +     struct kfd_event *ev;
> +     uint32_t id;
> +     u32 num_events = 0;
> +
> +     idr_for_each_entry(&p->event_idr, ev, id)
> +             num_events++;
> +
> +     return num_events++;
> +}
> +
>  /* Assumes that p is current. */
>  int kfd_event_destroy(struct kfd_process *p, uint32_t event_id)
>  {
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 7ed6f831109d..bf10a5305ef7 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -1069,9 +1069,26 @@ struct kfd_criu_queue_priv_data {
>  };
>  
>  struct kfd_criu_event_priv_data {
> -     uint64_t reserved;
> +     uint64_t user_handle;
> +     uint32_t event_id;
> +     uint32_t auto_reset;
> +     uint32_t type;
> +     uint32_t signaled;
> +
> +     union {
> +             struct kfd_hsa_memory_exception_data memory_exception_data;
> +             struct kfd_hsa_hw_exception_data hw_exception_data;
> +     };
>  };
>  
> +int kfd_event_restore(struct file *devkfd, struct kfd_process *p,
> +                   struct kfd_criu_event_bucket *ev_bucket,
> +                   uint8_t *priv_datas);
> +
> +int kfd_event_dump(struct kfd_process *p,
> +                struct kfd_criu_event_bucket *ev_buckets,
> +                uint32_t num_events);
> +
>  /* CRIU - End */
>  
>  /* Queue Context Management */
> @@ -1238,12 +1255,14 @@ void kfd_signal_iommu_event(struct kfd_dev *dev,
>  void kfd_signal_hw_exception_event(u32 pasid);
>  int kfd_set_event(struct kfd_process *p, uint32_t event_id);
>  int kfd_reset_event(struct kfd_process *p, uint32_t event_id);
> -int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
> -                    uint64_t size);
> +int kfd_kmap_event_page(struct kfd_process *p, uint64_t event_page_offset);
> +
>  int kfd_event_create(struct file *devkfd, struct kfd_process *p,
>                    uint32_t event_type, bool auto_reset, uint32_t node_id,
>                    uint32_t *event_id, uint32_t *event_trigger_data,
>                    uint64_t *event_page_offset, uint32_t *event_slot_index);
> +
> +int kfd_get_num_events(struct kfd_process *p);
>  int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);
>  
>  void kfd_signal_vm_fault_event(struct kfd_dev *dev, u32 pasid,

Reply via email to