Add support to the existing CRIU ioctls to save and restore events during
CRIU checkpoint and restore.

Signed-off-by: David Yat Sin <[email protected]>
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 130 +++++++-----
 drivers/gpu/drm/amd/amdkfd/kfd_events.c  | 253 ++++++++++++++++++++---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |  25 ++-
 3 files changed, 329 insertions(+), 79 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 19f16e3dd769..c8f523d8ab81 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1008,51 +1008,11 @@ static int kfd_ioctl_create_event(struct file *filp, 
struct kfd_process *p,
         * through the event_page_offset field.
         */
        if (args->event_page_offset) {
-               struct kfd_dev *kfd;
-               struct kfd_process_device *pdd;
-               void *mem, *kern_addr;
-               uint64_t size;
-
-               if (p->signal_page) {
-                       pr_err("Event page is already set\n");
-                       return -EINVAL;
-               }
-
-               kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset));
-               if (!kfd) {
-                       pr_err("Getting device by id failed in %s\n", __func__);
-                       return -EINVAL;
-               }
-
                mutex_lock(&p->mutex);
-               pdd = kfd_bind_process_to_device(kfd, p);
-               if (IS_ERR(pdd)) {
-                       err = PTR_ERR(pdd);
-                       goto out_unlock;
-               }
-
-               mem = kfd_process_device_translate_handle(pdd,
-                               GET_IDR_HANDLE(args->event_page_offset));
-               if (!mem) {
-                       pr_err("Can't find BO, offset is 0x%llx\n",
-                              args->event_page_offset);
-                       err = -EINVAL;
-                       goto out_unlock;
-               }
+               err = kfd_kmap_event_page(p, args->event_page_offset);
                mutex_unlock(&p->mutex);
-
-               err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->kgd,
-                                               mem, &kern_addr, &size);
-               if (err) {
-                       pr_err("Failed to map event page to kernel\n");
-                       return err;
-               }
-
-               err = kfd_event_page_set(p, kern_addr, size);
-               if (err) {
-                       pr_err("Failed to set event page\n");
+               if (err)
                        return err;
-               }
        }
 
        err = kfd_event_create(filp, p, args->event_type,
@@ -1061,10 +1021,7 @@ static int kfd_ioctl_create_event(struct file *filp, 
struct kfd_process *p,
                                &args->event_page_offset,
                                &args->event_slot_index);
 
-       return err;
-
-out_unlock:
-       mutex_unlock(&p->mutex);
+       pr_debug("Created event (id:0x%08x) (%s)\n", args->event_id, __func__);
        return err;
 }
 
@@ -2208,6 +2165,41 @@ static int criu_dump_queues(struct kfd_process *p, 
struct kfd_ioctl_criu_dumper_
        return ret;
 }
 
+/*
+ * criu_dump_events - copy a checkpoint record of every event of @p to user space.
+ * @p:    process whose events are dumped
+ * @args: dumper ioctl arguments; @args->objects is the user buffer,
+ *        @args->num_objects the number of events user space expects and
+ *        @args->objects_size the size of that buffer in bytes
+ *
+ * The user buffer is laid out as @args->num_objects buckets followed by the
+ * same number of struct kfd_criu_event_priv_data entries.
+ *
+ * Return: 0 on success, -EINVAL if the event count or buffer size supplied by
+ * user space does not match the current state, -ENOMEM if the staging buffer
+ * cannot be allocated, -EFAULT if the copy to user space fails, or the error
+ * returned by kfd_event_dump().
+ */
+static int criu_dump_events(struct kfd_process *p, struct kfd_ioctl_criu_dumper_args *args)
+{
+       struct kfd_criu_event_bucket *ev_buckets;
+       uint32_t num_events;
+       int ret = 0;
+
+       num_events = kfd_get_num_events(p);
+       if (args->num_objects != num_events) {
+               pr_err("Mismatch with number of events (current:%d user:%lld)\n",
+                                                       num_events, args->num_objects);
+               /* A stale count would yield a truncated or zero-padded dump */
+               return -EINVAL;
+       }
+
+       /* num_objects now equals the real event count, so the product is bounded */
+       if (args->objects_size != args->num_objects *
+                                 (sizeof(*ev_buckets) + sizeof(struct kfd_criu_event_priv_data))) {
+               pr_err("Invalid objects size for events\n");
+               return -EINVAL;
+       }
+
+       /* Zeroed so that fields kfd_event_dump() does not set never leak kernel memory */
+       ev_buckets = kvzalloc(args->objects_size, GFP_KERNEL);
+       if (!ev_buckets)
+               return -ENOMEM;
+
+       ret = kfd_event_dump(p, ev_buckets, args->num_objects);
+       if (!ret) {
+               ret = copy_to_user((void __user *)args->objects, ev_buckets, args->objects_size);
+               if (ret) {
+                       pr_err("Failed to copy events information to user\n");
+                       ret = -EFAULT;
+               }
+       }
+       kvfree(ev_buckets);
+       return ret;
+}
+
 static int kfd_ioctl_criu_dumper(struct file *filep,
                                struct kfd_process *p, void *data)
 {
@@ -2246,6 +2238,8 @@ static int kfd_ioctl_criu_dumper(struct file *filep,
                ret = criu_dump_queues(p, args);
                break;
        case KFD_CRIU_OBJECT_TYPE_EVENT:
+               ret = criu_dump_events(p, args);
+               break;
        case KFD_CRIU_OBJECT_TYPE_DEVICE:
        case KFD_CRIU_OBJECT_TYPE_SVM_RANGE:
        default:
@@ -2676,6 +2670,40 @@ static int criu_restore_queues(struct kfd_process *p,
        return ret;
 }
 
+/*
+ * criu_restore_events - recreate events of @p from a CRIU checkpoint image.
+ * @filp: KFD device file, forwarded to kfd_event_restore()
+ * @p:    process being restored
+ * @args: restorer ioctl arguments; @args->objects points to
+ *        @args->num_objects buckets followed by the same number of
+ *        struct kfd_criu_event_priv_data entries, @args->objects_size bytes
+ *        in total
+ *
+ * All sizes and offsets come from user space and are validated before use.
+ *
+ * Return: 0 on success, -EINVAL on an inconsistent size or private data
+ * offset, -ENOMEM if the staging buffer cannot be allocated, -EFAULT if the
+ * copy from user space fails, or the error returned by kfd_event_restore().
+ */
+static int criu_restore_events(struct file *filp, struct kfd_process *p,
+                       struct kfd_ioctl_criu_restorer_args *args)
+{
+       const uint64_t obj_size = sizeof(struct kfd_criu_event_bucket) +
+                                 sizeof(struct kfd_criu_event_priv_data);
+       struct kfd_criu_event_bucket *ev_buckets;
+       uint8_t *objects, *private_data;
+       uint64_t priv_size, i;
+       int ret = 0;
+
+       /* Reject counts whose total size would wrap or disagree with objects_size */
+       if (args->num_objects > U64_MAX / obj_size ||
+           args->objects_size != args->num_objects * obj_size) {
+               pr_err("Invalid objects size for events\n");
+               return -EINVAL;
+       }
+
+       objects = kvzalloc(args->objects_size, GFP_KERNEL);
+       if (!objects)
+               return -ENOMEM;
+
+       ret = copy_from_user(objects, (void __user *)args->objects, args->objects_size);
+       if (ret) {
+               pr_err("Failed to copy event information from user\n");
+               ret = -EFAULT;
+               goto exit;
+       }
+
+       ev_buckets = (struct kfd_criu_event_bucket *) objects;
+       private_data = (void *)(ev_buckets + args->num_objects);
+       priv_size = args->num_objects * sizeof(struct kfd_criu_event_priv_data);
+
+       for (i = 0; i < args->num_objects; i++) {
+               /* kfd_event_restore() reads one priv entry at this offset */
+               if (ev_buckets[i].priv_data_offset >
+                   priv_size - sizeof(struct kfd_criu_event_priv_data)) {
+                       pr_err("Invalid private data offset for event\n");
+                       ret = -EINVAL;
+                       goto exit;
+               }
+
+               ret = kfd_event_restore(filp, p, &ev_buckets[i], private_data);
+               if (ret) {
+                       pr_err("Failed to restore event (%d)\n", ret);
+                       goto exit;
+               }
+       }
+
+exit:
+       /* Free the allocation itself; ev_buckets is not set on every path here */
+       kvfree(objects);
+       return ret;
+}
+
 static int kfd_ioctl_criu_restorer(struct file *filep,
                                struct kfd_process *p, void *data)
 {
@@ -2698,6 +2726,8 @@ static int kfd_ioctl_criu_restorer(struct file *filep,
                ret = criu_restore_queues(p, args);
                break;
        case KFD_CRIU_OBJECT_TYPE_EVENT:
+               ret = criu_restore_events(filep, p, args);
+               break;
        case KFD_CRIU_OBJECT_TYPE_DEVICE:
        case KFD_CRIU_OBJECT_TYPE_SVM_RANGE:
        default:
@@ -2799,9 +2829,13 @@ static int kfd_ioctl_criu_process_info(struct file 
*filep,
        args->queues_priv_data_size = queues_extra_data_size +
                                (args->total_queues * sizeof(struct 
kfd_criu_queue_priv_data));
 
-       dev_dbg(kfd_device, "Num of bos:%llu queues:%u\n",
+       args->total_events = kfd_get_num_events(p);
+       args->events_priv_data_size = args->total_events * sizeof(struct 
kfd_criu_event_priv_data);
+
+       dev_dbg(kfd_device, "Num of bos:%llu queues:%u events:%u\n",
                                args->total_bos,
-                               args->total_queues);
+                               args->total_queues,
+                               args->total_events);
 err_unlock:
        mutex_unlock(&p->mutex);
        return ret;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index ba2c2ce0c55a..18362478e351 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -53,9 +53,9 @@ struct kfd_signal_page {
        uint64_t *kernel_address;
        uint64_t __user *user_address;
        bool need_to_free_pages;
+       uint64_t user_handle; /* Needed for CRIU dumped and restore */
 };
 
-
 static uint64_t *page_slots(struct kfd_signal_page *page)
 {
        return page->kernel_address;
@@ -92,7 +92,8 @@ static struct kfd_signal_page *allocate_signal_page(struct 
kfd_process *p)
 }
 
 static int allocate_event_notification_slot(struct kfd_process *p,
-                                           struct kfd_event *ev)
+                                           struct kfd_event *ev,
+                                           const int *restore_id)
 {
        int id;
 
@@ -104,14 +105,19 @@ static int allocate_event_notification_slot(struct 
kfd_process *p,
                p->signal_mapped_size = 256*8;
        }
 
-       /*
-        * Compatibility with old user mode: Only use signal slots
-        * user mode has mapped, may be less than
-        * KFD_SIGNAL_EVENT_LIMIT. This also allows future increase
-        * of the event limit without breaking user mode.
-        */
-       id = idr_alloc(&p->event_idr, ev, 0, p->signal_mapped_size / 8,
-                      GFP_KERNEL);
+       if (restore_id) {
+               id = idr_alloc(&p->event_idr, ev, *restore_id, *restore_id + 1,
+                               GFP_KERNEL);
+       } else {
+               /*
+                * Compatibility with old user mode: Only use signal slots
+                * user mode has mapped, may be less than
+                * KFD_SIGNAL_EVENT_LIMIT. This also allows future increase
+                * of the event limit without breaking user mode.
+                */
+               id = idr_alloc(&p->event_idr, ev, 0, p->signal_mapped_size / 8,
+                               GFP_KERNEL);
+       }
        if (id < 0)
                return id;
 
@@ -178,9 +184,8 @@ static struct kfd_event 
*lookup_signaled_event_by_partial_id(
        return ev;
 }
 
-static int create_signal_event(struct file *devkfd,
-                               struct kfd_process *p,
-                               struct kfd_event *ev)
+static int create_signal_event(struct file *devkfd, struct kfd_process *p,
+                               struct kfd_event *ev, const int *restore_id)
 {
        int ret;
 
@@ -193,7 +198,7 @@ static int create_signal_event(struct file *devkfd,
                return -ENOSPC;
        }
 
-       ret = allocate_event_notification_slot(p, ev);
+       ret = allocate_event_notification_slot(p, ev, restore_id);
        if (ret) {
                pr_warn("Signal event wasn't created because out of kernel 
memory\n");
                return ret;
@@ -209,16 +214,22 @@ static int create_signal_event(struct file *devkfd,
        return 0;
 }
 
-static int create_other_event(struct kfd_process *p, struct kfd_event *ev)
+static int create_other_event(struct kfd_process *p, struct kfd_event *ev, 
const int *restore_id)
 {
-       /* Cast KFD_LAST_NONSIGNAL_EVENT to uint32_t. This allows an
-        * intentional integer overflow to -1 without a compiler
-        * warning. idr_alloc treats a negative value as "maximum
-        * signed integer".
-        */
-       int id = idr_alloc(&p->event_idr, ev, KFD_FIRST_NONSIGNAL_EVENT_ID,
-                          (uint32_t)KFD_LAST_NONSIGNAL_EVENT_ID + 1,
-                          GFP_KERNEL);
+       int id;
+
+       if (restore_id)
+               id = idr_alloc(&p->event_idr, ev, *restore_id, *restore_id + 1,
+                       GFP_KERNEL);
+       else
+               /* Cast KFD_LAST_NONSIGNAL_EVENT to uint32_t. This allows an
+                * intentional integer overflow to -1 without a compiler
+                * warning. idr_alloc treats a negative value as "maximum
+                * signed integer".
+                */
+               id = idr_alloc(&p->event_idr, ev, KFD_FIRST_NONSIGNAL_EVENT_ID,
+                               (uint32_t)KFD_LAST_NONSIGNAL_EVENT_ID + 1,
+                               GFP_KERNEL);
 
        if (id < 0)
                return id;
@@ -295,8 +306,8 @@ static bool event_can_be_cpu_signaled(const struct 
kfd_event *ev)
        return ev->type == KFD_EVENT_TYPE_SIGNAL;
 }
 
-int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
-                      uint64_t size)
+static int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
+                      uint64_t size, uint64_t user_handle)
 {
        struct kfd_signal_page *page;
 
@@ -315,10 +326,55 @@ int kfd_event_page_set(struct kfd_process *p, void 
*kernel_address,
 
        p->signal_page = page;
        p->signal_mapped_size = size;
-
+       p->signal_page->user_handle = user_handle;
        return 0;
 }
 
+/*
+ * kfd_kmap_event_page - map a user-provided BO as the signal page of @p.
+ * @p:                 process that will own the signal page
+ * @event_page_offset: handle encoding the GPU id and the IDR handle of the BO
+ *
+ * Fails if @p already has a signal page. The handle is remembered in the
+ * signal page so it can be dumped later.
+ *
+ * NOTE(review): kfd_ioctl_create_event() calls this with p->mutex held;
+ * confirm other callers provide the same locking.
+ *
+ * Return: 0 on success, -EINVAL if a signal page is already set or the
+ * device/BO cannot be found, or the error from binding, mapping or setting
+ * the page.
+ */
+int kfd_kmap_event_page(struct kfd_process *p, uint64_t event_page_offset)
+{
+       struct kfd_process_device *pdd;
+       struct kfd_dev *dev;
+       void *bo, *kaddr;
+       uint64_t bo_size;
+       int ret;
+
+       if (p->signal_page) {
+               pr_err("Event page is already set\n");
+               return -EINVAL;
+       }
+
+       dev = kfd_device_by_id(GET_GPU_ID(event_page_offset));
+       if (!dev) {
+               pr_err("Getting device by id failed in %s\n", __func__);
+               return -EINVAL;
+       }
+
+       pdd = kfd_bind_process_to_device(dev, p);
+       if (IS_ERR(pdd))
+               return PTR_ERR(pdd);
+
+       bo = kfd_process_device_translate_handle(pdd,
+                                                GET_IDR_HANDLE(event_page_offset));
+       if (!bo) {
+               pr_err("Can't find BO, offset is 0x%llx\n", event_page_offset);
+               return -EINVAL;
+       }
+
+       ret = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(dev->kgd, bo,
+                                                      &kaddr, &bo_size);
+       if (ret) {
+               pr_err("Failed to map event page to kernel\n");
+               return ret;
+       }
+
+       ret = kfd_event_page_set(p, kaddr, bo_size, event_page_offset);
+       if (ret)
+               pr_err("Failed to set event page\n");
+
+       return ret;
+}
+
 int kfd_event_create(struct file *devkfd, struct kfd_process *p,
                     uint32_t event_type, bool auto_reset, uint32_t node_id,
                     uint32_t *event_id, uint32_t *event_trigger_data,
@@ -343,14 +399,14 @@ int kfd_event_create(struct file *devkfd, struct 
kfd_process *p,
        switch (event_type) {
        case KFD_EVENT_TYPE_SIGNAL:
        case KFD_EVENT_TYPE_DEBUG:
-               ret = create_signal_event(devkfd, p, ev);
+               ret = create_signal_event(devkfd, p, ev, NULL);
                if (!ret) {
                        *event_page_offset = KFD_MMAP_TYPE_EVENTS;
                        *event_slot_index = ev->event_id;
                }
                break;
        default:
-               ret = create_other_event(p, ev);
+               ret = create_other_event(p, ev, NULL);
                break;
        }
 
@@ -366,6 +422,147 @@ int kfd_event_create(struct file *devkfd, struct 
kfd_process *p,
        return ret;
 }
 
+/*
+ * kfd_event_restore - recreate one event from its CRIU checkpoint record.
+ * @devkfd:     KFD device file, forwarded to create_signal_event()
+ * @p:          process being restored
+ * @ev_bucket:  bucket of the event; supplies the private data offset and the
+ *              gpu_id for memory/HW exception events
+ * @priv_datas: start of the private data area; the entry for this event lives
+ *              at @priv_datas + @ev_bucket->priv_data_offset
+ *
+ * If the private data carries a non-zero user_handle, the signal page is
+ * mapped first. The event is then registered under its original event id.
+ * The caller must ensure the private data offset is within bounds.
+ *
+ * NOTE(review): event_id is a user-supplied uint32_t handed to the creation
+ * helpers as a const int *; confirm out-of-range ids are rejected there.
+ *
+ * Return: 0 on success, -ENOMEM if the event cannot be allocated, or the
+ * error from mapping the signal page or registering the event.
+ */
+int kfd_event_restore(struct file *devkfd, struct kfd_process *p,
+                     struct kfd_criu_event_bucket *ev_bucket,
+                     uint8_t *priv_datas)
+{
+       int ret = 0;
+       struct kfd_criu_event_priv_data *ev_priv;
+       struct kfd_event *ev;
+
+       ev_priv = (struct kfd_criu_event_priv_data *)(priv_datas + ev_bucket->priv_data_offset);
+
+       if (ev_priv->user_handle) {
+               ret = kfd_kmap_event_page(p, ev_priv->user_handle);
+               if (ret)
+                       return ret;
+       }
+
+       ev = kzalloc(sizeof(*ev), GFP_KERNEL);
+       if (!ev)
+               return -ENOMEM;
+
+       ev->type = ev_priv->type;
+       ev->auto_reset = ev_priv->auto_reset;
+       ev->signaled = ev_priv->signaled;
+
+       init_waitqueue_head(&ev->wq);
+
+       mutex_lock(&p->event_mutex);
+       switch (ev->type) {
+       case KFD_EVENT_TYPE_SIGNAL:
+       case KFD_EVENT_TYPE_DEBUG:
+               ret = create_signal_event(devkfd, p, ev, &ev_priv->event_id);
+               break;
+       case KFD_EVENT_TYPE_MEMORY:
+               memcpy(&ev->memory_exception_data,
+                       &ev_priv->memory_exception_data,
+                       sizeof(struct kfd_hsa_memory_exception_data));
+
+               /* gpu_id comes from the bucket so user space may have remapped it */
+               ev->memory_exception_data.gpu_id = ev_bucket->gpu_id;
+               ret = create_other_event(p, ev, &ev_priv->event_id);
+               break;
+       case KFD_EVENT_TYPE_HW_EXCEPTION:
+               memcpy(&ev->hw_exception_data,
+                       &ev_priv->hw_exception_data,
+                       sizeof(struct kfd_hsa_hw_exception_data));
+
+               ev->hw_exception_data.gpu_id = ev_bucket->gpu_id;
+               ret = create_other_event(p, ev, &ev_priv->event_id);
+               break;
+       default:
+               /*
+                * Same as kfd_event_create(): every remaining type is a
+                * non-signal event. Without this, ev was leaked and the
+                * event silently dropped while 0 was returned.
+                */
+               ret = create_other_event(p, ev, &ev_priv->event_id);
+               break;
+       }
+
+       if (ret)
+               kfree(ev);
+
+       mutex_unlock(&p->event_mutex);
+
+       return ret;
+}
+
+/*
+ * kfd_event_dump - fill a checkpoint buffer with every event of @p.
+ * @p:          process whose event IDR is walked
+ * @ev_buckets: buffer holding @num_events buckets immediately followed by
+ *              @num_events struct kfd_criu_event_priv_data entries
+ * @num_events: number of events the buffer has room for
+ *
+ * Return: 0 on success, -ENOMEM if the process has more events than
+ * @num_events.
+ */
+int kfd_event_dump(struct kfd_process *p,
+                  struct kfd_criu_event_bucket *ev_buckets,
+                  uint32_t num_events)
+{
+       struct kfd_criu_event_priv_data *priv_base;
+       struct kfd_event *ev;
+       uint32_t ev_id;
+       int idx = 0;
+
+       /* The private data array begins right behind the last bucket */
+       priv_base = (struct kfd_criu_event_priv_data *)&ev_buckets[num_events];
+
+       idr_for_each_entry(&p->event_idr, ev, ev_id) {
+               struct kfd_criu_event_bucket *bucket;
+               struct kfd_criu_event_priv_data *priv;
+
+               if (idx >= num_events) {
+                       pr_err("Number of events exceeds number allocated\n");
+                       return -ENOMEM;
+               }
+
+               bucket = ev_buckets + idx;
+               priv = priv_base + idx;
+
+               /* All entries are fixed-size today; the bucket still records
+                * offset and size so variable-sized entries remain possible.
+                */
+               bucket->priv_data_size = sizeof(*priv);
+               bucket->priv_data_offset = idx * sizeof(*priv);
+
+               /* Only the first event carries the signal page handle */
+               if (p->signal_page && idx == 0)
+                       priv->user_handle = p->signal_page->user_handle;
+
+               priv->type = ev->type;
+               priv->event_id = ev->event_id;
+               priv->signaled = ev->signaled;
+               priv->auto_reset = ev->auto_reset;
+
+               /* gpu_id is exposed in the bucket so the user space CRIU
+                * plugin can rewrite it if needed.
+                */
+               switch (priv->type) {
+               case KFD_EVENT_TYPE_MEMORY:
+                       memcpy(&priv->memory_exception_data,
+                              &ev->memory_exception_data,
+                              sizeof(struct kfd_hsa_memory_exception_data));
+                       bucket->gpu_id = priv->memory_exception_data.gpu_id;
+                       break;
+               case KFD_EVENT_TYPE_HW_EXCEPTION:
+                       memcpy(&priv->hw_exception_data,
+                              &ev->hw_exception_data,
+                              sizeof(struct kfd_hsa_hw_exception_data));
+                       bucket->gpu_id = priv->hw_exception_data.gpu_id;
+                       break;
+               default:
+                       bucket->gpu_id = 0;
+                       break;
+               }
+
+               pr_debug("Dumped event[%d] id = 0x%08x auto_reset = %x type = %x signaled = %x\n",
+                        idx, priv->event_id, priv->auto_reset,
+                        priv->type, priv->signaled);
+               idx++;
+       }
+
+       return 0;
+}
+
+/*
+ * kfd_get_num_events - count the events currently registered for @p.
+ * @p: process whose event IDR is walked
+ *
+ * Return: number of entries in @p->event_idr.
+ */
+int kfd_get_num_events(struct kfd_process *p)
+{
+       struct kfd_event *ev;
+       uint32_t id;
+       u32 num_events = 0;
+
+       idr_for_each_entry(&p->event_idr, ev, id)
+               num_events++;
+
+       /* Plain return: the former post-increment here was a dead store */
+       return num_events;
+}
+
 /* Assumes that p is current. */
 int kfd_event_destroy(struct kfd_process *p, uint32_t event_id)
 {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 7ed6f831109d..bf10a5305ef7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -1069,9 +1069,26 @@ struct kfd_criu_queue_priv_data {
 };
 
+/*
+ * Per-event private data stored in a CRIU image. One entry is written per
+ * event by kfd_event_dump() and read back by kfd_event_restore().
+ */
 struct kfd_criu_event_priv_data {
-       uint64_t reserved;
+       /* Signal page handle; only set on the first dumped event, and a
+        * non-zero value makes restore map the signal page first.
+        */
+       uint64_t user_handle;
+       /* Event id the event must be registered under on restore */
+       uint32_t event_id;
+       /* Copies of the auto_reset, type and signaled fields of the event */
+       uint32_t auto_reset;
+       uint32_t type;
+       uint32_t signaled;
+
+       /* Extra payload: memory_exception_data for KFD_EVENT_TYPE_MEMORY,
+        * hw_exception_data for KFD_EVENT_TYPE_HW_EXCEPTION.
+        */
+       union {
+               struct kfd_hsa_memory_exception_data memory_exception_data;
+               struct kfd_hsa_hw_exception_data hw_exception_data;
+       };
 };
 
+int kfd_event_restore(struct file *devkfd, struct kfd_process *p,
+                     struct kfd_criu_event_bucket *ev_bucket,
+                     uint8_t *priv_datas);
+
+int kfd_event_dump(struct kfd_process *p,
+                  struct kfd_criu_event_bucket *ev_buckets,
+                  uint32_t num_events);
+
 /* CRIU - End */
 
 /* Queue Context Management */
@@ -1238,12 +1255,14 @@ void kfd_signal_iommu_event(struct kfd_dev *dev,
 void kfd_signal_hw_exception_event(u32 pasid);
 int kfd_set_event(struct kfd_process *p, uint32_t event_id);
 int kfd_reset_event(struct kfd_process *p, uint32_t event_id);
-int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
-                      uint64_t size);
+int kfd_kmap_event_page(struct kfd_process *p, uint64_t event_page_offset);
+
 int kfd_event_create(struct file *devkfd, struct kfd_process *p,
                     uint32_t event_type, bool auto_reset, uint32_t node_id,
                     uint32_t *event_id, uint32_t *event_trigger_data,
                     uint64_t *event_page_offset, uint32_t *event_slot_index);
+
+int kfd_get_num_events(struct kfd_process *p);
 int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);
 
 void kfd_signal_vm_fault_event(struct kfd_dev *dev, u32 pasid,
-- 
2.17.1

Reply via email to