Tarun Sahu <[email protected]> writes:

> Introduce core infrastructure to support VM preservation with LUO.
>
> First two changes are just refactoring, no functional change, third
> change introduces a new member in struct kvm.
> - Move ITOA_MAX_LEN to kvm_mm.h for reuse by upcoming kvm_luo code.
> - Add a public kvm_create_vm_file() helper wrapping kvm_create_vm()
>   and anon_inode_getfile() to provide a unified VM file creation API.
> - Track a weak reference to the backing file in struct kvm under
>   CONFIG_LIVEUPDATE_GUEST_MEMFD to enable reverse file resolution
>   without circular lifetime dependencies.
>

Given the above, I think this should be separate patches.

> Signed-off-by: Tarun Sahu <[email protected]>
> ---
>  include/linux/kvm_host.h | 14 +++++++
>  virt/kvm/kvm_main.c      | 79 +++++++++++++++++++++++++++++-----------
>  virt/kvm/kvm_mm.h        |  3 ++
>  3 files changed, 75 insertions(+), 21 deletions(-)
>
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 4c14aee1fb06..9111a28637af 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -874,6 +874,18 @@ struct kvm {
>  #ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
>       /* Protected by slots_lock (for writes) and RCU (for reads) */
>       struct xarray mem_attr_array;
> +#endif
> +#ifdef CONFIG_LIVEUPDATE_GUEST_MEMFD
> +     /*
> +      * Weak reference to the VFS file backing this KVM instance. Stored
> +      * without incrementing the file refcount to prevent a circular lifetime
> +      * dependency (since file->private_data already pins this struct kvm).
> +      * Used exclusively to resolve the file pointer back from struct kvm.
> +      *
> +      * Written/cleared via rcu_assign_pointer() and read locklessly under
> +      * RCU (e.g. via get_file_active() to prevent ABA races).
> +      */
> +     struct file *vm_file;
>  #endif

We didn't really talk about this during the calls, but it seems weird to
preserve a vm_file with pretty much nothing other than the vm type. The
entire VM is re-created, which means it could potentially be a
completely different VM?

In some sense it's more flexible since the guest_memfd can be restored
with some completely different VM, but it seems like it could introduce
other issues.

I think other KVM folks would probably have more thoughts here.

>       char stats_id[KVM_STATS_NAME_SIZE];
>  };
> @@ -1074,7 +1086,9 @@ void kvm_get_kvm(struct kvm *kvm);
>  bool kvm_get_kvm_safe(struct kvm *kvm);
>  void kvm_put_kvm(struct kvm *kvm);
>  bool file_is_kvm(struct file *file);
> +struct file *kvm_create_vm_file(unsigned long type, const char *fdname);
>  void kvm_put_kvm_no_destroy(struct kvm *kvm);
> +void kvm_uevent_notify_vm_create(struct kvm *kvm);
>
>  static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id)
>  {
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 89489996fbc1..65f0c5fb353e 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -67,9 +67,6 @@
>  #include <linux/kvm_dirty_ring.h>
>
>
> -/* Worst case buffer size needed for holding an integer. */
> -#define ITOA_MAX_LEN 12
> -
>  MODULE_AUTHOR("Qumranet");
>  MODULE_DESCRIPTION("Kernel-based Virtual Machine (KVM) Hypervisor");
>  MODULE_LICENSE("GPL");
> @@ -1349,6 +1346,19 @@ static int kvm_vm_release(struct inode *inode, struct 
> file *filp)
>  {
>       struct kvm *kvm = filp->private_data;
>
> +#ifdef CONFIG_LIVEUPDATE_GUEST_MEMFD
> +     /*
> +      * Clear the weak reference of the vm file.
> +      * In case vm file is closed by userspace, but kvm still has
> +      * other users like vCPUs, clearing this pointer ensures
> +      * that we don't have a dangling pointer to a closed file.
> +      *
> +      * Cleared via rcu_assign_pointer() to ensure proper memory visibility
> +      * for concurrent lockless readers under RCU.
> +      */
> +     rcu_assign_pointer(kvm->vm_file, NULL);
> +#endif
> +
>       kvm_irqfd_release(kvm);
>
>       kvm_put_kvm(kvm);
> @@ -5476,11 +5486,47 @@ bool file_is_kvm(struct file *file)
>  }
>  EXPORT_SYMBOL_FOR_KVM_INTERNAL(file_is_kvm);
>
> +struct file *kvm_create_vm_file(unsigned long type, const char *fdname)
> +{
> +     struct kvm *kvm = kvm_create_vm(type, fdname);
> +     struct file *file;
> +
> +     if (IS_ERR(kvm))
> +             return ERR_CAST(kvm);
> +
> +     file = anon_inode_getfile("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
> +     if (IS_ERR(file)) {
> +             kvm_put_kvm(kvm);
> +             return file;
> +     }
> +
> +#ifdef CONFIG_LIVEUPDATE_GUEST_MEMFD
> +     /*
> +      * Weak reference to the file (without get_file()) to prevent a circular
> +      * dependency. Safe because the file's release path clears this pointer
> +      * and drops its reference to the VM.
> +      *
> +      * Written via rcu_assign_pointer() because the pointer can be read
> +      * locklessly under RCU (e.g., in kvm_gmem_luo_preserve() via
> +      * get_file_active() to prevent lockless ABA races).
> +      */
> +     rcu_assign_pointer(kvm->vm_file, file);
> +#endif
> +
> +     /*
> +      * Don't call kvm_put_kvm anymore at this point; file->f_op is
> +      * already set, with ->release() being kvm_vm_release().  In error
> +      * cases it will be called by the final fput(file) and will take
> +      * care of doing kvm_put_kvm(kvm).
> +      */
> +
> +     return file;
> +}
> +
>  static int kvm_dev_ioctl_create_vm(unsigned long type)
>  {
>       char fdname[ITOA_MAX_LEN + 1];
>       int r, fd;
> -     struct kvm *kvm;
>       struct file *file;
>
>       fd = get_unused_fd_flags(O_CLOEXEC);
> @@ -5489,31 +5535,17 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
>
>       snprintf(fdname, sizeof(fdname), "%d", fd);
>
> -     kvm = kvm_create_vm(type, fdname);
> -     if (IS_ERR(kvm)) {
> -             r = PTR_ERR(kvm);
> -             goto put_fd;
> -     }
> -
> -     file = anon_inode_getfile("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
> +     file = kvm_create_vm_file(type, fdname);
>       if (IS_ERR(file)) {
>               r = PTR_ERR(file);
> -             goto put_kvm;
> +             goto put_fd;
>       }
>
> -     /*
> -      * Don't call kvm_put_kvm anymore at this point; file->f_op is
> -      * already set, with ->release() being kvm_vm_release().  In error
> -      * cases it will be called by the final fput(file) and will take
> -      * care of doing kvm_put_kvm(kvm).
> -      */
> -     kvm_uevent_notify_change(KVM_EVENT_CREATE_VM, kvm);
> +     kvm_uevent_notify_change(KVM_EVENT_CREATE_VM, file->private_data);

Notifying with file->private_data threw me off... I would rather inline
the rcu_assign_pointer() in this function and have this line read
notify(..., kvm) like before.

>
>       fd_install(fd, file);
>       return fd;
>
> -put_kvm:
> -     kvm_put_kvm(kvm);
>  put_fd:
>       put_unused_fd(fd);
>       return r;
> @@ -6341,6 +6373,11 @@ static void kvm_uevent_notify_change(unsigned int 
> type, struct kvm *kvm)
>       kfree(env);
>  }
>
> +void kvm_uevent_notify_vm_create(struct kvm *kvm)
> +{
> +     kvm_uevent_notify_change(KVM_EVENT_CREATE_VM, kvm);
> +}
> +
>  static void kvm_init_debug(void)
>  {
>       const struct file_operations *fops;
> diff --git a/virt/kvm/kvm_mm.h b/virt/kvm/kvm_mm.h
> index 9fcc5d5b7f8d..7aa1d65c3d46 100644
> --- a/virt/kvm/kvm_mm.h
> +++ b/virt/kvm/kvm_mm.h
> @@ -3,6 +3,9 @@
>  #ifndef __KVM_MM_H__
>  #define __KVM_MM_H__ 1
>
> +/* Worst case buffer size needed for holding an integer as a string. */
> +#define ITOA_MAX_LEN 12
> +
>  /*
>   * Architectures can choose whether to use an rwlock or spinlock
>   * for the mmu_lock.  These macros, for use in common code
> --
> 2.54.0.1032.g2f8565e1d1-goog

Reply via email to