Tarun Sahu <[email protected]> writes: > Introduce core infrastructure to support VM preservation with LUO. > > First two changes are just refactoring, no functional change, third > change introduces a new member in struct kvm. > - Move ITOA_MAX_LEN to kvm_mm.h for reuse by upcoming kvm_luo code. > - Add a public kvm_create_vm_file() helper wrapping kvm_create_vm() > and anon_inode_getfile() to provide a unified VM file creation API. > - Track a weak reference to the backing file in struct kvm under > CONFIG_LIVEUPDATE_GUEST_MEMFD to enable reverse file resolution > without circular lifetime dependencies. >
Given the above, I think this should be separate patches. > Signed-off-by: Tarun Sahu <[email protected]> > --- > include/linux/kvm_host.h | 14 +++++++ > virt/kvm/kvm_main.c | 79 +++++++++++++++++++++++++++++----------- > virt/kvm/kvm_mm.h | 3 ++ > 3 files changed, 75 insertions(+), 21 deletions(-) > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > index 4c14aee1fb06..9111a28637af 100644 > --- a/include/linux/kvm_host.h > +++ b/include/linux/kvm_host.h > @@ -874,6 +874,18 @@ struct kvm { > #ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES > /* Protected by slots_lock (for writes) and RCU (for reads) */ > struct xarray mem_attr_array; > +#endif > +#ifdef CONFIG_LIVEUPDATE_GUEST_MEMFD > + /* > + * Weak reference to the VFS file backing this KVM instance. Stored > + * without incrementing the file refcount to prevent a circular lifetime > + * dependency (since file->private_data already pins this struct kvm). > + * Used exclusively to resolve the file pointer back from struct kvm. > + * > + * Written/cleared via rcu_assign_pointer() and read locklessly under > + * RCU (e.g. via get_file_active() to prevent ABA races). > + */ > + struct file *vm_file; > #endif We didn't really talk about this during the calls, but it seems weird to preserve a vm_file with pretty much nothing other than the vm type. The entire VM is re-created, which means it could potentially be a completely different VM? In some sense it's more flexible since the guest_memfd can be restored with some completely different VM, but it seems like it could introduce other issues. I think other KVM folks would probably have more thoughts here. > char stats_id[KVM_STATS_NAME_SIZE]; > }; > @@ -1074,7 +1086,9 @@ void kvm_get_kvm(struct kvm *kvm); > bool kvm_get_kvm_safe(struct kvm *kvm); > void kvm_put_kvm(struct kvm *kvm); > bool file_is_kvm(struct file *file); > +struct file *kvm_create_vm_file(unsigned long type, const char *fdname); > void kvm_put_kvm_no_destroy(struct kvm *kvm); > +void kvm_uevent_notify_vm_create(struct kvm *kvm); > > static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id) > { > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c > index 89489996fbc1..65f0c5fb353e 100644 > --- a/virt/kvm/kvm_main.c > +++ b/virt/kvm/kvm_main.c > @@ -67,9 +67,6 @@ > #include <linux/kvm_dirty_ring.h> > > > -/* Worst case buffer size needed for holding an integer. */ > -#define ITOA_MAX_LEN 12 > - > MODULE_AUTHOR("Qumranet"); > MODULE_DESCRIPTION("Kernel-based Virtual Machine (KVM) Hypervisor"); > MODULE_LICENSE("GPL"); > @@ -1349,6 +1346,19 @@ static int kvm_vm_release(struct inode *inode, struct > file *filp) > { > struct kvm *kvm = filp->private_data; > > +#ifdef CONFIG_LIVEUPDATE_GUEST_MEMFD > + /* > + * Clear the weak reference of the vm file. > + * In case vm file is closed by userspace, but kvm still has > + * other users like vCPUs, clearing this pointer ensures > + * that we don't have a dangling pointer to a closed file. > + * > + * Cleared via rcu_assign_pointer() to ensure proper memory visibility > + * for concurrent lockless readers under RCU. > + */ > + rcu_assign_pointer(kvm->vm_file, NULL); > +#endif > + > kvm_irqfd_release(kvm); > > kvm_put_kvm(kvm); > @@ -5476,11 +5486,47 @@ bool file_is_kvm(struct file *file) > } > EXPORT_SYMBOL_FOR_KVM_INTERNAL(file_is_kvm); > > +struct file *kvm_create_vm_file(unsigned long type, const char *fdname) > +{ > + struct kvm *kvm = kvm_create_vm(type, fdname); > + struct file *file; > + > + if (IS_ERR(kvm)) > + return ERR_CAST(kvm); > + > + file = anon_inode_getfile("kvm-vm", &kvm_vm_fops, kvm, O_RDWR); > + if (IS_ERR(file)) { > + kvm_put_kvm(kvm); > + return file; > + } > + > +#ifdef CONFIG_LIVEUPDATE_GUEST_MEMFD > + /* > + * Weak reference to the file (without get_file()) to prevent a circular > + * dependency. Safe because the file's release path clears this pointer > + * and drops its reference to the VM. > + * > + * Written via rcu_assign_pointer() because the pointer can be read > + * locklessly under RCU (e.g., in kvm_gmem_luo_preserve() via > + * get_file_active() to prevent lockless ABA races). > + */ > + rcu_assign_pointer(kvm->vm_file, file); > +#endif > + > + /* > + * Don't call kvm_put_kvm anymore at this point; file->f_op is > + * already set, with ->release() being kvm_vm_release(). In error > + * cases it will be called by the final fput(file) and will take > + * care of doing kvm_put_kvm(kvm). > + */ > + > + return file; > +} > + > static int kvm_dev_ioctl_create_vm(unsigned long type) > { > char fdname[ITOA_MAX_LEN + 1]; > int r, fd; > - struct kvm *kvm; > struct file *file; > > fd = get_unused_fd_flags(O_CLOEXEC); > @@ -5489,31 +5535,17 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) > > snprintf(fdname, sizeof(fdname), "%d", fd); > > - kvm = kvm_create_vm(type, fdname); > - if (IS_ERR(kvm)) { > - r = PTR_ERR(kvm); > - goto put_fd; > - } > - > - file = anon_inode_getfile("kvm-vm", &kvm_vm_fops, kvm, O_RDWR); > + file = kvm_create_vm_file(type, fdname); > if (IS_ERR(file)) { > r = PTR_ERR(file); > - goto put_kvm; > + goto put_fd; > } > > - /* > - * Don't call kvm_put_kvm anymore at this point; file->f_op is > - * already set, with ->release() being kvm_vm_release(). In error > - * cases it will be called by the final fput(file) and will take > - * care of doing kvm_put_kvm(kvm). > - */ > - kvm_uevent_notify_change(KVM_EVENT_CREATE_VM, kvm); > + kvm_uevent_notify_change(KVM_EVENT_CREATE_VM, file->private_data); Notifying with file->private_data threw me off... I would rather inline the rcu_assign_pointer() in this function and have this line read notify(..., kvm) like before. > > fd_install(fd, file); > return fd; > > -put_kvm: > - kvm_put_kvm(kvm); > put_fd: > put_unused_fd(fd); > return r; > @@ -6341,6 +6373,11 @@ static void kvm_uevent_notify_change(unsigned int > type, struct kvm *kvm) > kfree(env); > } > > +void kvm_uevent_notify_vm_create(struct kvm *kvm) > +{ > + kvm_uevent_notify_change(KVM_EVENT_CREATE_VM, kvm); > +} > + > static void kvm_init_debug(void) > { > const struct file_operations *fops; > diff --git a/virt/kvm/kvm_mm.h b/virt/kvm/kvm_mm.h > index 9fcc5d5b7f8d..7aa1d65c3d46 100644 > --- a/virt/kvm/kvm_mm.h > +++ b/virt/kvm/kvm_mm.h > @@ -3,6 +3,9 @@ > #ifndef __KVM_MM_H__ > #define __KVM_MM_H__ 1 > > +/* Worst case buffer size needed for holding an integer as a string. */ > +#define ITOA_MAX_LEN 12 > + > /* > * Architectures can choose whether to use an rwlock or spinlock > * for the mmu_lock. These macros, for use in common code > -- > 2.54.0.1032.g2f8565e1d1-goog

