If `KVM_GMEM_NO_DIRECT_MAP` is set, all gmem folios are removed from the
direct map immediately after allocation. Add a flag to
kvm_gmem_get_folio to override this behavior, and expose it via
`kvm_gmem_get_pfn`. Only allow this flag to be set if KVM can actually
access gmem (currently only if the vm type is KVM_X86_SW_PROTECTED_VM).

KVM_GMEM_GET_PFN_SHARED defers the direct map removal for newly
allocated folios until kvm_gmem_put_shared_pfn is called. For existing
folios, the direct map entry is temporarily restored until
kvm_gmem_put_shared_pfn is called.

The folio lock must be held the entire time the folio is present in the
direct map, to prevent races with concurrent calls to
kvm_gmem_folio_set_private that might remove direct map entries while
the folios are being accessed by KVM. As this is currently not possible
(kvm_gmem_get_pfn always unlocks the folio), the next patch will
introduce a KVM_GMEM_GET_PFN_LOCKED flag.

Signed-off-by: Patrick Roy <[email protected]>
---
 arch/x86/kvm/mmu/mmu.c   |  2 +-
 include/linux/kvm_host.h | 12 +++++++++--
 virt/kvm/guest_memfd.c   | 46 +++++++++++++++++++++++++++++++---------
 3 files changed, 47 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 901be9e420a4c..cb2f111f2cce0 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4349,7 +4349,7 @@ static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu,
        }
 
        r = kvm_gmem_get_pfn(vcpu->kvm, fault->slot, fault->gfn, &fault->pfn,
-                            &max_order);
+                            &max_order, 0);
        if (r) {
                kvm_mmu_prepare_memory_fault_exit(vcpu, fault);
                return r;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 689e8be873a75..8a2975674de4b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -2432,17 +2432,25 @@ static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
 }
 #endif /* CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES */
 
+#define KVM_GMEM_GET_PFN_SHARED         BIT(0)
+#define KVM_GMEM_GET_PFN_PREPARE        BIT(31)  /* internal */
+
 #ifdef CONFIG_KVM_PRIVATE_MEM
 int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
-                    gfn_t gfn, kvm_pfn_t *pfn, int *max_order);
+                    gfn_t gfn, kvm_pfn_t *pfn, int *max_order, unsigned long flags);
+int kvm_gmem_put_shared_pfn(kvm_pfn_t pfn);
 #else
 static inline int kvm_gmem_get_pfn(struct kvm *kvm,
                                   struct kvm_memory_slot *slot, gfn_t gfn,
-                                  kvm_pfn_t *pfn, int *max_order)
+                                  kvm_pfn_t *pfn, int *max_order, unsigned long flags)
 {
        KVM_BUG_ON(1, kvm);
        return -EIO;
 }
+static inline int kvm_gmem_put_shared_pfn(kvm_pfn_t pfn)
+{
+       return -EIO;
+}
 #endif /* CONFIG_KVM_PRIVATE_MEM */
 
 #ifdef CONFIG_HAVE_KVM_GMEM_PREPARE
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index 2ed27992206f3..492b04f4e5c18 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -55,6 +55,11 @@ static bool kvm_gmem_test_no_direct_map(struct inode *inode)
        return ((unsigned long)inode->i_private & KVM_GMEM_NO_DIRECT_MAP) == KVM_GMEM_NO_DIRECT_MAP;
 }
 
+static bool kvm_gmem_test_accessible(struct kvm *kvm)
+{
+       return kvm->arch.vm_type == KVM_X86_SW_PROTECTED_VM;
+}
+
 static int kvm_gmem_folio_set_private(struct folio *folio)
 {
        unsigned long start, npages, i;
@@ -110,10 +115,11 @@ static int kvm_gmem_folio_clear_private(struct folio *folio)
        return r;
 }
 
-static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index, bool prepare)
+static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index, unsigned long flags)
 {
        int r;
        struct folio *folio;
+       bool share = flags & KVM_GMEM_GET_PFN_SHARED;
 
        /* TODO: Support huge pages. */
        folio = filemap_grab_folio(inode->i_mapping, index);
@@ -139,7 +145,7 @@ static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index, bool
                folio_mark_uptodate(folio);
        }
 
-       if (prepare) {
+       if (flags & KVM_GMEM_GET_PFN_PREPARE) {
                r = kvm_gmem_prepare_folio(inode, index, folio);
                if (r < 0)
                        goto out_err;
@@ -148,12 +154,16 @@ static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index, bool
        if (!kvm_gmem_test_no_direct_map(inode))
                goto out;

-       if (!folio_test_private(folio)) {
+       r = 0;
+       if (folio_test_private(folio) && share) {
+               r = kvm_gmem_folio_clear_private(folio);
+       } else if (!folio_test_private(folio) && !share) {
                r = kvm_gmem_folio_set_private(folio);
-               if (r)
-                       goto out_err;
        }

+       if (r)
+               goto out_err;
+
 out:
        /*
         * Ignore accessed, referenced, and dirty flags.  The memory is
@@ -264,7 +273,7 @@ static long kvm_gmem_allocate(struct inode *inode, loff_t offset, loff_t len)
                        break;
                }
 
-               folio = kvm_gmem_get_folio(inode, index, true);
+               folio = kvm_gmem_get_folio(inode, index, KVM_GMEM_GET_PFN_PREPARE);
                if (IS_ERR(folio)) {
                        r = PTR_ERR(folio);
                        break;
@@ -624,7 +633,7 @@ void kvm_gmem_unbind(struct kvm_memory_slot *slot)
 }
 
 static int __kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot,
-                      gfn_t gfn, kvm_pfn_t *pfn, int *max_order, bool prepare)
+                      gfn_t gfn, kvm_pfn_t *pfn, int *max_order, unsigned long flags)
 {
        pgoff_t index = gfn - slot->base_gfn + slot->gmem.pgoff;
        struct kvm_gmem *gmem = file->private_data;
@@ -643,7 +652,7 @@ static int __kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot,
                return -EIO;
        }
 
-       folio = kvm_gmem_get_folio(file_inode(file), index, prepare);
+       folio = kvm_gmem_get_folio(file_inode(file), index, flags);
        if (IS_ERR(folio))
                return PTR_ERR(folio);
 
@@ -667,20 +676,38 @@ static int __kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot,
 }

 int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
-                    gfn_t gfn, kvm_pfn_t *pfn, int *max_order)
+                    gfn_t gfn, kvm_pfn_t *pfn, int *max_order, unsigned long flags)
 {
        struct file *file = kvm_gmem_get_file(slot);
        int r;
+       int valid_flags = KVM_GMEM_GET_PFN_SHARED;
+
+       if ((flags & valid_flags) != flags)
+               return -EINVAL;
+
+       if ((flags & KVM_GMEM_GET_PFN_SHARED) && !kvm_gmem_test_accessible(kvm))
+               return -EPERM;

        if (!file)
                return -EFAULT;

-       r = __kvm_gmem_get_pfn(file, slot, gfn, pfn, max_order, true);
+       r = __kvm_gmem_get_pfn(file, slot, gfn, pfn, max_order, flags | KVM_GMEM_GET_PFN_PREPARE);
        fput(file);
        return r;
 }
 EXPORT_SYMBOL_GPL(kvm_gmem_get_pfn);

+int kvm_gmem_put_shared_pfn(kvm_pfn_t pfn)
+{
+       struct folio *folio = pfn_folio(pfn);
+
+       if (!kvm_gmem_test_no_direct_map(folio_inode(folio)))
+               return 0;
+
+       return kvm_gmem_folio_set_private(folio);
+}
+EXPORT_SYMBOL_GPL(kvm_gmem_put_shared_pfn);
+
 long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long npages,
                       kvm_gmem_populate_cb post_populate, void *opaque)
 {
-- 
2.46.0


Reply via email to