When using remap_pfn_range() from a fault handler, we are exposed to
races between concurrent faults. Rather than hitting a BUG, report the
error back to the caller, like vm_insert_pfn().

v2: Fix the pte address for unmapping along the error path.
v3: Report the error back and cleanup partial remaps.

Signed-off-by: Chris Wilson <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: "Kirill A. Shutemov" <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Cyrill Gorcunov <[email protected]>
Cc: Johannes Weiner <[email protected]>
Cc: [email protected]
---

Whilst this has the semantics I want (allowing two concurrent, but
serialised, pagefaults that try to prefault the same object to both
succeed), it looks fragile and fraught with subtlety.
-Chris
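
For illustration, a minimal sketch of how a driver fault handler might
consume the new return value; my_obj, my_obj_fault and obj->base_pfn are
hypothetical names, and the handler is assumed to be serialised against
other faults on the same object by the driver (e.g. under an object
mutex). Only remap_pfn_range() and the -EBUSY convention come from this
patch:

static int my_obj_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
        struct my_obj *obj = vma->vm_private_data;      /* hypothetical */
        int err;

        /*
         * Prefault the entire object in one go; a concurrent (but
         * serialised) fault may already have populated the range.
         */
        err = remap_pfn_range(vma, vma->vm_start, obj->base_pfn,
                              vma->vm_end - vma->vm_start,
                              vma->vm_page_prot);
        switch (err) {
        case 0:
        case -EBUSY:    /* first PTE already set: earlier fault won the race */
                return VM_FAULT_NOPAGE;
        case -ENOMEM:
                return VM_FAULT_OOM;
        default:        /* partial remap was zapped before returning */
                return VM_FAULT_SIGBUS;
        }
}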

---
 mm/memory.c | 54 ++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 38 insertions(+), 16 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index d67fd9f..be51fcc 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1657,32 +1657,41 @@ EXPORT_SYMBOL(vm_insert_mixed);
  * in null mappings (currently treated as "copy-on-access")
  */
 static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
-                       unsigned long addr, unsigned long end,
-                       unsigned long pfn, pgprot_t prot)
+                          unsigned long addr, unsigned long end,
+                          unsigned long pfn, pgprot_t prot,
+                          bool first)
 {
        pte_t *pte;
        spinlock_t *ptl;
+       int err = 0;
 
        pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
        if (!pte)
                return -ENOMEM;
        arch_enter_lazy_mmu_mode();
        do {
-               BUG_ON(!pte_none(*pte));
+               if (!pte_none(*pte)) {
+                       err = first ? -EBUSY : -EINVAL;
+                       pte++;
+                       break;
+               }
+               first = false;
                set_pte_at(mm, addr, pte, pte_mkspecial(pfn_pte(pfn, prot)));
                pfn++;
        } while (pte++, addr += PAGE_SIZE, addr != end);
        arch_leave_lazy_mmu_mode();
        pte_unmap_unlock(pte - 1, ptl);
-       return 0;
+       return err;
 }
 
 static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
-                       unsigned long addr, unsigned long end,
-                       unsigned long pfn, pgprot_t prot)
+                                 unsigned long addr, unsigned long end,
+                                 unsigned long pfn, pgprot_t prot,
+                                 bool first)
 {
        pmd_t *pmd;
        unsigned long next;
+       int err;
 
        pfn -= addr >> PAGE_SHIFT;
        pmd = pmd_alloc(mm, pud, addr);
@@ -1691,19 +1700,23 @@ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
        VM_BUG_ON(pmd_trans_huge(*pmd));
        do {
                next = pmd_addr_end(addr, end);
-               if (remap_pte_range(mm, pmd, addr, next,
-                               pfn + (addr >> PAGE_SHIFT), prot))
-                       return -ENOMEM;
+               err = remap_pte_range(mm, pmd, addr, next,
+                                     pfn + (addr >> PAGE_SHIFT), prot, first);
+               if (err)
+                       return err;
+
+               first = false;
        } while (pmd++, addr = next, addr != end);
        return 0;
 }
 
 static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd,
-                       unsigned long addr, unsigned long end,
-                       unsigned long pfn, pgprot_t prot)
+                                 unsigned long addr, unsigned long end,
+                                 unsigned long pfn, pgprot_t prot, bool first)
 {
        pud_t *pud;
        unsigned long next;
+       int err;
 
        pfn -= addr >> PAGE_SHIFT;
        pud = pud_alloc(mm, pgd, addr);
@@ -1711,9 +1724,12 @@ static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd,
                return -ENOMEM;
        do {
                next = pud_addr_end(addr, end);
-               if (remap_pmd_range(mm, pud, addr, next,
-                               pfn + (addr >> PAGE_SHIFT), prot))
-                       return -ENOMEM;
+               err = remap_pmd_range(mm, pud, addr, next,
+                                     pfn + (addr >> PAGE_SHIFT), prot, first);
+               if (err)
+                       return err;
+
+               first = false;
        } while (pud++, addr = next, addr != end);
        return 0;
 }
@@ -1735,6 +1751,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
        unsigned long next;
        unsigned long end = addr + PAGE_ALIGN(size);
        struct mm_struct *mm = vma->vm_mm;
+       bool first = true;
        int err;
 
        /*
@@ -1774,13 +1791,18 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
        do {
                next = pgd_addr_end(addr, end);
                err = remap_pud_range(mm, pgd, addr, next,
-                               pfn + (addr >> PAGE_SHIFT), prot);
+                                     pfn + (addr >> PAGE_SHIFT), prot, first);
                if (err)
                        break;
+
+               first = false;
        } while (pgd++, addr = next, addr != end);
 
-       if (err)
+       if (err) {
                untrack_pfn(vma, pfn, PAGE_ALIGN(size));
+               if (err != -EBUSY)
+                       zap_page_range_single(vma, addr, size, NULL);
+       }
 
        return err;
 }
-- 
1.9.1
