|
static struct page *alloc_huge_page(struct vm_area_struct *vma, unsigned long addr, int avoid_reserve) { struct hstate *h = hstate_vma(vma); struct page *page; struct address_space *mapping = vma->vm_file->f_mapping; struct inode *inode = mapping->host; long chg; /* * Processes that did not create the mapping will have no reserves and * will not have accounted against quota. Check that the quota can be * made before satisfying the allocation * MAP_NORESERVE mappings may also need pages and quota allocated * if no reserve mapping overlaps. */ chg = vma_needs_reservation(h, vma, addr); if (chg < 0) return ERR_PTR(chg); if (chg) if (hugetlb_get_quota(inode->i_mapping, chg)) return ERR_PTR(-ENOSPC); spin_lock(&hugetlb_lock); page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve); spin_unlock(&hugetlb_lock); if (!page) { page = alloc_buddy_huge_page(h, vma, addr); if (!page) { hugetlb_put_quota(inode->i_mapping, chg); return ERR_PTR(-VM_FAULT_OOM); } } set_page_refcounted(page); set_page_private(page, (unsigned long) mapping); vma_commit_reservation(h, vma, addr); return page; } static struct page *alloc_buddy_huge_page(struct hstate *h, struct vm_area_struct *vma, unsigned long address) { struct page *page; unsigned int nid; if (h->order >= MAX_ORDER) return NULL; /* * Assume we will successfully allocate the surplus page to * prevent racing processes from causing the surplus to exceed * overcommit * * This however introduces a different race, where a process B * tries to grow the static hugepage pool while alloc_pages() is * called by process A. B will only examine the per-node * counters in determining if surplus huge pages can be * converted to normal huge pages in adjust_pool_surplus(). A * won't be able to increment the per-node counter, until the * lock is dropped by B, but B doesn't drop hugetlb_lock until * no more huge pages can be converted from surplus to normal * state (and doesn't try to convert again). Thus, we have a * case where a surplus huge page exists, the pool is grown, and * the surplus huge page still exists after, even though it * should just have been converted to a normal huge page. This * does not leak memory, though, as the hugepage will be freed * once it is out of use. It also does not allow the counters to * go out of whack in adjust_pool_surplus() as we don't modify * the node values until we've gotten the hugepage and only the * per-node value is checked there. */ spin_lock(&hugetlb_lock); if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages) { spin_unlock(&hugetlb_lock); return NULL; } else { h->nr_huge_pages++; h->surplus_huge_pages++; } spin_unlock(&hugetlb_lock); page = alloc_pages(htlb_alloc_mask|__GFP_COMP| __GFP_REPEAT|__GFP_NOWARN, huge_page_order(h)); if (page && arch_prepare_hugepage(page)) { __free_pages(page, huge_page_order(h)); return NULL; } if (page && arch_prepare_hugepage(page)) { __free_pages(page, huge_page_order(h)); return NULL; } spin_lock(&hugetlb_lock); if (page) { /* * This page is now managed by the hugetlb allocator and has * no users -- drop the buddy allocator's reference. */ put_page_testzero(page); VM_BUG_ON(page_count(page)); nid = page_to_nid(page); set_compound_page_dtor(page, free_huge_page); /* * We incremented the global counters already */ h->nr_huge_pages_node[nid]++; h->surplus_huge_pages_node[nid]++; __count_vm_event(HTLB_BUDDY_PGALLOC); } else { h->nr_huge_pages--; h->surplus_huge_pages--; __count_vm_event(HTLB_BUDDY_PGALLOC_FAIL); } spin_unlock(&hugetlb_lock); return page; } |
