[linuxkernelnewbies] hugemem allocation

Peter Teoh Wed, 22 Apr 2009 08:57:41 -0700

/*
 * Obtain one huge page for the address @addr inside @vma.
 *
 * The page is taken from the hstate's existing pool when
 * dequeue_huge_page_vma() can supply one, otherwise from
 * alloc_buddy_huge_page().
 *
 * Returns the page on success. On failure an ERR_PTR() is returned:
 *   - the negative value reported by vma_needs_reservation(),
 *   - -ENOSPC when hugetlb_get_quota() refuses the charge,
 *   - -VM_FAULT_OOM when neither source yields a page.
 */
static struct page *alloc_huge_page(struct vm_area_struct *vma,
                                    unsigned long addr, int avoid_reserve)
{
        struct address_space *mapping = vma->vm_file->f_mapping;
        struct inode *host = mapping->host;
        struct hstate *hstate = hstate_vma(vma);
        struct page *hpage;
        long needed;

        /*
         * A process that did not create the mapping holds no reserves and
         * has not been charged against quota, so the quota must be checked
         * before the allocation is satisfied. MAP_NORESERVE mappings may
         * likewise need pages and quota when no reserve mapping overlaps.
         */
        needed = vma_needs_reservation(hstate, vma, addr);
        if (needed < 0)
                return ERR_PTR(needed);
        if (needed && hugetlb_get_quota(host->i_mapping, needed))
                return ERR_PTR(-ENOSPC);

        /* First choice: a page already sitting in the pool. */
        spin_lock(&hugetlb_lock);
        hpage = dequeue_huge_page_vma(hstate, vma, addr, avoid_reserve);
        spin_unlock(&hugetlb_lock);

        /* Second choice: a fresh page from the buddy allocator. */
        if (!hpage)
                hpage = alloc_buddy_huge_page(hstate, vma, addr);

        if (!hpage) {
                /* Nothing available: hand the quota charge back. */
                hugetlb_put_quota(host->i_mapping, needed);
                return ERR_PTR(-VM_FAULT_OOM);
        }

        set_page_refcounted(hpage);
        /* Stash the owning mapping in the page's private field. */
        set_page_private(hpage, (unsigned long) mapping);

        vma_commit_reservation(hstate, vma, addr);

        return hpage;
}

/*
 * Try to allocate one surplus huge page for @h directly via alloc_pages().
 *
 * @vma and @address are accepted for the callers' benefit but are not used
 * in this function.
 *
 * Returns the new page, with the allocator's reference dropped and
 * free_huge_page installed as its compound destructor, or NULL when:
 *   - h->order is not below MAX_ORDER,
 *   - the surplus count has already reached h->nr_overcommit_huge_pages,
 *   - alloc_pages() fails, or
 *   - arch_prepare_hugepage() rejects the page.
 *
 * The global counters are bumped optimistically before the allocation and
 * rolled back on every failure after that point.
 */
static struct page *alloc_buddy_huge_page(struct hstate *h,
                        struct vm_area_struct *vma, unsigned long address)
{
        struct page *page;
        unsigned int nid;

        if (h->order >= MAX_ORDER)
                return NULL;

        /*
         * Assume we will successfully allocate the surplus page to
         * prevent racing processes from causing the surplus to exceed
         * overcommit
         *
         * This however introduces a different race, where a process B
         * tries to grow the static hugepage pool while alloc_pages() is
         * called by process A. B will only examine the per-node
         * counters in determining if surplus huge pages can be
         * converted to normal huge pages in adjust_pool_surplus(). A
         * won't be able to increment the per-node counter, until the
         * lock is dropped by B, but B doesn't drop hugetlb_lock until
         * no more huge pages can be converted from surplus to normal
         * state (and doesn't try to convert again). Thus, we have a
         * case where a surplus huge page exists, the pool is grown, and
         * the surplus huge page still exists after, even though it
         * should just have been converted to a normal huge page. This
         * does not leak memory, though, as the hugepage will be freed
         * once it is out of use. It also does not allow the counters to
         * go out of whack in adjust_pool_surplus() as we don't modify
         * the node values until we've gotten the hugepage and only the
         * per-node value is checked there.
         */
        spin_lock(&hugetlb_lock);
        if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages) {
                spin_unlock(&hugetlb_lock);
                return NULL;
        } else {
                h->nr_huge_pages++;
                h->surplus_huge_pages++;
        }
        spin_unlock(&hugetlb_lock);

        page = alloc_pages(htlb_alloc_mask|__GFP_COMP|
                                        __GFP_REPEAT|__GFP_NOWARN,
                                        huge_page_order(h));

        /*
         * If the architecture hook rejects the page, give it back and
         * continue as if the allocation itself had failed. Falling through
         * (rather than returning here) lets the failure branch below undo
         * the optimistic increments of nr_huge_pages and surplus_huge_pages
         * made above; returning directly would leave them inflated. The
         * hook is invoked exactly once per allocated page.
         */
        if (page && arch_prepare_hugepage(page)) {
                __free_pages(page, huge_page_order(h));
                page = NULL;
        }

        spin_lock(&hugetlb_lock);
        if (page) {
                /*
                 * This page is now managed by the hugetlb allocator and has
                 * no users -- drop the buddy allocator's reference.
                 */
                put_page_testzero(page);
                VM_BUG_ON(page_count(page));
                nid = page_to_nid(page);
                set_compound_page_dtor(page, free_huge_page);
                /*
                 * We incremented the global counters already
                 */
                h->nr_huge_pages_node[nid]++;
                h->surplus_huge_pages_node[nid]++;
                __count_vm_event(HTLB_BUDDY_PGALLOC);
        } else {
                /* Roll back the optimistic global accounting. */
                h->nr_huge_pages--;
                h->surplus_huge_pages--;
                __count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
        }
        spin_unlock(&hugetlb_lock);

        return page;
}

[linuxkernelnewbies] hugemem allocation

Reply via email to