Page tables can take a sizable chunk out of system memory, and their
allocations are easy to trigger from userspace, so they should be
accounted to kmemcg.

This patch marks page table allocations with __GFP_ACCOUNT for x86.
Note that we must not charge allocations of kernel page tables: they
can be shared among processes from different cgroups, so accounting
them to a particular one could pin other cgroups indefinitely. We
therefore clear the __GFP_ACCOUNT flag when a page table is allocated
for the kernel (i.e. for init_mm).

Signed-off-by: Vladimir Davydov <vdavy...@virtuozzo.com>
Cc: Thomas Gleixner <t...@linutronix.de>
Cc: Ingo Molnar <mi...@redhat.com>
Cc: "H. Peter Anvin" <h...@zytor.com>
---
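For reference (not part of the patch): every hunk below applies the same
pattern, so it could in principle be factored into a small helper along
these lines. pgtable_gfp_flags() is a hypothetical name used only for
illustration; this patch open-codes the check instead.

#include <linux/mm.h>	/* gfp_t, struct mm_struct, init_mm, __GFP_ACCOUNT */

/*
 * Illustration only: charge page tables of user mms to kmemcg, but not
 * those of init_mm, since kernel page tables can be shared across cgroups.
 */
static inline gfp_t pgtable_gfp_flags(struct mm_struct *mm, gfp_t gfp)
{
	if (mm == &init_mm)
		gfp &= ~__GFP_ACCOUNT;
	return gfp;
}

With such a helper, pud_alloc_one() would reduce to
get_zeroed_page(pgtable_gfp_flags(mm, GFP_KERNEL_ACCOUNT | __GFP_REPEAT)).
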
 arch/x86/include/asm/pgalloc.h | 12 ++++++++++--
 arch/x86/mm/pgtable.c          | 11 ++++++++---
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index bf7f8b55b0f9..2f531633cb16 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -81,7 +81,11 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
        struct page *page;
-       page = alloc_pages(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO, 0);
+       gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_REPEAT | __GFP_ZERO;
+
+       if (mm == &init_mm)
+               gfp &= ~__GFP_ACCOUNT;
+       page = alloc_pages(gfp, 0);
        if (!page)
                return NULL;
        if (!pgtable_pmd_page_ctor(page)) {
@@ -125,7 +129,11 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-       return (pud_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
+       gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_REPEAT;
+
+       if (mm == &init_mm)
+               gfp &= ~__GFP_ACCOUNT;
+       return (pud_t *)get_zeroed_page(gfp);
 }
 
 static inline void pud_free(struct mm_struct *mm, pud_t *pud)
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 4eb287e25043..421ac6b74d11 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -6,7 +6,8 @@
 #include <asm/fixmap.h>
 #include <asm/mtrr.h>
 
-#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO
+#define PGALLOC_GFP (GFP_KERNEL_ACCOUNT | __GFP_NOTRACK | __GFP_REPEAT | \
+                    __GFP_ZERO)
 
 #ifdef CONFIG_HIGHPTE
 #define PGALLOC_USER_GFP __GFP_HIGHMEM
@@ -18,7 +19,7 @@ gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;
 
 pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 {
-       return (pte_t *)__get_free_page(PGALLOC_GFP);
+       return (pte_t *)__get_free_page(PGALLOC_GFP & ~__GFP_ACCOUNT);
 }
 
 pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
@@ -207,9 +208,13 @@ static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[])
 {
        int i;
        bool failed = false;
+       gfp_t gfp = PGALLOC_GFP;
+
+       if (mm == &init_mm)
+               gfp &= ~__GFP_ACCOUNT;
 
        for(i = 0; i < PREALLOCATED_PMDS; i++) {
-               pmd_t *pmd = (pmd_t *)__get_free_page(PGALLOC_GFP);
+               pmd_t *pmd = (pmd_t *)__get_free_page(gfp);
                if (!pmd)
                        failed = true;
                if (pmd && !pgtable_pmd_page_ctor(virt_to_page(pmd))) {
-- 
2.1.4
