From: Johannes Weiner <han...@cmpxchg.org>

Support nesting of memalloc_use_memcg() so that it can also be used
from an interrupt context.

Make memalloc_use_memcg() return the old memcg and convert existing
users to a stacking model. Delete the now-unused memalloc_unuse_memcg().

Roman: I've rephrased the original commit log, because it was
focused on the accounting problem related to loop devices. I made
it less specific, so it can work for bpf too. Also rebased to the
current state of the mm tree.

The original patch can be found here:
https://lkml.org/lkml/2020/5/28/806

Signed-off-by: Johannes Weiner <han...@cmpxchg.org>
Signed-off-by: Roman Gushchin <g...@fb.com>
---
 fs/buffer.c                          |  6 +++---
 fs/notify/fanotify/fanotify.c        |  5 +++--
 fs/notify/inotify/inotify_fsnotify.c |  5 +++--
 include/linux/sched/mm.h             | 28 +++++++++-------------------
 mm/memcontrol.c                      |  6 +++---
 5 files changed, 21 insertions(+), 29 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index 061dd202979d..97ef480db0da 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -842,13 +842,13 @@ struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
        struct buffer_head *bh, *head;
        gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT;
        long offset;
-       struct mem_cgroup *memcg;
+       struct mem_cgroup *memcg, *old_memcg;
 
        if (retry)
                gfp |= __GFP_NOFAIL;
 
        memcg = get_mem_cgroup_from_page(page);
-       memalloc_use_memcg(memcg);
+       old_memcg = memalloc_use_memcg(memcg);
 
        head = NULL;
        offset = PAGE_SIZE;
@@ -867,7 +867,7 @@ struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
                set_bh_page(bh, page, offset);
        }
 out:
-       memalloc_unuse_memcg();
+       memalloc_use_memcg(old_memcg);
        mem_cgroup_put(memcg);
        return head;
 /*
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index c942910a8649..0e59fa57f6d7 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -531,6 +531,7 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
        struct inode *dirid = fanotify_dfid_inode(mask, data, data_type, dir);
        const struct path *path = fsnotify_data_path(data, data_type);
        unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS);
+       struct mem_cgroup *old_memcg;
        struct inode *child = NULL;
        bool name_event = false;
 
@@ -580,7 +581,7 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
                gfp |= __GFP_RETRY_MAYFAIL;
 
        /* Whoever is interested in the event, pays for the allocation. */
-       memalloc_use_memcg(group->memcg);
+       old_memcg = memalloc_use_memcg(group->memcg);
 
        if (fanotify_is_perm_event(mask)) {
                event = fanotify_alloc_perm_event(path, gfp);
@@ -608,7 +609,7 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
                event->pid = get_pid(task_tgid(current));
 
 out:
-       memalloc_unuse_memcg();
+       memalloc_use_memcg(old_memcg);
        return event;
 }
 
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index a65cf8c9f600..8017a51561c4 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -66,6 +66,7 @@ static int inotify_one_event(struct fsnotify_group *group, u32 mask,
        int ret;
        int len = 0;
        int alloc_len = sizeof(struct inotify_event_info);
+       struct mem_cgroup *old_memcg;
 
        if ((inode_mark->mask & FS_EXCL_UNLINK) &&
            path && d_unlinked(path->dentry))
@@ -87,9 +88,9 @@ static int inotify_one_event(struct fsnotify_group *group, u32 mask,
         * trigger OOM killer in the target monitoring memcg as it may have
         * security repercussion.
         */
-       memalloc_use_memcg(group->memcg);
+       old_memcg = memalloc_use_memcg(group->memcg);
        event = kmalloc(alloc_len, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
-       memalloc_unuse_memcg();
+       memalloc_use_memcg(old_memcg);
 
        if (unlikely(!event)) {
                /*
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index f889e332912f..b8fde48d44a9 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -312,31 +312,21 @@ static inline void memalloc_nocma_restore(unsigned int flags)
  * __GFP_ACCOUNT allocations till the end of the scope will be charged to the
  * given memcg.
  *
- * NOTE: This function is not nesting safe.
+ * NOTE: This function can nest. Users must save the return value and
+ * restore the previous value once their own charging scope is over.
  */
-static inline void memalloc_use_memcg(struct mem_cgroup *memcg)
+static inline struct mem_cgroup *
+memalloc_use_memcg(struct mem_cgroup *memcg)
 {
-       WARN_ON_ONCE(current->active_memcg);
+       struct mem_cgroup *old = current->active_memcg;
        current->active_memcg = memcg;
-}
-
-/**
- * memalloc_unuse_memcg - Ends the remote memcg charging scope.
- *
- * This function marks the end of the remote memcg charging scope started by
- * memalloc_use_memcg().
- */
-static inline void memalloc_unuse_memcg(void)
-{
-       current->active_memcg = NULL;
+       return old;
 }
 #else
-static inline void memalloc_use_memcg(struct mem_cgroup *memcg)
-{
-}
-
-static inline void memalloc_unuse_memcg(void)
+static inline struct mem_cgroup *
+memalloc_use_memcg(struct mem_cgroup *memcg)
 {
+       return NULL;
 }
 #endif
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b807952b4d43..b2468c80085d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5271,12 +5271,12 @@ static struct cgroup_subsys_state * __ref
 mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 {
        struct mem_cgroup *parent = mem_cgroup_from_css(parent_css);
-       struct mem_cgroup *memcg;
+       struct mem_cgroup *memcg, *old_memcg;
        long error = -ENOMEM;
 
-       memalloc_use_memcg(parent);
+       old_memcg = memalloc_use_memcg(parent);
        memcg = mem_cgroup_alloc();
-       memalloc_unuse_memcg();
+       memalloc_use_memcg(old_memcg);
        if (IS_ERR(memcg))
                return ERR_CAST(memcg);
 
-- 
2.26.2

Reply via email to