From: Michal Hocko <[email protected]>

Now that __GFP_NOFAIL doesn't override decisions to skip the oom killer
we are left with requests which have to loop inside the allocator
without invoking the oom killer (e.g. GFP_NOFS|__GFP_NOFAIL used by fs
code) and so they might, in very unlikely situations, loop forever -
e.g. other parallel requests could starve them.
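
As a reminder of the pattern in question, fs code typically issues such
requests along these lines (a hypothetical call site for illustration
only, not taken from this patch):

    /*
     * Hypothetical fs-side caller (illustration only): fs reclaim
     * recursion is not allowed and the allocation must not fail, so
     * the allocator can only keep looping and hope that other
     * requests free some memory.
     */
    static struct buffer_head **alloc_bh_slots(unsigned int nr)
    {
            return kmalloc_array(nr, sizeof(struct buffer_head *),
                                 GFP_NOFS | __GFP_NOFAIL);
    }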

This patch tries to limit the likelihood of such a lockup by giving
these __GFP_NOFAIL requests a chance to move on by letting them consume
a small part of memory reserves. We are using ALLOC_HARDER which should
be enough to prevent starvation by regular allocation requests, yet it
shouldn't consume enough of the reserves to disrupt high priority
requests (ALLOC_HIGH).
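
For reference, the effect of these flags on the watermark check is
roughly the following (a paraphrase of the __zone_watermark_ok logic
around this kernel version; approximate and for illustration only):

    /*
     * Sketch of the watermark adjustments, not the exact upstream code:
     * ALLOC_HIGH callers may dip about half way into the reserves,
     * ALLOC_HARDER (what __GFP_NOFAIL gets with this patch) roughly a
     * further quarter, while ALLOC_NO_WATERMARKS skips the check
     * completely.
     */
    long min = mark;

    if (alloc_flags & ALLOC_HIGH)
            min -= min / 2;
    if (alloc_flags & ALLOC_HARDER)
            min -= min / 4;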

While we are at it, let's introduce a helper __alloc_pages_cpuset_fallback
which enforces the cpusets but allows falling back to ignoring them if
the first attempt fails. __GFP_NOFAIL requests can be considered
important enough to allow cpuset runaway in order for the system to move
on. It is highly unlikely that any of these will be GFP_USER anyway.

Signed-off-by: Michal Hocko <[email protected]>
---
 mm/page_alloc.c | 46 ++++++++++++++++++++++++++++++++++++----------
 1 file changed, 36 insertions(+), 10 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2dda7c3eba52..e8e551015d48 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3064,6 +3064,26 @@ void warn_alloc(gfp_t gfp_mask, const char *fmt, ...)
 }
 
 static inline struct page *
+__alloc_pages_cpuset_fallback(gfp_t gfp_mask, unsigned int order,
+                             unsigned int alloc_flags,
+                             const struct alloc_context *ac)
+{
+       struct page *page;
+
+       page = get_page_from_freelist(gfp_mask, order,
+                       alloc_flags|ALLOC_CPUSET, ac);
+       /*
+        * fallback to ignore cpuset restriction if our nodes
+        * are depleted
+        */
+       if (!page)
+               page = get_page_from_freelist(gfp_mask, order,
+                               alloc_flags, ac);
+
+       return page;
+}
+
+static inline struct page *
 __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
        const struct alloc_context *ac, unsigned long *did_some_progress)
 {
@@ -3127,17 +3147,13 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
        if (out_of_memory(&oc) || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL)) {
                *did_some_progress = 1;
 
-               if (gfp_mask & __GFP_NOFAIL) {
-                       page = get_page_from_freelist(gfp_mask, order,
-                                       ALLOC_NO_WATERMARKS|ALLOC_CPUSET, ac);
-                       /*
-                        * fallback to ignore cpuset restriction if our nodes
-                        * are depleted
-                        */
-                       if (!page)
-                               page = get_page_from_freelist(gfp_mask, order,
+               /*
+                * Help non-failing allocations by giving them access to memory
+                * reserves
+                */
+               if (gfp_mask & __GFP_NOFAIL)
+                       page = __alloc_pages_cpuset_fallback(gfp_mask, order,
                                        ALLOC_NO_WATERMARKS, ac);
-               }
        }
 out:
        mutex_unlock(&oom_lock);
@@ -3743,6 +3759,16 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
                 */
                WARN_ON_ONCE(order > PAGE_ALLOC_COSTLY_ORDER);
 
+               /*
+                * Help non-failing allocations by giving them access to memory
+                * reserves but do not use ALLOC_NO_WATERMARKS because this
+                * could deplete whole memory reserves which would just make
+                * the situation worse
+                */
+               page = __alloc_pages_cpuset_fallback(gfp_mask, order, ALLOC_HARDER, ac);
+               if (page)
+                       goto got_pg;
+
                cond_resched();
                goto retry;
        }
-- 
2.10.2
