Add pghint_t, a bitwise type for communicating page allocation hints
between the allocator and callers. Define PGHINT_ZEROED to indicate
that the allocated page contents are known to be zero.
Add _hints variants of the allocation functions that accept a
pghint_t *hints output parameter:
vma_alloc_folio_hints() -> alloc_pages_mpol_hints() (internal)
-> __alloc_frozen_pages_hints()
The existing APIs are unchanged and continue to work without hints.
For now, *hints is always set to 0 whenever a non-NULL hints pointer is
passed; callers that pass NULL are unaffected. A subsequent patch will
set PGHINT_ZEROED when the page was pre-zeroed by the host.
Signed-off-by: Michael S. Tsirkin <[email protected]>
Assisted-by: Claude:claude-opus-4-6
Assisted-by: cursor-agent:GPT-5.4-xhigh
---
include/linux/gfp.h | 15 ++++++++
mm/internal.h | 4 +++
mm/mempolicy.c | 85 +++++++++++++++++++++++++++++++++++++++++++++
mm/page_alloc.c | 15 ++++++--
4 files changed, 117 insertions(+), 2 deletions(-)
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 51ef13ed756e..14433a20e60c 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -226,6 +226,9 @@ static inline void arch_free_page(struct page *page, int
order) { }
static inline void arch_alloc_page(struct page *page, int order) { }
#endif
+typedef unsigned int __bitwise pghint_t;
+#define PGHINT_ZEROED ((__force pghint_t)BIT(0))
+
struct page *__alloc_pages_noprof(gfp_t gfp, unsigned int order, int
preferred_nid,
nodemask_t *nodemask);
#define __alloc_pages(...)
alloc_hooks(__alloc_pages_noprof(__VA_ARGS__))
@@ -325,6 +328,9 @@ struct folio *folio_alloc_mpol_noprof(gfp_t gfp, unsigned
int order,
struct mempolicy *mpol, pgoff_t ilx, int nid);
struct folio *vma_alloc_folio_noprof(gfp_t gfp, int order, struct
vm_area_struct *vma,
unsigned long addr);
+struct folio *vma_alloc_folio_hints_noprof(gfp_t gfp, int order,
+ struct vm_area_struct *vma, unsigned long addr,
+ pghint_t *hints);
#else
static inline struct page *alloc_pages_noprof(gfp_t gfp_mask, unsigned int
order)
{
@@ -344,12 +350,21 @@ static inline struct folio *vma_alloc_folio_noprof(gfp_t
gfp, int order,
{
return folio_alloc_noprof(gfp, order);
}
+static inline struct folio *vma_alloc_folio_hints_noprof(gfp_t gfp, int order,
+ struct vm_area_struct *vma, unsigned long addr,
+ pghint_t *hints)
+{
+ if (hints)
+ *hints = 0;
+ return folio_alloc_noprof(gfp, order);
+}
#endif
#define alloc_pages(...)
alloc_hooks(alloc_pages_noprof(__VA_ARGS__))
#define folio_alloc(...)
alloc_hooks(folio_alloc_noprof(__VA_ARGS__))
#define folio_alloc_mpol(...)
alloc_hooks(folio_alloc_mpol_noprof(__VA_ARGS__))
#define vma_alloc_folio(...)
alloc_hooks(vma_alloc_folio_noprof(__VA_ARGS__))
+#define vma_alloc_folio_hints(...)
alloc_hooks(vma_alloc_folio_hints_noprof(__VA_ARGS__))
#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
diff --git a/mm/internal.h b/mm/internal.h
index cb0af847d7d9..686667b956c0 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -894,8 +894,12 @@ extern int user_min_free_kbytes;
struct page *__alloc_frozen_pages_noprof(gfp_t, unsigned int order, int nid,
nodemask_t *);
+struct page *__alloc_frozen_pages_hints_noprof(gfp_t, unsigned int order,
+ int nid, nodemask_t *, pghint_t *hints);
#define __alloc_frozen_pages(...) \
alloc_hooks(__alloc_frozen_pages_noprof(__VA_ARGS__))
+#define __alloc_frozen_pages_hints(...) \
+ alloc_hooks(__alloc_frozen_pages_hints_noprof(__VA_ARGS__))
void free_frozen_pages(struct page *page, unsigned int order);
void free_unref_folios(struct folio_batch *fbatch);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index cf92bd6a8226..b918639eef71 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2547,6 +2547,91 @@ struct folio *vma_alloc_folio_noprof(gfp_t gfp, int
order, struct vm_area_struct
}
EXPORT_SYMBOL(vma_alloc_folio_noprof);
+static struct page *alloc_pages_preferred_many_hints(gfp_t gfp,
+ unsigned int order, int nid, nodemask_t *nodemask,
+ pghint_t *hints)
+{
+ struct page *page;
+ gfp_t preferred_gfp;
+
+ preferred_gfp = gfp | __GFP_NOWARN;
+ preferred_gfp &= ~(__GFP_DIRECT_RECLAIM | __GFP_NOFAIL);
+ page = __alloc_frozen_pages_hints_noprof(preferred_gfp, order, nid,
+ nodemask, hints);
+ if (!page)
+ page = __alloc_frozen_pages_hints_noprof(gfp, order, nid, NULL,
+ hints);
+
+ return page;
+}
+
+static struct page *alloc_pages_mpol_hints(gfp_t gfp, unsigned int order,
+ struct mempolicy *pol, pgoff_t ilx, int nid,
+ pghint_t *hints)
+{
+ nodemask_t *nodemask;
+ struct page *page;
+
+ nodemask = policy_nodemask(gfp, pol, ilx, &nid);
+
+ if (pol->mode == MPOL_PREFERRED_MANY)
+ return alloc_pages_preferred_many_hints(gfp, order, nid,
+ nodemask, hints);
+
+ if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
+ order == HPAGE_PMD_ORDER && ilx != NO_INTERLEAVE_INDEX) {
+ if (pol->mode != MPOL_INTERLEAVE &&
+ pol->mode != MPOL_WEIGHTED_INTERLEAVE &&
+ (!nodemask || node_isset(nid, *nodemask))) {
+ page = __alloc_frozen_pages_hints_noprof(
+ gfp | __GFP_THISNODE | __GFP_NORETRY, order,
+ nid, NULL, hints);
+ if (page || !(gfp & __GFP_DIRECT_RECLAIM))
+ return page;
+ }
+ }
+
+ page = __alloc_frozen_pages_hints_noprof(gfp, order, nid, nodemask,
+ hints);
+
+ if (unlikely(pol->mode == MPOL_INTERLEAVE ||
+ pol->mode == MPOL_WEIGHTED_INTERLEAVE) && page) {
+ if (static_branch_likely(&vm_numa_stat_key) &&
+ page_to_nid(page) == nid) {
+ preempt_disable();
+ __count_numa_event(page_zone(page),
NUMA_INTERLEAVE_HIT);
+ preempt_enable();
+ }
+ }
+
+ return page;
+}
+
+struct folio *vma_alloc_folio_hints_noprof(gfp_t gfp, int order,
+ struct vm_area_struct *vma, unsigned long addr,
+ pghint_t *hints)
+{
+ struct mempolicy *pol;
+ pgoff_t ilx;
+ struct folio *folio;
+ struct page *page;
+
+ if (vma->vm_flags & VM_DROPPABLE)
+ gfp |= __GFP_NOWARN;
+
+ pol = get_vma_policy(vma, addr, order, &ilx);
+ page = alloc_pages_mpol_hints(gfp | __GFP_COMP, order, pol, ilx,
+ numa_node_id(), hints);
+ mpol_cond_put(pol);
+ if (!page)
+ return NULL;
+
+ set_page_refcounted(page);
+ folio = page_rmappable_folio(page);
+ return folio;
+}
+EXPORT_SYMBOL(vma_alloc_folio_hints_noprof);
+
struct page *alloc_frozen_pages_noprof(gfp_t gfp, unsigned order)
{
struct mempolicy *pol = &default_policy;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index edbb1edf463d..f7abbc46e725 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5222,14 +5222,17 @@ EXPORT_SYMBOL_GPL(alloc_pages_bulk_noprof);
/*
* This is the 'heart' of the zoned buddy allocator.
*/
-struct page *__alloc_frozen_pages_noprof(gfp_t gfp, unsigned int order,
- int preferred_nid, nodemask_t *nodemask)
+struct page *__alloc_frozen_pages_hints_noprof(gfp_t gfp, unsigned int order,
+ int preferred_nid, nodemask_t *nodemask, pghint_t *hints)
{
struct page *page;
unsigned int alloc_flags = ALLOC_WMARK_LOW;
gfp_t alloc_gfp; /* The gfp_t that was actually used for allocation */
struct alloc_context ac = { };
+ if (hints)
+ *hints = (pghint_t)0;
+
/*
* There are several places where we assume that the order value is sane
* so bail out early if the request is out of bound.
@@ -5285,6 +5288,14 @@ struct page *__alloc_frozen_pages_noprof(gfp_t gfp,
unsigned int order,
return page;
}
+EXPORT_SYMBOL(__alloc_frozen_pages_hints_noprof);
+
+struct page *__alloc_frozen_pages_noprof(gfp_t gfp, unsigned int order,
+ int preferred_nid, nodemask_t *nodemask)
+{
+ return __alloc_frozen_pages_hints_noprof(gfp, order, preferred_nid,
+ nodemask, NULL);
+}
EXPORT_SYMBOL(__alloc_frozen_pages_noprof);
struct page *__alloc_pages_noprof(gfp_t gfp, unsigned int order,
--
MST