The template fast path from the previous patch only accelerates head
pages. Compound tails in memmap_init_compound() still go through the
slow path one by one.

Build separate head and tail templates and reuse one prepared tail
template across the tail pages in a compound range. Head pages preserve
the existing refcount policy, while compound tails always start with a
refcount of 0 after prep_compound_tail().

This extends the template-copy fast path to pfns_per_compound > 1
without changing the existing slow path. Tail-page PFN-dependent fields
are refreshed in the reusable tail template before each copy.

Tested in a VM with a 100 GB devdax namespace (align=2097152) on an Intel
Ice Lake server. This test exercises the dax_pmem rebind path and
measures memmap initialization latency.

Test procedure:
Unbind and rebind the dax_pmem driver 30 times and collect the memmap
initialization time from the pr_debug() output of
memmap_init_zone_device().

Base (v7.1-rc3):
  First binding: 1515 ms
  Average of subsequent rebinds: 313.45 ms

With this patch and its prerequisites applied:
  First binding: 1422 ms
  Average of subsequent rebinds: 240.42 ms

This reduces the average rebind time from 313.45 ms to 240.42 ms, or
about 23.3%.

Signed-off-by: Li Zhe <[email protected]>
Reviewed-by: Mike Rapoport (Microsoft) <[email protected]>
---
 mm/mm_init.c | 45 ++++++++++++++++++++++++++++++++++++---------
 1 file changed, 36 insertions(+), 9 deletions(-)

diff --git a/mm/mm_init.c b/mm/mm_init.c
index 53c0241c66b7..d5ccb49a048f 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -1077,17 +1077,25 @@ static inline bool zone_device_page_init_optimization_enabled(void)
        return !page_ref_tracepoint_active(page_ref_set);
 }
 
-static inline void zone_device_template_page_init(struct page *template,
-                                                 unsigned long pfn,
-                                                 unsigned long zone_idx,
-                                                 int nid,
-                                                 struct dev_pagemap *pgmap)
+static inline void zone_device_template_head_page_init(struct page *template,
+               unsigned long pfn, unsigned long zone_idx, int nid,
+               struct dev_pagemap *pgmap)
 {
        __zone_device_page_init(template, pfn, zone_idx, nid, pgmap);
        if (!pagemap_resets_refcount(pgmap))
                set_page_count(template, 0);
 }
 
+static inline void zone_device_template_tail_page_init(struct page *template,
+               unsigned long pfn, unsigned long zone_idx, int nid,
+               struct dev_pagemap *pgmap, const struct page *head,
+               unsigned int order)
+{
+       __zone_device_page_init(template, pfn, zone_idx, nid, pgmap);
+       prep_compound_tail(template, head, order);
+       set_page_count(template, 0);
+}
+
 /*
  * 'template' is a reusable page prototype rather than a strictly immutable
  * object. Most ZONE_DEVICE fields stay constant across the pages covered by
@@ -1139,10 +1147,12 @@ static void __ref memmap_init_compound(struct page *head,
                                       unsigned long head_pfn,
                                       unsigned long zone_idx, int nid,
                                       struct dev_pagemap *pgmap,
-                                      unsigned long nr_pages)
+                                      unsigned long nr_pages,
+                                      bool use_template)
 {
        unsigned long pfn, end_pfn = head_pfn + nr_pages;
        unsigned int order = pgmap->vmemmap_shift;
+       struct page template;
 
        /*
         * We have to initialize the pages, including setting up page links.
@@ -1151,9 +1161,25 @@ static void __ref memmap_init_compound(struct page *head,
         * the pages in the same go.
         */
        __SetPageHead(head);
+
+       /*
+        * All tails of the same compound page share the state established by
+        * prep_compound_tail(). Reuse one tail template for the whole range and
+        * refresh only the PFN-dependent fields in that template before each copy.
+        */
+       if (use_template)
+               zone_device_template_tail_page_init(&template, head_pfn + 1,
+                                                   zone_idx, nid, pgmap,
+                                                   head, order);
+
        for (pfn = head_pfn + 1; pfn < end_pfn; pfn++) {
                struct page *page = pfn_to_page(pfn);
 
+               if (use_template) {
+                       zone_device_page_init_from_template(page, pfn,
+                                                           &template);
+                       continue;
+               }
                zone_device_page_init_slow(page, pfn, zone_idx, nid, pgmap);
                prep_compound_tail(page, head, order);
                set_page_count(page, 0);
@@ -1190,8 +1216,8 @@ void __ref memmap_init_zone_device(struct zone *zone,
        }
 
        if (use_template)
-               zone_device_template_page_init(&template, start_pfn, zone_idx,
-                                              nid, pgmap);
+               zone_device_template_head_page_init(&template, start_pfn,
+                                                   zone_idx, nid, pgmap);
 
        for (pfn = start_pfn; pfn < end_pfn; pfn += pfns_per_compound) {
                struct page *page = pfn_to_page(pfn);
@@ -1207,7 +1233,8 @@ void __ref memmap_init_zone_device(struct zone *zone,
                        continue;
 
                memmap_init_compound(page, pfn, zone_idx, nid, pgmap,
-                                    compound_nr_pages(altmap, pgmap));
+                                    compound_nr_pages(altmap, pgmap),
+                                    use_template);
        }
 
        pr_debug("%s initialised %lu pages in %ums\n", __func__,
-- 
2.20.1

Reply via email to