The template fast path from the previous patch only accelerates head
pages. Compound tails in memmap_init_compound() still go through the
slow path one by one.

Build separate head and tail templates and reuse one prepared tail
template across the tail pages in a compound range. Head pages keep the
zone_device_page_init_refcount() policy, while compound tails always
start with a refcount of 0 after prep_compound_tail().

This extends the template-copy fast path to pfns_per_compound > 1
without changing the existing slow path.

Tested in a VM with a 100 GB devdax namespace (align=2097152) on an Intel
Ice Lake server. This test exercises the dax_pmem rebind path and
measures memmap initialization latency.

Test procedure:
Unbind and rebind the dax_pmem driver 30 times, and collect the memmap
initialization time from the pr_debug() output of memmap_init_zone_device().

Base(v7.1-rc3):
  First binding: 1515 ms
  Average of subsequent rebinds: 313.45 ms

With patches 1-4 applied:
  First binding: 1422 ms
  Average of subsequent rebinds: 256.56 ms

This reduces the average rebind time from 313.45 ms to 256.56 ms, or
about 18.1%.

Signed-off-by: Li Zhe <[email protected]>
---
 mm/mm_init.c | 51 +++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 39 insertions(+), 12 deletions(-)

diff --git a/mm/mm_init.c b/mm/mm_init.c
index 2992711351a0..17a84d4cda01 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -1084,17 +1084,25 @@ static inline bool zone_device_page_init_optimization_enabled(void)
                IS_ALIGNED(sizeof(struct page), sizeof(u64));
 }
 
-static inline void zone_device_template_page_init(struct page *template,
-                                                 unsigned long pfn,
-                                                 unsigned long zone_idx,
-                                                 int nid,
-                                                 struct dev_pagemap *pgmap)
+static inline void zone_device_template_head_page_init(struct page *template,
+               unsigned long pfn, unsigned long zone_idx, int nid,
+               struct dev_pagemap *pgmap)
 {
        __zone_device_page_init(template, pfn, zone_idx, nid, pgmap);
        if (!zone_device_page_init_refcount(pgmap))
                set_page_count(template, 0);
 }
 
+static inline void zone_device_template_tail_page_init(struct page *template,
+               unsigned long pfn, unsigned long zone_idx, int nid,
+               struct dev_pagemap *pgmap, const struct page *head,
+               unsigned int order)
+{
+       __zone_device_page_init(template, pfn, zone_idx, nid, pgmap);
+       prep_compound_tail(template, head, order);
+       set_page_count(template, 0);
+}
+
 /*
  * The copied template already provides the PFN-invariant portion of a
  * ZONE_DEVICE struct page. Fix up the fields that still depend on @pfn
@@ -1144,10 +1152,12 @@ static void __ref memmap_init_compound(struct page *head,
                                       unsigned long head_pfn,
                                       unsigned long zone_idx, int nid,
                                       struct dev_pagemap *pgmap,
-                                      unsigned long nr_pages)
+                                      unsigned long nr_pages,
+                                      bool use_template)
 {
        unsigned long pfn, end_pfn = head_pfn + nr_pages;
        unsigned int order = pgmap->vmemmap_shift;
+       struct page template;
 
        /*
         * We have to initialize the pages, including setting up page links.
@@ -1156,12 +1166,28 @@ static void __ref memmap_init_compound(struct page *head,
         * the pages in the same go.
         */
        __SetPageHead(head);
+
+       /*
+        * A tail template can be reused for all tail pages in the same compound page
+        * because shared state for compound tails is pre-set by prep_compound_tail().
+        * The per-page page->virtual and section in flags are fixed up after copying.
+        */
+       if (use_template)
+               zone_device_template_tail_page_init(&template, head_pfn + 1,
+                                                   zone_idx, nid, pgmap,
+                                                   head, order);
+
        for (pfn = head_pfn + 1; pfn < end_pfn; pfn++) {
                struct page *page = pfn_to_page(pfn);
 
-               zone_device_page_init_slow(page, pfn, zone_idx, nid, pgmap);
-               prep_compound_tail(page, head, order);
-               set_page_count(page, 0);
+               if (use_template) {
+                       zone_device_page_init_from_template(page, pfn,
+                                                           &template);
+               } else {
+                       zone_device_page_init_slow(page, pfn, zone_idx, nid, 
pgmap);
+                       prep_compound_tail(page, head, order);
+                       set_page_count(page, 0);
+               }
        }
        prep_compound_head(head, order);
 }
@@ -1195,8 +1221,8 @@ void __ref memmap_init_zone_device(struct zone *zone,
        }
 
        if (use_template)
-               zone_device_template_page_init(&template, start_pfn, zone_idx,
-                                              nid, pgmap);
+               zone_device_template_head_page_init(&template, start_pfn,
+                                                   zone_idx, nid, pgmap);
 
        for (pfn = start_pfn; pfn < end_pfn; pfn += pfns_per_compound) {
                struct page *page = pfn_to_page(pfn);
@@ -1212,7 +1238,8 @@ void __ref memmap_init_zone_device(struct zone *zone,
                        continue;
 
                memmap_init_compound(page, pfn, zone_idx, nid, pgmap,
-                                    compound_nr_pages(altmap, pgmap));
+                                    compound_nr_pages(altmap, pgmap),
+                                    use_template);
        }
 
        pr_debug("%s initialised %lu pages in %ums\n", __func__,
-- 
2.20.1

Reply via email to