On 1 Oct 2025, at 2:57, Balbir Singh wrote:

> Implement migrate_vma_split_pages() to handle THP splitting during the
> migration process when destination cannot allocate compound pages.
>
> This addresses the common scenario where migrate_vma_setup() succeeds with
> MIGRATE_PFN_COMPOUND pages, but the destination device cannot allocate
> large pages during the migration phase.
>
> Key changes:
> - migrate_vma_split_pages(): Split already-isolated pages during migration
> - Enhanced __folio_split() and __split_huge_page_to_list_to_order() with an
>   unmapped parameter to avoid redundant unmap/remap operations
>
> This provides a fallback mechanism to ensure migration succeeds even when
> large page allocation fails at the destination.
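
To spell out the driver's side of this fallback: the split is triggered purely
by what the driver puts (or fails to put) in the dst array. A rough,
hypothetical sketch of that allocation step -- only struct migrate_vma,
migrate_pfn(), HPAGE_PMD_ORDER and the MIGRATE_PFN_* flags are the real
interface here; the demo_* helpers and the function itself are invented for
illustration:

	/* Hypothetical driver callback: pick a destination for src entry i. */
	static void demo_alloc_dst_page(struct migrate_vma *args, unsigned long i)
	{
		struct page *dpage;

		if (args->src[i] & MIGRATE_PFN_COMPOUND) {
			/* Try to back the source THP with one large device page. */
			dpage = demo_alloc_device_pages(HPAGE_PMD_ORDER);
			if (dpage) {
				args->dst[i] = migrate_pfn(page_to_pfn(dpage)) |
					       MIGRATE_PFN_COMPOUND;
				return;
			}
		}

		/*
		 * Large allocation failed (or was never attempted): fall back to
		 * a single page and leave MIGRATE_PFN_COMPOUND clear in dst.
		 * With this patch the still-unmapped source folio is then split
		 * and migrated page by page instead of failing the whole range.
		 */
		dpage = demo_alloc_device_page();
		args->dst[i] = dpage ? migrate_pfn(page_to_pfn(dpage)) : 0;
	}
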
>
> Cc: Andrew Morton <[email protected]>
> Cc: David Hildenbrand <[email protected]>
> Cc: Zi Yan <[email protected]>
> Cc: Joshua Hahn <[email protected]>
> Cc: Rakie Kim <[email protected]>
> Cc: Byungchul Park <[email protected]>
> Cc: Gregory Price <[email protected]>
> Cc: Ying Huang <[email protected]>
> Cc: Alistair Popple <[email protected]>
> Cc: Oscar Salvador <[email protected]>
> Cc: Lorenzo Stoakes <[email protected]>
> Cc: Baolin Wang <[email protected]>
> Cc: "Liam R. Howlett" <[email protected]>
> Cc: Nico Pache <[email protected]>
> Cc: Ryan Roberts <[email protected]>
> Cc: Dev Jain <[email protected]>
> Cc: Barry Song <[email protected]>
> Cc: Lyude Paul <[email protected]>
> Cc: Danilo Krummrich <[email protected]>
> Cc: David Airlie <[email protected]>
> Cc: Simona Vetter <[email protected]>
> Cc: Ralph Campbell <[email protected]>
> Cc: Mika Penttilä <[email protected]>
> Cc: Matthew Brost <[email protected]>
> Cc: Francois Dugast <[email protected]>
>
> Signed-off-by: Balbir Singh <[email protected]>
> ---
>  include/linux/huge_mm.h | 11 +++++-
>  lib/test_hmm.c          |  9 +++++
>  mm/huge_memory.c        | 46 ++++++++++++----------
>  mm/migrate_device.c     | 85 +++++++++++++++++++++++++++++++++++------
>  4 files changed, 117 insertions(+), 34 deletions(-)
>
> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> index 2d669be7f1c8..a166be872628 100644
> --- a/include/linux/huge_mm.h
> +++ b/include/linux/huge_mm.h
> @@ -365,8 +365,8 @@ unsigned long thp_get_unmapped_area_vmflags(struct file *filp, unsigned long add
>               vm_flags_t vm_flags);
>
>  bool can_split_folio(struct folio *folio, int caller_pins, int *pextra_pins);
> -int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
> -             unsigned int new_order);
> +int __split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
> +             unsigned int new_order, bool unmapped);
>  int min_order_for_split(struct folio *folio);
>  int split_folio_to_list(struct folio *folio, struct list_head *list);
>  bool uniform_split_supported(struct folio *folio, unsigned int new_order,
> @@ -375,6 +375,13 @@ bool non_uniform_split_supported(struct folio *folio, unsigned int new_order,
>               bool warns);
>  int folio_split(struct folio *folio, unsigned int new_order, struct page *page,
>               struct list_head *list);
> +
> +static inline int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
> +             unsigned int new_order)
> +{
> +     return __split_huge_page_to_list_to_order(page, list, new_order, false);
> +}
> +
>  /*
>   * try_folio_split - try to split a @folio at @page using non uniform split.
>   * @folio: folio to be split
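
A usage note on the header change above: the old entry point survives as the
new static inline wrapper, so existing callers are untouched, and only callers
that already hold a fully unmapped folio opt into the new argument. Roughly
(schematic fragments, not a requested change):

	int err;

	/* Existing callers: behaviour unchanged, unmapped == false via the wrapper. */
	err = split_huge_page_to_list_to_order(page, NULL, 0);

	/*
	 * New migrate_device path: the folio's page table entries are already
	 * migration entries, so pass unmapped == true and let __folio_split()
	 * skip the unmap/remap and LRU work.
	 */
	err = __split_huge_page_to_list_to_order(folio_page(folio, 0), NULL, 0, true);
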
> diff --git a/lib/test_hmm.c b/lib/test_hmm.c
> index 46fa9e200db8..df429670633e 100644
> --- a/lib/test_hmm.c
> +++ b/lib/test_hmm.c
> @@ -1612,6 +1612,15 @@ static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf)
>       order = folio_order(page_folio(vmf->page));
>       nr = 1 << order;
>
> +     /*
> +      * When folios are partially mapped, we can't rely on the folio
> +      * order of vmf->page as the folio might not be fully split yet
> +      */
> +     if (vmf->pte) {
> +             order = 0;
> +             nr = 1;
> +     }
> +
>       /*
>        * Consider a per-cpu cache of src and dst pfns, but with
>        * large number of cpus that might not scale well.
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 8c95a658b3ec..022b0729f826 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -3463,15 +3463,6 @@ static void __split_folio_to_order(struct folio *folio, int old_order,
>               new_folio->mapping = folio->mapping;
>               new_folio->index = folio->index + i;
>
> -             /*
> -              * page->private should not be set in tail pages. Fix up and warn once
> -              * if private is unexpectedly set.
> -              */
> -             if (unlikely(new_folio->private)) {
> -                     VM_WARN_ON_ONCE_PAGE(true, new_head);
> -                     new_folio->private = NULL;
> -             }
> -
>               if (folio_test_swapcache(folio))
>                       new_folio->swap.val = folio->swap.val + i;
>
> @@ -3700,6 +3691,7 @@ bool uniform_split_supported(struct folio *folio, unsigned int new_order,
>   * @lock_at: a page within @folio to be left locked to caller
>   * @list: after-split folios will be put on it if non NULL
>   * @uniform_split: perform uniform split or not (non-uniform split)
> + * @unmapped: The pages are already unmapped; their page table entries are migration entries.
>   *
>   * It calls __split_unmapped_folio() to perform uniform and non-uniform split.
>   * It is in charge of checking whether the split is supported or not and
> @@ -3715,7 +3707,7 @@ bool uniform_split_supported(struct folio *folio, unsigned int new_order,
>   */
>  static int __folio_split(struct folio *folio, unsigned int new_order,
>               struct page *split_at, struct page *lock_at,
> -             struct list_head *list, bool uniform_split)
> +             struct list_head *list, bool uniform_split, bool unmapped)
>  {
>       struct deferred_split *ds_queue = get_deferred_split_queue(folio);
>       XA_STATE(xas, &folio->mapping->i_pages, folio->index);
> @@ -3765,13 +3757,15 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
>                * is taken to serialise against parallel split or collapse
>                * operations.
>                */
> -             anon_vma = folio_get_anon_vma(folio);
> -             if (!anon_vma) {
> -                     ret = -EBUSY;
> -                     goto out;
> +             if (!unmapped) {
> +                     anon_vma = folio_get_anon_vma(folio);
> +                     if (!anon_vma) {
> +                             ret = -EBUSY;
> +                             goto out;
> +                     }
> +                     anon_vma_lock_write(anon_vma);
>               }
>               mapping = NULL;
> -             anon_vma_lock_write(anon_vma);
>       } else {
>               unsigned int min_order;
>               gfp_t gfp;
> @@ -3838,7 +3832,8 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
>               goto out_unlock;
>       }
>
> -     unmap_folio(folio);
> +     if (!unmapped)
> +             unmap_folio(folio);
>
>       /* block interrupt reentry in xa_lock and spinlock */
>       local_irq_disable();
> @@ -3925,10 +3920,13 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
>
>                       next = folio_next(new_folio);
>
> +                     zone_device_private_split_cb(folio, new_folio);
> +
>                       expected_refs = folio_expected_ref_count(new_folio) + 1;
>                       folio_ref_unfreeze(new_folio, expected_refs);
>
> -                     lru_add_split_folio(folio, new_folio, lruvec, list);
> +                     if (!unmapped)
> +                             lru_add_split_folio(folio, new_folio, lruvec, list);
>
>                       /*
>                        * Anonymous folio with swap cache.
> @@ -3959,6 +3957,8 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
>                       __filemap_remove_folio(new_folio, NULL);
>                       folio_put_refs(new_folio, nr_pages);
>               }
> +
> +             zone_device_private_split_cb(folio, NULL);
>               /*
>                * Unfreeze @folio only after all page cache entries, which
>                * used to point to it, have been updated with new folios.
> @@ -3982,6 +3982,9 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
>
>       local_irq_enable();
>
> +     if (unmapped)
> +             return ret;
> +
>       if (nr_shmem_dropped)
>               shmem_uncharge(mapping->host, nr_shmem_dropped);
>
> @@ -4072,12 +4075,13 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
>   * Returns -EINVAL when trying to split to an order that is incompatible
>   * with the folio. Splitting to order 0 is compatible with all folios.
>   */
> -int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
> -                                  unsigned int new_order)
> +int __split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
> +                                  unsigned int new_order, bool unmapped)
>  {
>       struct folio *folio = page_folio(page);
>
> -     return __folio_split(folio, new_order, &folio->page, page, list, true);
> +     return __folio_split(folio, new_order, &folio->page, page, list, true,
> +                             unmapped);
>  }
>
>  /*
> @@ -4106,7 +4110,7 @@ int folio_split(struct folio *folio, unsigned int new_order,
>               struct page *split_at, struct list_head *list)
>  {
>       return __folio_split(folio, new_order, split_at, &folio->page, list,
> -                     false);
> +                     false, false);
>  }
>
>  int min_order_for_split(struct folio *folio)
> diff --git a/mm/migrate_device.c b/mm/migrate_device.c
> index 4156fd6190d2..fa42d2ebd024 100644
> --- a/mm/migrate_device.c
> +++ b/mm/migrate_device.c
> @@ -306,6 +306,23 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
>                           pgmap->owner != migrate->pgmap_owner)
>                               goto next;
>
> +                     folio = page_folio(page);
> +                     if (folio_test_large(folio)) {
> +                             int ret;
> +
> +                             pte_unmap_unlock(ptep, ptl);
> +                             ret = migrate_vma_split_folio(folio,
> +                                                       migrate->fault_page);
> +
> +                             if (ret) {
> +                                     ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
> +                                     goto next;
> +                             }
> +
> +                             addr = start;
> +                             goto again;
> +                     }
> +
>                       mpfn = migrate_pfn(page_to_pfn(page)) |
>                                       MIGRATE_PFN_MIGRATE;
>                       if (is_writable_device_private_entry(entry))
> @@ -880,6 +897,29 @@ static int migrate_vma_insert_huge_pmd_page(struct migrate_vma *migrate,
>               src[i] &= ~MIGRATE_PFN_MIGRATE;
>       return 0;
>  }
> +
> +static int migrate_vma_split_unmapped_folio(struct migrate_vma *migrate,
> +                                         unsigned long idx, unsigned long addr,
> +                                         struct folio *folio)
> +{
> +     unsigned long i;
> +     unsigned long pfn;
> +     unsigned long flags;
> +     int ret = 0;
> +
> +     folio_get(folio);
> +     split_huge_pmd_address(migrate->vma, addr, true);
> +     ret = __split_huge_page_to_list_to_order(folio_page(folio, 0), NULL,
> +                                                     0, true);

Why not just call __split_unmapped_folio() here? Then, you do not need to add
a new unmapped parameter in __folio_split().
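
To make that concrete -- a rough sketch only, assuming __split_unmapped_folio()
keeps roughly its current shape of (folio, new_order, split_at, xas, mapping,
uniform_split) and is made callable from mm/migrate_device.c; the caller would
also have to take over the work __folio_split() currently wraps around it,
e.g. freezing the folio refcount beforehand and unfreezing/fixing up each
resulting folio afterwards:

	/*
	 * Anon folio that is already unmapped: no page cache to update, so a
	 * NULL mapping (and unused xas) is assumed to be acceptable here.
	 */
	ret = __split_unmapped_folio(folio, 0, folio_page(folio, 0),
				     NULL, NULL, true);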


> +     if (ret)
> +             return ret;
> +     migrate->src[idx] &= ~MIGRATE_PFN_COMPOUND;
> +     flags = migrate->src[idx] & ((1UL << MIGRATE_PFN_SHIFT) - 1);
> +     pfn = migrate->src[idx] >> MIGRATE_PFN_SHIFT;
> +     for (i = 1; i < HPAGE_PMD_NR; i++)
> +             migrate->src[i+idx] = migrate_pfn(pfn + i) | flags;
> +     return ret;
> +}
>  #else /* !CONFIG_ARCH_ENABLE_THP_MIGRATION */
>  static int migrate_vma_insert_huge_pmd_page(struct migrate_vma *migrate,
>                                        unsigned long addr,
> @@ -889,6 +929,13 @@ static int migrate_vma_insert_huge_pmd_page(struct migrate_vma *migrate,
>  {
>       return 0;
>  }
> +
> +static int migrate_vma_split_unmapped_folio(struct migrate_vma *migrate,
> +                                         unsigned long idx, unsigned long addr,
> +                                         struct folio *folio)
> +{
> +     return 0;
> +}
>  #endif
>
>  static unsigned long migrate_vma_nr_pages(unsigned long *src)
> @@ -1050,8 +1097,9 @@ static void __migrate_device_pages(unsigned long *src_pfns,
>                               struct migrate_vma *migrate)
>  {
>       struct mmu_notifier_range range;
> -     unsigned long i;
> +     unsigned long i, j;
>       bool notified = false;
> +     unsigned long addr;
>
>       for (i = 0; i < npages; ) {
>               struct page *newpage = migrate_pfn_to_page(dst_pfns[i]);
> @@ -1093,12 +1141,16 @@ static void __migrate_device_pages(unsigned long *src_pfns,
>                               (!(dst_pfns[i] & MIGRATE_PFN_COMPOUND))) {
>                               nr = migrate_vma_nr_pages(&src_pfns[i]);
>                               src_pfns[i] &= ~MIGRATE_PFN_COMPOUND;
> -                             src_pfns[i] &= ~MIGRATE_PFN_MIGRATE;
> -                             goto next;
> +                     } else {
> +                             nr = 1;
>                       }
>
> -                     migrate_vma_insert_page(migrate, addr, &dst_pfns[i],
> -                                             &src_pfns[i]);
> +                     for (j = 0; j < nr && i + j < npages; j++) {
> +                             src_pfns[i+j] |= MIGRATE_PFN_MIGRATE;
> +                             migrate_vma_insert_page(migrate,
> +                                     addr + j * PAGE_SIZE,
> +                                     &dst_pfns[i+j], &src_pfns[i+j]);
> +                     }
>                       goto next;
>               }
>
> @@ -1120,7 +1172,13 @@ static void __migrate_device_pages(unsigned long *src_pfns,
>                                                        MIGRATE_PFN_COMPOUND);
>                                       goto next;
>                               }
> -                             src_pfns[i] &= ~MIGRATE_PFN_MIGRATE;
> +                             nr = 1 << folio_order(folio);
> +                             addr = migrate->start + i * PAGE_SIZE;
> +                             if (migrate_vma_split_unmapped_folio(migrate, i, addr, folio)) {
> +                                     src_pfns[i] &= ~(MIGRATE_PFN_MIGRATE |
> +                                                      MIGRATE_PFN_COMPOUND);
> +                                     goto next;
> +                             }
>                       } else if ((src_pfns[i] & MIGRATE_PFN_MIGRATE) &&
>                               (dst_pfns[i] & MIGRATE_PFN_COMPOUND) &&
>                               !(src_pfns[i] & MIGRATE_PFN_COMPOUND)) {
> @@ -1156,11 +1214,16 @@ static void __migrate_device_pages(unsigned long *src_pfns,
>
>               if (migrate && migrate->fault_page == page)
>                       extra_cnt = 1;
> -             r = folio_migrate_mapping(mapping, newfolio, folio, extra_cnt);
> -             if (r)
> -                     src_pfns[i] &= ~MIGRATE_PFN_MIGRATE;
> -             else
> -                     folio_migrate_flags(newfolio, folio);
> +             for (j = 0; j < nr && i + j < npages; j++) {
> +                     folio = page_folio(migrate_pfn_to_page(src_pfns[i+j]));
> +                     newfolio = page_folio(migrate_pfn_to_page(dst_pfns[i+j]));
> +
> +                     r = folio_migrate_mapping(mapping, newfolio, folio, extra_cnt);
> +                     if (r)
> +                             src_pfns[i+j] &= ~MIGRATE_PFN_MIGRATE;
> +                     else
> +                             folio_migrate_flags(newfolio, folio);
> +             }
>  next:
>               i += nr;
>       }
> -- 
> 2.51.0


--
Best Regards,
Yan, Zi
