Hi,

On 9/8/25 03:04, Balbir Singh wrote:
> Extend migrate_vma_collect_pmd() to handle partially mapped large
> folios that require splitting before migration can proceed.
>
> During PTE walk in the collection phase, if a large folio is only
> partially mapped in the migration range, it must be split to ensure
> the folio is correctly migrated.
>
> Cc: Andrew Morton <[email protected]>
> Cc: David Hildenbrand <[email protected]>
> Cc: Zi Yan <[email protected]>
> Cc: Joshua Hahn <[email protected]>
> Cc: Rakie Kim <[email protected]>
> Cc: Byungchul Park <[email protected]>
> Cc: Gregory Price <[email protected]>
> Cc: Ying Huang <[email protected]>
> Cc: Alistair Popple <[email protected]>
> Cc: Oscar Salvador <[email protected]>
> Cc: Lorenzo Stoakes <[email protected]>
> Cc: Baolin Wang <[email protected]>
> Cc: "Liam R. Howlett" <[email protected]>
> Cc: Nico Pache <[email protected]>
> Cc: Ryan Roberts <[email protected]>
> Cc: Dev Jain <[email protected]>
> Cc: Barry Song <[email protected]>
> Cc: Lyude Paul <[email protected]>
> Cc: Danilo Krummrich <[email protected]>
> Cc: David Airlie <[email protected]>
> Cc: Simona Vetter <[email protected]>
> Cc: Ralph Campbell <[email protected]>
> Cc: Mika Penttilä <[email protected]>
> Cc: Matthew Brost <[email protected]>
> Cc: Francois Dugast <[email protected]>
>
> Signed-off-by: Balbir Singh <[email protected]>
> ---
>  mm/migrate_device.c | 94 +++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 94 insertions(+)
>
> diff --git a/mm/migrate_device.c b/mm/migrate_device.c
> index abd9f6850db6..f45ef182287d 100644
> --- a/mm/migrate_device.c
> +++ b/mm/migrate_device.c
> @@ -54,6 +54,53 @@ static int migrate_vma_collect_hole(unsigned long start,
>  	return 0;
>  }
>
> +/**
> + * migrate_vma_split_folio() - Helper function to split a THP folio
> + * @folio: the folio to split
> + * @fault_page: struct page associated with the fault if any
> + *
> + * Returns 0 on success
> + */
> +static int migrate_vma_split_folio(struct folio *folio,
> +				   struct page *fault_page)
> +{
> +	int ret;
> +	struct folio *fault_folio = fault_page ? page_folio(fault_page) : NULL;
> +	struct folio *new_fault_folio = NULL;
> +
> +	if (folio != fault_folio) {
> +		folio_get(folio);
> +		folio_lock(folio);
> +	}
> +
> +	ret = split_folio(folio);
> +	if (ret) {
> +		if (folio != fault_folio) {
> +			folio_unlock(folio);
> +			folio_put(folio);
> +		}
> +		return ret;
> +	}
> +
> +	new_fault_folio = fault_page ? page_folio(fault_page) : NULL;
> +
> +	/*
> +	 * Ensure the lock is held on the correct
> +	 * folio after the split
> +	 */
> +	if (!new_fault_folio) {
> +		folio_unlock(folio);
> +		folio_put(folio);
> +	} else if (folio != new_fault_folio) {
> +		folio_get(new_fault_folio);
> +		folio_lock(new_fault_folio);
> +		folio_unlock(folio);
> +		folio_put(folio);
> +	}
> +
> +	return 0;
> +}
> +
>  static int migrate_vma_collect_pmd(pmd_t *pmdp,
> 				    unsigned long start,
> 				    unsigned long end,
> @@ -136,6 +183,8 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
>  		 * page table entry. Other special swap entries are not
>  		 * migratable, and we ignore regular swapped page.
>  		 */
> +		struct folio *folio;
> +
>  		entry = pte_to_swp_entry(pte);
>  		if (!is_device_private_entry(entry))
>  			goto next;
> @@ -147,6 +196,29 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
>  		    pgmap->owner != migrate->pgmap_owner)
>  			goto next;
>
> +		folio = page_folio(page);
> +		if (folio_test_large(folio)) {
> +			int ret;
> +
> +			/*
> +			 * The reason for finding pmd present with a
> +			 * large folio for the pte is partial unmaps.
> +			 * Split the folio now for the migration to be
> +			 * handled correctly
> +			 */
> +			pte_unmap_unlock(ptep, ptl);
> +			ret = migrate_vma_split_folio(folio,
> +					migrate->fault_page);
> +
> +			if (ret) {
> +				ptep = pte_offset_map_lock(mm, pmdp,
> +						addr, &ptl);
> +				goto next;
> +			}
> +
> +			addr = start;
> +			goto again;
> +		}
> +
>  		mpfn = migrate_pfn(page_to_pfn(page)) |
>  				MIGRATE_PFN_MIGRATE;
>  		if (is_writable_device_private_entry(entry))
> @@ -171,6 +243,28 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
>  		    pgmap->owner != migrate->pgmap_owner)
>  			goto next;
>  		}
> +		folio = page_folio(page);
> +		if (folio_test_large(folio)) {
> +			int ret;
> +
> +			/*
> +			 * The reason for finding pmd present with a
> +			 * large folio for the pte is partial unmaps.
> +			 * Split the folio now for the migration to be
> +			 * handled correctly
> +			 */

This comment is still not changed; there are other reasons for pte-mapped
large pages than partial unmaps. Also, all mTHPs are now split, which is a
change of behavior (they are currently ignored) for order < PMD_ORDER.

> +			pte_unmap_unlock(ptep, ptl);
> +			ret = migrate_vma_split_folio(folio,
> +					migrate->fault_page);
> +
> +			if (ret) {
> +				ptep = pte_offset_map_lock(mm, pmdp,
> +						addr, &ptl);
> +				goto next;
> +			}
> +
> +			addr = start;
> +			goto again;
> +		}
>  		mpfn = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE;
>  		mpfn |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0;
>  	}

--Mika
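For illustration, a minimal sketch of the kind of guard the comment above
points at: splitting only PMD-order folios would keep today's behaviour
(skip the entry) for pte-mapped mTHPs of order < PMD_ORDER. This is an
untested sketch, not part of the posted patch; folio_order() and
HPAGE_PMD_ORDER are existing kernel definitions, and mpfn/ptep are the
locals of the surrounding collection loop.

	folio = page_folio(page);
	if (folio_test_large(folio)) {
		int ret;

		/*
		 * Hypothetical guard, not in the posted patch: only split
		 * PMD-order folios; smaller pte-mapped mTHPs keep being
		 * skipped (mpfn left at 0), as they are today.
		 */
		if (folio_order(folio) < HPAGE_PMD_ORDER) {
			mpfn = 0;
			goto next;
		}

		pte_unmap_unlock(ptep, ptl);
		ret = migrate_vma_split_folio(folio,
				migrate->fault_page);
		/* rest as in the hunks quoted above */
	}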
