On 9/8/25 14:14, Mika Penttilä wrote: > Hi, > > On 9/8/25 03:04, Balbir Singh wrote: > >> Extend migrate_vma_collect_pmd() to handle partially mapped large >> folios that require splitting before migration can proceed. >> >> During PTE walk in the collection phase, if a large folio is only >> partially mapped in the migration range, it must be split to ensure >> the folio is correctly migrated. >> >> Cc: Andrew Morton <[email protected]> >> Cc: David Hildenbrand <[email protected]> >> Cc: Zi Yan <[email protected]> >> Cc: Joshua Hahn <[email protected]> >> Cc: Rakie Kim <[email protected]> >> Cc: Byungchul Park <[email protected]> >> Cc: Gregory Price <[email protected]> >> Cc: Ying Huang <[email protected]> >> Cc: Alistair Popple <[email protected]> >> Cc: Oscar Salvador <[email protected]> >> Cc: Lorenzo Stoakes <[email protected]> >> Cc: Baolin Wang <[email protected]> >> Cc: "Liam R. Howlett" <[email protected]> >> Cc: Nico Pache <[email protected]> >> Cc: Ryan Roberts <[email protected]> >> Cc: Dev Jain <[email protected]> >> Cc: Barry Song <[email protected]> >> Cc: Lyude Paul <[email protected]> >> Cc: Danilo Krummrich <[email protected]> >> Cc: David Airlie <[email protected]> >> Cc: Simona Vetter <[email protected]> >> Cc: Ralph Campbell <[email protected]> >> Cc: Mika Penttilä <[email protected]> >> Cc: Matthew Brost <[email protected]> >> Cc: Francois Dugast <[email protected]> >> >> Signed-off-by: Balbir Singh <[email protected]> >> --- >> mm/migrate_device.c | 94 +++++++++++++++++++++++++++++++++++++++++++++ >> 1 file changed, 94 insertions(+) >> >> diff --git a/mm/migrate_device.c b/mm/migrate_device.c >> index abd9f6850db6..f45ef182287d 100644 >> --- a/mm/migrate_device.c >> +++ b/mm/migrate_device.c >> @@ -54,6 +54,53 @@ static int migrate_vma_collect_hole(unsigned long start, >> return 0; >> } >> >> +/** >> + * migrate_vma_split_folio() - Helper function to split a THP folio >> + * @folio: the folio to split >> + * @fault_page: struct page 
associated with the fault if any >> + * >> + * Returns 0 on success >> + */ >> +static int migrate_vma_split_folio(struct folio *folio, >> + struct page *fault_page) >> +{ >> + int ret; >> + struct folio *fault_folio = fault_page ? page_folio(fault_page) : NULL; >> + struct folio *new_fault_folio = NULL; >> + >> + if (folio != fault_folio) { >> + folio_get(folio); >> + folio_lock(folio); >> + } >> + >> + ret = split_folio(folio); >> + if (ret) { >> + if (folio != fault_folio) { >> + folio_unlock(folio); >> + folio_put(folio); >> + } >> + return ret; >> + } >> + >> + new_fault_folio = fault_page ? page_folio(fault_page) : NULL; >> + >> + /* >> + * Ensure the lock is held on the correct >> + * folio after the split >> + */ >> + if (!new_fault_folio) { >> + folio_unlock(folio); >> + folio_put(folio); >> + } else if (folio != new_fault_folio) { >> + folio_get(new_fault_folio); >> + folio_lock(new_fault_folio); >> + folio_unlock(folio); >> + folio_put(folio); >> + } >> + >> + return 0; >> +} >> + >> static int migrate_vma_collect_pmd(pmd_t *pmdp, >> unsigned long start, >> unsigned long end, >> @@ -136,6 +183,8 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp, >> * page table entry. Other special swap entries are not >> * migratable, and we ignore regular swapped page. >> */ >> + struct folio *folio; >> + >> entry = pte_to_swp_entry(pte); >> if (!is_device_private_entry(entry)) >> goto next; >> @@ -147,6 +196,29 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp, >> pgmap->owner != migrate->pgmap_owner) >> goto next; >> >> + folio = page_folio(page); >> + if (folio_test_large(folio)) { >> + int ret; >> + >> + /* >> + * The reason for finding pmd present with a >> + * large folio for the pte is partial unmaps. 
>> + * Split the folio now for the migration to be >> + * handled correctly >> + */ >> + pte_unmap_unlock(ptep, ptl); >> + ret = migrate_vma_split_folio(folio, >> + migrate->fault_page); >> + >> + if (ret) { >> + ptep = pte_offset_map_lock(mm, pmdp, >> addr, &ptl); >> + goto next; >> + } >> + >> + addr = start; >> + goto again; >> + } >> + >> mpfn = migrate_pfn(page_to_pfn(page)) | >> MIGRATE_PFN_MIGRATE; >> if (is_writable_device_private_entry(entry)) >> @@ -171,6 +243,28 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp, >> pgmap->owner != migrate->pgmap_owner) >> goto next; >> } >> + folio = page_folio(page); >> + if (folio_test_large(folio)) { >> + int ret; >> + >> + /* >> + * The reason for finding pmd present with a >> + * large folio for the pte is partial unmaps. >> + * Split the folio now for the migration to be >> + * handled correctly >> + */ > > This comment is still not changed; there are other reasons for PTE-mapped > large pages. > Also, all the mTHPs are now split, which is a change of behavior (currently > ignored) > for order < PMD_ORDER.
Oh! sorry I missed it. I am attaching the version with the comments removed. On the behaviour change, I agree, but it is required for migration to occur. Updated patch below: mm/migrate_device: handle partially mapped folios during collection Extend migrate_vma_collect_pmd() to handle partially mapped large folios that require splitting before migration can proceed. During PTE walk in the collection phase, if a large folio is only partially mapped in the migration range, it must be split to ensure the folio is correctly migrated. Cc: Andrew Morton <[email protected]> Cc: David Hildenbrand <[email protected]> Cc: Zi Yan <[email protected]> Cc: Joshua Hahn <[email protected]> Cc: Rakie Kim <[email protected]> Cc: Byungchul Park <[email protected]> Cc: Gregory Price <[email protected]> Cc: Ying Huang <[email protected]> Cc: Alistair Popple <[email protected]> Cc: Oscar Salvador <[email protected]> Cc: Lorenzo Stoakes <[email protected]> Cc: Baolin Wang <[email protected]> Cc: "Liam R. Howlett" <[email protected]> Cc: Nico Pache <[email protected]> Cc: Ryan Roberts <[email protected]> Cc: Dev Jain <[email protected]> Cc: Barry Song <[email protected]> Cc: Lyude Paul <[email protected]> Cc: Danilo Krummrich <[email protected]> Cc: David Airlie <[email protected]> Cc: Simona Vetter <[email protected]> Cc: Ralph Campbell <[email protected]> Cc: Mika Penttilä <[email protected]> Cc: Matthew Brost <[email protected]> Cc: Francois Dugast <[email protected]> Signed-off-by: Balbir Singh <[email protected]> --- mm/migrate_device.c | 82 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) diff --git a/mm/migrate_device.c b/mm/migrate_device.c index abd9f6850db6..0afdc8b67c60 100644 --- a/mm/migrate_device.c +++ b/mm/migrate_device.c @@ -54,6 +54,53 @@ static int migrate_vma_collect_hole(unsigned long start, return 0; } +/** + * migrate_vma_split_folio() - Helper function to split a THP folio + * @folio: the folio to split + * @fault_page: struct page 
associated with the fault if any + * + * Returns 0 on success + */ +static int migrate_vma_split_folio(struct folio *folio, + struct page *fault_page) +{ + int ret; + struct folio *fault_folio = fault_page ? page_folio(fault_page) : NULL; + struct folio *new_fault_folio = NULL; + + if (folio != fault_folio) { + folio_get(folio); + folio_lock(folio); + } + + ret = split_folio(folio); + if (ret) { + if (folio != fault_folio) { + folio_unlock(folio); + folio_put(folio); + } + return ret; + } + + new_fault_folio = fault_page ? page_folio(fault_page) : NULL; + + /* + * Ensure the lock is held on the correct + * folio after the split + */ + if (!new_fault_folio) { + folio_unlock(folio); + folio_put(folio); + } else if (folio != new_fault_folio) { + folio_get(new_fault_folio); + folio_lock(new_fault_folio); + folio_unlock(folio); + folio_put(folio); + } + + return 0; +} + static int migrate_vma_collect_pmd(pmd_t *pmdp, unsigned long start, unsigned long end, @@ -136,6 +183,8 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp, * page table entry. Other special swap entries are not * migratable, and we ignore regular swapped page. 
*/ + struct folio *folio; + entry = pte_to_swp_entry(pte); if (!is_device_private_entry(entry)) goto next; @@ -147,6 +196,23 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp, pgmap->owner != migrate->pgmap_owner) goto next; + folio = page_folio(page); + if (folio_test_large(folio)) { + int ret; + + pte_unmap_unlock(ptep, ptl); + ret = migrate_vma_split_folio(folio, + migrate->fault_page); + + if (ret) { + ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl); + goto next; + } + + addr = start; + goto again; + } + mpfn = migrate_pfn(page_to_pfn(page)) | MIGRATE_PFN_MIGRATE; if (is_writable_device_private_entry(entry)) @@ -171,6 +237,22 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp, pgmap->owner != migrate->pgmap_owner) goto next; } + folio = page_folio(page); + if (folio_test_large(folio)) { + int ret; + + pte_unmap_unlock(ptep, ptl); + ret = migrate_vma_split_folio(folio, + migrate->fault_page); + + if (ret) { + ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl); + goto next; + } + + addr = start; + goto again; + } mpfn = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE; mpfn |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0; } -- 2.50.1 Balbir Singh
