On 10/13/25 02:46, Lance Yang wrote: > On Wed, Oct 1, 2025 at 4:20 PM Balbir Singh <[email protected]> wrote: >> >> Extend core huge page management functions to handle device-private THP >> entries. This enables proper handling of large device-private folios in >> fundamental MM operations. >> >> The following functions have been updated: >> >> - copy_huge_pmd(): Handle device-private entries during fork/clone >> - zap_huge_pmd(): Properly free device-private THP during munmap >> - change_huge_pmd(): Support protection changes on device-private THP >> - __pte_offset_map(): Add device-private entry awareness >> >> Cc: David Hildenbrand <[email protected]> >> Cc: Zi Yan <[email protected]> >> Cc: Joshua Hahn <[email protected]> >> Cc: Rakie Kim <[email protected]> >> Cc: Byungchul Park <[email protected]> >> Cc: Gregory Price <[email protected]> >> Cc: Ying Huang <[email protected]> >> Cc: Alistair Popple <[email protected]> >> Cc: Oscar Salvador <[email protected]> >> Cc: Lorenzo Stoakes <[email protected]> >> Cc: Baolin Wang <[email protected]> >> Cc: "Liam R. 
Howlett" <[email protected]> >> Cc: Nico Pache <[email protected]> >> Cc: Ryan Roberts <[email protected]> >> Cc: Dev Jain <[email protected]> >> Cc: Barry Song <[email protected]> >> Cc: Lyude Paul <[email protected]> >> Cc: Danilo Krummrich <[email protected]> >> Cc: David Airlie <[email protected]> >> Cc: Simona Vetter <[email protected]> >> Cc: Ralph Campbell <[email protected]> >> Cc: Mika Penttilä <[email protected]> >> Cc: Matthew Brost <[email protected]> >> Cc: Francois Dugast <[email protected]> >> Cc: Andrew Morton <[email protected]> >> Acked-by: Zi Yan <[email protected]> >> Signed-off-by: Matthew Brost <[email protected]> >> Signed-off-by: Balbir Singh <[email protected]> >> --- >> include/linux/swapops.h | 32 +++++++++++++++++++++++ >> mm/huge_memory.c | 56 ++++++++++++++++++++++++++++++++++------- >> mm/pgtable-generic.c | 2 +- >> 3 files changed, 80 insertions(+), 10 deletions(-) >> >> diff --git a/include/linux/swapops.h b/include/linux/swapops.h >> index 64ea151a7ae3..2687928a8146 100644 >> --- a/include/linux/swapops.h >> +++ b/include/linux/swapops.h >> @@ -594,10 +594,42 @@ static inline int is_pmd_migration_entry(pmd_t pmd) >> } >> #endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */ >> >> +#if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_ARCH_ENABLE_THP_MIGRATION) >> + >> +/** >> + * is_pmd_device_private_entry() - Check if PMD contains a device private >> swap entry >> + * @pmd: The PMD to check >> + * >> + * Returns true if the PMD contains a swap entry that represents a device >> private >> + * page mapping. This is used for zone device private pages that have been >> + * swapped out but still need special handling during various memory >> management >> + * operations. 
>> + * >> + * Return: 1 if PMD contains device private entry, 0 otherwise >> + */ >> +static inline int is_pmd_device_private_entry(pmd_t pmd) >> +{ >> + return is_swap_pmd(pmd) && >> is_device_private_entry(pmd_to_swp_entry(pmd)); >> +} >> + >> +#else /* CONFIG_ZONE_DEVICE && CONFIG_ARCH_ENABLE_THP_MIGRATION */ >> + >> +static inline int is_pmd_device_private_entry(pmd_t pmd) >> +{ >> + return 0; >> +} >> + >> +#endif /* CONFIG_ZONE_DEVICE && CONFIG_ARCH_ENABLE_THP_MIGRATION */ >> + >> static inline int non_swap_entry(swp_entry_t entry) >> { >> return swp_type(entry) >= MAX_SWAPFILES; >> } >> >> +static inline int is_pmd_non_present_folio_entry(pmd_t pmd) >> +{ >> + return is_pmd_migration_entry(pmd) || >> is_pmd_device_private_entry(pmd); >> +} >> + >> #endif /* CONFIG_MMU */ >> #endif /* _LINUX_SWAPOPS_H */ >> diff --git a/mm/huge_memory.c b/mm/huge_memory.c >> index 1b81680b4225..8e0a1747762d 100644 >> --- a/mm/huge_memory.c >> +++ b/mm/huge_memory.c >> @@ -1703,17 +1703,45 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct >> mm_struct *src_mm, >> if (unlikely(is_swap_pmd(pmd))) { >> swp_entry_t entry = pmd_to_swp_entry(pmd); >> >> - VM_BUG_ON(!is_pmd_migration_entry(pmd)); >> - if (!is_readable_migration_entry(entry)) { >> - entry = make_readable_migration_entry( >> - swp_offset(entry)); >> + VM_WARN_ON(!is_pmd_non_present_folio_entry(pmd)); >> + >> + if (is_writable_migration_entry(entry) || >> + is_readable_exclusive_migration_entry(entry)) { >> + entry = >> make_readable_migration_entry(swp_offset(entry)); >> pmd = swp_entry_to_pmd(entry); >> if (pmd_swp_soft_dirty(*src_pmd)) >> pmd = pmd_swp_mksoft_dirty(pmd); >> if (pmd_swp_uffd_wp(*src_pmd)) >> pmd = pmd_swp_mkuffd_wp(pmd); >> set_pmd_at(src_mm, addr, src_pmd, pmd); >> + } else if (is_device_private_entry(entry)) { >> + /* >> + * For device private entries, since there are no >> + * read exclusive entries, writable = !readable >> + */ >> + if (is_writable_device_private_entry(entry)) { >> + entry = 
>> make_readable_device_private_entry(swp_offset(entry)); >> + pmd = swp_entry_to_pmd(entry); >> + >> + if (pmd_swp_soft_dirty(*src_pmd)) >> + pmd = pmd_swp_mksoft_dirty(pmd); >> + if (pmd_swp_uffd_wp(*src_pmd)) >> + pmd = pmd_swp_mkuffd_wp(pmd); >> + set_pmd_at(src_mm, addr, src_pmd, pmd); >> + } >> + >> + src_folio = pfn_swap_entry_folio(entry); >> + VM_WARN_ON(!folio_test_large(src_folio)); >> + >> + folio_get(src_folio); >> + /* >> + * folio_try_dup_anon_rmap_pmd does not fail for >> + * device private entries. >> + */ >> + folio_try_dup_anon_rmap_pmd(src_folio, >> &src_folio->page, >> + dst_vma, src_vma); >> } >> + >> add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR); >> mm_inc_nr_ptes(dst_mm); >> pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable); >> @@ -2211,15 +2239,16 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct >> vm_area_struct *vma, >> folio_remove_rmap_pmd(folio, page, vma); >> WARN_ON_ONCE(folio_mapcount(folio) < 0); >> VM_BUG_ON_PAGE(!PageHead(page), page); >> - } else if (thp_migration_supported()) { >> + } else if (is_pmd_non_present_folio_entry(orig_pmd)) { >> swp_entry_t entry; >> >> - VM_BUG_ON(!is_pmd_migration_entry(orig_pmd)); >> entry = pmd_to_swp_entry(orig_pmd); >> folio = pfn_swap_entry_folio(entry); >> flush_needed = 0; >> - } else >> - WARN_ONCE(1, "Non present huge pmd without pmd >> migration enabled!"); >> + >> + if (!thp_migration_supported()) >> + WARN_ONCE(1, "Non present huge pmd without >> pmd migration enabled!"); >> + } >> >> if (folio_test_anon(folio)) { >> zap_deposited_table(tlb->mm, pmd); >> @@ -2239,6 +2268,12 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct >> vm_area_struct *vma, >> folio_mark_accessed(folio); >> } >> >> + if (folio_is_device_private(folio)) { >> + folio_remove_rmap_pmd(folio, &folio->page, vma); >> + WARN_ON_ONCE(folio_mapcount(folio) < 0); >> + folio_put(folio); >> + } > > IIUC, a device-private THP is always anonymous, right? 
would it make sense > to move this folio_is_device_private() block inside the folio_test_anon() > check above? > Yes, they are; there is a discussion on file-backed mappings at https://lwn.net/Articles/1016124/. I don't see a benefit from moving it — do you?
Balbir [...]
