Implement CPU fault handling for zone device THP entries through do_huge_pmd_device_private(), enabling transparent migration of device-private large pages back to system memory on CPU access.
When the CPU accesses a zone device THP entry, the fault handler calls the device driver's migrate_to_ram() callback to migrate the entire large page back to system memory. Cc: Andrew Morton <[email protected]> Cc: David Hildenbrand <[email protected]> Cc: Zi Yan <[email protected]> Cc: Joshua Hahn <[email protected]> Cc: Rakie Kim <[email protected]> Cc: Byungchul Park <[email protected]> Cc: Gregory Price <[email protected]> Cc: Ying Huang <[email protected]> Cc: Alistair Popple <[email protected]> Cc: Oscar Salvador <[email protected]> Cc: Lorenzo Stoakes <[email protected]> Cc: Baolin Wang <[email protected]> Cc: "Liam R. Howlett" <[email protected]> Cc: Nico Pache <[email protected]> Cc: Ryan Roberts <[email protected]> Cc: Dev Jain <[email protected]> Cc: Barry Song <[email protected]> Cc: Lyude Paul <[email protected]> Cc: Danilo Krummrich <[email protected]> Cc: David Airlie <[email protected]> Cc: Simona Vetter <[email protected]> Cc: Ralph Campbell <[email protected]> Cc: Mika Penttilä <[email protected]> Cc: Matthew Brost <[email protected]> Cc: Francois Dugast <[email protected]> Signed-off-by: Balbir Singh <[email protected]> --- include/linux/huge_mm.h | 7 +++++++ mm/huge_memory.c | 38 ++++++++++++++++++++++++++++++++++++++ mm/memory.c | 5 +++-- 3 files changed, 48 insertions(+), 2 deletions(-) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index f327d62fc985..2d669be7f1c8 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -496,6 +496,8 @@ static inline bool folio_test_pmd_mappable(struct folio *folio) vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf); +vm_fault_t do_huge_pmd_device_private(struct vm_fault *vmf); + extern struct folio *huge_zero_folio; extern unsigned long huge_zero_pfn; @@ -671,6 +673,11 @@ static inline vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf) return 0; } +static inline vm_fault_t do_huge_pmd_device_private(struct vm_fault *vmf) +{ + return 0; +} + static inline bool is_huge_zero_folio(const struct folio *folio) { return false; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 05c68f5b5fe3..8c95a658b3ec 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1287,6 +1287,44 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf) } +vm_fault_t do_huge_pmd_device_private(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + vm_fault_t ret = 0; + spinlock_t *ptl; + swp_entry_t swp_entry; + struct page *page; + struct folio *folio; + + if (vmf->flags & FAULT_FLAG_VMA_LOCK) { + vma_end_read(vma); + return VM_FAULT_RETRY; + } + + ptl = pmd_lock(vma->vm_mm, vmf->pmd); + if (unlikely(!pmd_same(*vmf->pmd, vmf->orig_pmd))) { + spin_unlock(ptl); + return 0; + } + + swp_entry = pmd_to_swp_entry(vmf->orig_pmd); + page = pfn_swap_entry_to_page(swp_entry); + folio = page_folio(page); + vmf->page = page; + vmf->pte = NULL; + if (folio_trylock(folio)) { + folio_get(folio); + spin_unlock(ptl); + ret = page_pgmap(page)->ops->migrate_to_ram(vmf); + folio_unlock(folio); + folio_put(folio); + } else { + spin_unlock(ptl); + } + + return ret; +} + /* * always: directly stall for all thp allocations * defer: wake kswapd and fail if not immediately available diff --git a/mm/memory.c b/mm/memory.c index 7e32eb79ba99..59985b88a9b1 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -6337,8 +6337,9 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma, vmf.orig_pmd = pmdp_get_lockless(vmf.pmd); if (unlikely(is_swap_pmd(vmf.orig_pmd))) { - VM_BUG_ON(thp_migration_supported() && - !is_pmd_migration_entry(vmf.orig_pmd)); + if (is_pmd_device_private_entry(vmf.orig_pmd)) + return do_huge_pmd_device_private(&vmf); + if (is_pmd_migration_entry(vmf.orig_pmd)) pmd_migration_entry_wait(mm, vmf.pmd); return 0; -- 2.51.0
