On 17.09.25 19:22, Sunil Khatri wrote:
> We don't need to allocate a local array of pages to hold
> the pages returned by HMM; instead we can use the
> hmm_range structure itself to get to the hmm_pfns and
> fetch the required pages directly.
>
> This saves allocating and freeing a lot of memory without
> any impact on performance.
>
> Signed-off-by: Sunil Khatri <sunil.kha...@amd.com>
> Suggested-by: Christian König <christian.koe...@amd.com>
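
The change boils down to filling the ttm page array straight from the pfns
that hmm_range_fault() already stored in the hmm_range, instead of copying
them into a separately allocated struct page ** first. Roughly, as a sketch
of the new helper and its caller pattern (identifiers as in the hunks below,
error handling omitted):

    /* fill ttm->pages from the pfns kept in the hmm_range snapshot */
    void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm,
                                      struct hmm_range *range)
    {
            unsigned long i;

            for (i = 0; i < ttm->num_pages; ++i)
                    ttm->pages[i] = hmm_pfn_to_page(range->hmm_pfns[i]);
    }

    /* caller side: snapshot the range, then populate the ttm directly */
    r = amdgpu_ttm_tt_get_user_pages(bo, &range);
    ...
    amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, range);

With that, the kvcalloc/kvfree of the temporary page array in the CS and KFD
paths goes away entirely.
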
Reviewed-by: Christian König <christian.koe...@amd.com>

> ---
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 10 +++++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h  |  1 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c       | 30 ++++---------------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c      |  5 ++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c      | 10 +------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h      |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c      |  8 ++---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h      |  5 ++--
>  drivers/gpu/drm/amd/amdkfd/kfd_svm.c         |  2 +-
>  9 files changed, 25 insertions(+), 48 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index c3b34a410375..7c54fe6b0f5d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -1089,7 +1089,7 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,
>  		return 0;
>  	}
>  
> -	ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages, &range);
> +	ret = amdgpu_ttm_tt_get_user_pages(bo, &range);
>  	if (ret) {
>  		if (ret == -EAGAIN)
>  			pr_debug("Failed to get user pages, try again\n");
> @@ -1103,6 +1103,9 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,
>  		pr_err("%s: Failed to reserve BO\n", __func__);
>  		goto release_out;
>  	}
> +
> +	amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, range);
> +
>  	amdgpu_bo_placement_from_domain(bo, mem->domain);
>  	ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
>  	if (ret)
> @@ -2565,8 +2568,7 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
>  	}
>  
>  	/* Get updated user pages */
> -	ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages,
> -					   &mem->range);
> +	ret = amdgpu_ttm_tt_get_user_pages(bo, &mem->range);
>  	if (ret) {
>  		pr_debug("Failed %d to get user pages\n", ret);
>  
> @@ -2595,6 +2597,8 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
>  		ret = 0;
>  	}
>  
> +	amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->range);
> +
>  	mutex_lock(&process_info->notifier_lock);
>  
>  	/* Mark the BO as valid unless it was invalidated
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> index 555cd6d877c3..a716c9886c74 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> @@ -38,7 +38,6 @@ struct amdgpu_bo_list_entry {
>  	struct amdgpu_bo	*bo;
>  	struct amdgpu_bo_va	*bo_va;
>  	uint32_t		priority;
> -	struct page		**user_pages;
>  	struct hmm_range	*range;
>  	bool			user_invalidated;
>  };
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index defb511acc5a..744e6ff69814 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -29,6 +29,7 @@
>  #include <linux/pagemap.h>
>  #include <linux/sync_file.h>
>  #include <linux/dma-buf.h>
> +#include <linux/hmm.h>
>  
>  #include <drm/amdgpu_drm.h>
>  #include <drm/drm_syncobj.h>
> @@ -885,24 +886,12 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>  		struct amdgpu_bo *bo = e->bo;
>  		int i;
>  
> -		e->user_pages = kvcalloc(bo->tbo.ttm->num_pages,
> -					 sizeof(struct page *),
> -					 GFP_KERNEL);
> -		if (!e->user_pages) {
> -			drm_err(adev_to_drm(p->adev), "kvmalloc_array failure\n");
> -			r = -ENOMEM;
> -			goto out_free_user_pages;
> -		}
> -
> -		r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages, &e->range);
> -		if (r) {
> -			kvfree(e->user_pages);
> -			e->user_pages = NULL;
> +		r = amdgpu_ttm_tt_get_user_pages(bo, &e->range);
> +		if (r)
>  			goto out_free_user_pages;
> -		}
>  
>  		for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
> -			if (bo->tbo.ttm->pages[i] != e->user_pages[i]) {
> +			if (bo->tbo.ttm->pages[i] != hmm_pfn_to_page(e->range->hmm_pfns[i])) {
>  				userpage_invalidated = true;
>  				break;
>  			}
> @@ -946,7 +935,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>  		}
>  
>  		if (amdgpu_ttm_tt_is_userptr(e->bo->tbo.ttm) &&
> -		    e->user_invalidated && e->user_pages) {
> +		    e->user_invalidated) {
>  			amdgpu_bo_placement_from_domain(e->bo,
>  							AMDGPU_GEM_DOMAIN_CPU);
>  			r = ttm_bo_validate(&e->bo->tbo, &e->bo->placement,
> @@ -955,11 +944,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>  				goto out_free_user_pages;
>  
>  			amdgpu_ttm_tt_set_user_pages(e->bo->tbo.ttm,
> -						     e->user_pages);
> +						     e->range);
>  		}
> -
> -		kvfree(e->user_pages);
> -		e->user_pages = NULL;
>  	}
>  
>  	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
> @@ -1001,11 +987,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>  	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
>  		struct amdgpu_bo *bo = e->bo;
>  
> -		if (!e->user_pages)
> -			continue;
>  		amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, e->range);
> -		kvfree(e->user_pages);
> -		e->user_pages = NULL;
>  		e->range = NULL;
>  	}
>  	mutex_unlock(&p->bo_list->bo_list_mutex);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index a8fa09184459..8524aa55e057 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -571,8 +571,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
>  		goto release_object;
>  
>  	if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) {
> -		r = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages,
> -						 &range);
> +		r = amdgpu_ttm_tt_get_user_pages(bo, &range);
>  		if (r)
>  			goto release_object;
>  
> @@ -580,6 +579,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
>  		if (r)
>  			goto user_pages_done;
>  
> +		amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, range);
> +
>  		amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
>  		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
>  		amdgpu_bo_unreserve(bo);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
> index e36fede7f74c..4441572d6ad1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
> @@ -167,7 +167,7 @@ void amdgpu_hmm_unregister(struct amdgpu_bo *bo)
>  
>  int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
>  			       uint64_t start, uint64_t npages, bool readonly,
> -			       void *owner, struct page **pages,
> +			       void *owner,
>  			       struct hmm_range **phmm_range)
>  {
>  	struct hmm_range *hmm_range;
> @@ -222,14 +222,6 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
>  	hmm_range->start = start;
>  	hmm_range->hmm_pfns = pfns;
>  
> -	/*
> -	 * Due to default_flags, all pages are HMM_PFN_VALID or
> -	 * hmm_range_fault() fails. FIXME: The pages cannot be touched outside
> -	 * the notifier_lock, and mmu_interval_read_retry() must be done first.
> -	 */
> -	for (i = 0; pages && i < npages; i++)
> -		pages[i] = hmm_pfn_to_page(pfns[i]);
> -
>  	*phmm_range = hmm_range;
>  
>  	return 0;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h
> index e2edcd010ccc..953e1d06de20 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h
> @@ -33,7 +33,7 @@
>  
>  int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
>  			       uint64_t start, uint64_t npages, bool readonly,
> -			       void *owner, struct page **pages,
> +			       void *owner,
>  			       struct hmm_range **phmm_range);
>  bool amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range);
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 4e2486dbc0a6..280400b13c54 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -707,7 +707,7 @@ struct amdgpu_ttm_tt {
>   * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only
>   * once afterwards to stop HMM tracking
>   */
> -int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages,
> +int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo,
>  				 struct hmm_range **range)
>  {
>  	struct ttm_tt *ttm = bo->tbo.ttm;
> @@ -744,7 +744,7 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages,
>  
>  	readonly = amdgpu_ttm_tt_is_readonly(ttm);
>  	r = amdgpu_hmm_range_get_pages(&bo->notifier, start, ttm->num_pages,
> -				       readonly, NULL, pages, range);
> +				       readonly, NULL, range);
>  out_unlock:
>  	mmap_read_unlock(mm);
>  	if (r)
> @@ -796,12 +796,12 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm,
>   * that backs user memory and will ultimately be mapped into the device
>   * address space.
>   */
> -void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
> +void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct hmm_range *range)
>  {
>  	unsigned long i;
>  
>  	for (i = 0; i < ttm->num_pages; ++i)
> -		ttm->pages[i] = pages ? pages[i] : NULL;
> +		ttm->pages[i] = hmm_pfn_to_page(range->hmm_pfns[i]);
>  }
>  
>  /*
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index bb17987f0447..6ac94469ed40 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -190,7 +190,7 @@ void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
>  uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type);
>  
>  #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
> -int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages,
> +int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo,
>  				 struct hmm_range **range);
>  void amdgpu_ttm_tt_discard_user_pages(struct ttm_tt *ttm,
>  				      struct hmm_range *range);
> @@ -198,7 +198,6 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm,
>  				       struct hmm_range *range);
>  #else
>  static inline int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo,
> -					       struct page **pages,
>  					       struct hmm_range **range)
>  {
>  	return -EPERM;
> @@ -214,7 +213,7 @@ static inline bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm,
>  }
>  #endif
>  
> -void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages);
> +void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct hmm_range *range);
>  int amdgpu_ttm_tt_get_userptr(const struct ttm_buffer_object *tbo,
>  			      uint64_t *user_addr);
>  int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo,
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index 68ba239b2e5d..273f42e3afdd 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -1738,7 +1738,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
>  
>  		WRITE_ONCE(p->svms.faulting_task, current);
>  		r = amdgpu_hmm_range_get_pages(&prange->notifier, addr, npages,
> -					       readonly, owner, NULL,
> +					       readonly, owner,
>  					       &hmm_range);
>  		WRITE_ONCE(p->svms.faulting_task, NULL);
>  		if (r)
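
The caveat from the comment removed in amdgpu_hmm.c above still applies to the
new flow: the struct page pointers derived from range->hmm_pfns may only be
used once the notifier sequence check has confirmed the range was not
invalidated in the meantime, roughly (sketch only, not part of the patch):

    /* under the notifier lock, verify the hmm_range snapshot is still valid */
    if (mmu_interval_read_retry(range->notifier, range->notifier_seq)) {
            /* invalidated - discard the pages and fault the range again */
    }

which is essentially what amdgpu_hmm_range_get_pages_done() and
amdgpu_ttm_tt_get_user_pages_done() already wrap for the callers touched here.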