From: Roger He <[email protected]>
This can improve performance for some cases.
v2 (chk): handle all sizes, simplify the patch quite a bit
v3 (chk): adjust dw estimation as well
Signed-off-by: Roger He <[email protected]>
Signed-off-by: Christian König <[email protected]>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 74 ++++++++++++++++++++++------------
1 file changed, 49 insertions(+), 25 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index b08f031..1575657 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1415,8 +1415,6 @@ static int amdgpu_vm_frag_ptes(struct
amdgpu_pte_update_params *params,
uint64_t start, uint64_t end,
uint64_t dst, uint64_t flags)
{
- int r;
-
/**
* The MC L1 TLB supports variable sized pages, based on a fragment
* field in the PTE. When this field is set to a non-zero value, page
@@ -1435,39 +1433,65 @@ static int amdgpu_vm_frag_ptes(struct
amdgpu_pte_update_params *params,
* Userspace can support this by aligning virtual base address and
* allocation size to the fragment size.
*/
- unsigned pages_per_frag = params->adev->vm_manager.fragment_size;
- uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag);
- uint64_t frag_align = 1 << pages_per_frag;
+ unsigned max_frag = params->adev->vm_manager.fragment_size;
+ uint64_t frag_flags, frag_end;
+ unsigned frag;
- uint64_t frag_start = ALIGN(start, frag_align);
- uint64_t frag_end = end & ~(frag_align - 1);
+ int r;
/* system pages are non continuously */
- if (params->src || !(flags & AMDGPU_PTE_VALID) ||
- (frag_start >= frag_end))
+ if (params->src || !(flags & AMDGPU_PTE_VALID))
return amdgpu_vm_update_ptes(params, start, end, dst, flags);
- /* handle the 4K area at the beginning */
- if (start != frag_start) {
- r = amdgpu_vm_update_ptes(params, start, frag_start,
- dst, flags);
+ /* Handle the fragments at the beginning */
+ while (start != end) {
+ /* This intentionally wraps around if no bit is set */
+ frag = min(ffs(start), fls64(end - start)) - 1;
+ if (frag >= max_frag)
+ break;
+
+ frag_flags = AMDGPU_PTE_FRAG(frag);
+ frag_end = start + (1 << frag);
+
+ r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
+ flags | frag_flags);
if (r)
return r;
- dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
+
+ dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
+ start = frag_end;
}
/* handle the area in the middle */
- r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
- flags | frag_flags);
- if (r)
- return r;
+ if (start != end) {
+ frag_flags = AMDGPU_PTE_FRAG(max_frag);
+ frag_end = end & ~((1 << max_frag) - 1);