Now that the two-pass notifier flow uses xe_vma_userptr_do_inval() for
the fence-wait + TLB-invalidate work, extend it to support a further
deferred TLB wait:
- xe_vma_userptr_do_inval(): when the embedded finish handle is free,
submit the TLB invalidation asynchronously (xe_vm_invalidate_vma_submit)
and return &userptr->finish so the mmu_notifier core schedules a third
pass. When the handle is occupied by a concurrent invalidation, fall
back to the synchronous xe_vm_invalidate_vma() path.
- xe_vma_userptr_complete_tlb_inval(): new helper called from
invalidate_finish when tlb_inval_submitted is set. Waits for the
previously submitted batch and unmaps the gpusvm pages.
xe_vma_userptr_invalidate_finish() dispatches between the two helpers
via tlb_inval_submitted, making the three possible flows explicit:
pass1 (fences pending) -> invalidate_finish -> do_inval (sync TLB)
pass1 (fences done) -> do_inval -> invalidate_finish
-> complete_tlb_inval (deferred TLB)
pass1 (finish occupied) -> do_inval (sync TLB, inline)
In multi-GPU scenarios this allows TLB flushes to be submitted on all
GPUs in one pass before any of them are waited on.
Also adds xe_vm_invalidate_vma_submit() which submits the TLB range
invalidation without blocking, populating a xe_tlb_inval_batch that
the caller waits on separately.
Assisted-by: GitHub Copilot:claude-sonnet-4.6
Signed-off-by: Thomas Hellström <[email protected]>
---
drivers/gpu/drm/xe/xe_userptr.c | 60 +++++++++++++++++++++++++++------
drivers/gpu/drm/xe/xe_userptr.h | 18 ++++++++++
drivers/gpu/drm/xe/xe_vm.c | 38 ++++++++++++++++-----
drivers/gpu/drm/xe/xe_vm.h | 2 ++
4 files changed, 99 insertions(+), 19 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_userptr.c b/drivers/gpu/drm/xe/xe_userptr.c
index 440b0a79d16f..a62b796afb93 100644
--- a/drivers/gpu/drm/xe/xe_userptr.c
+++ b/drivers/gpu/drm/xe/xe_userptr.c
@@ -8,6 +8,7 @@
#include <linux/mm.h>
+#include "xe_tlb_inval.h"
#include "xe_trace_bo.h"
/**
@@ -73,8 +74,8 @@ int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma)
&ctx);
}
-static void xe_vma_userptr_do_inval(struct xe_vm *vm, struct xe_userptr_vma
*uvma,
- bool is_deferred)
+static struct mmu_interval_notifier_finish *
+xe_vma_userptr_do_inval(struct xe_vm *vm, struct xe_userptr_vma *uvma, bool
is_deferred)
{
struct xe_userptr *userptr = &uvma->userptr;
struct xe_vma *vma = &uvma->vma;
@@ -84,12 +85,23 @@ static void xe_vma_userptr_do_inval(struct xe_vm *vm,
struct xe_userptr_vma *uvm
};
long err;
- err = dma_resv_wait_timeout(xe_vm_resv(vm),
- DMA_RESV_USAGE_BOOKKEEP,
+ err = dma_resv_wait_timeout(xe_vm_resv(vm), DMA_RESV_USAGE_BOOKKEEP,
false, MAX_SCHEDULE_TIMEOUT);
XE_WARN_ON(err <= 0);
if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) {
+ if (!userptr->finish_inuse) {
+ /*
+ * Defer the TLB wait to an extra pass so the caller
+ * can pipeline TLB flushes across GPUs before waiting
+ * on any of them.
+ */
+ userptr->finish_inuse = true;
+ userptr->tlb_inval_submitted = true;
+ err = xe_vm_invalidate_vma_submit(vma,
&userptr->inval_batch);
+ XE_WARN_ON(err);
+ return &userptr->finish;
+ }
err = xe_vm_invalidate_vma(vma);
XE_WARN_ON(err);
}
@@ -98,6 +110,24 @@ static void xe_vma_userptr_do_inval(struct xe_vm *vm,
struct xe_userptr_vma *uvm
userptr->finish_inuse = false;
drm_gpusvm_unmap_pages(&vm->svm.gpusvm, &uvma->userptr.pages,
xe_vma_size(vma) >> PAGE_SHIFT, &ctx);
+ return NULL;
+}
+
+static void
+xe_vma_userptr_complete_tlb_inval(struct xe_vm *vm, struct xe_userptr_vma
*uvma)
+{
+ struct xe_userptr *userptr = &uvma->userptr;
+ struct xe_vma *vma = &uvma->vma;
+ struct drm_gpusvm_ctx ctx = {
+ .in_notifier = true,
+ .read_only = xe_vma_read_only(vma),
+ };
+
+ xe_tlb_inval_batch_wait(&userptr->inval_batch);
+ userptr->tlb_inval_submitted = false;
+ userptr->finish_inuse = false;
+ drm_gpusvm_unmap_pages(&vm->svm.gpusvm, &uvma->userptr.pages,
+ xe_vma_size(vma) >> PAGE_SHIFT, &ctx);
}
static struct mmu_interval_notifier_finish *
@@ -141,13 +171,11 @@ xe_vma_userptr_invalidate_pass1(struct xe_vm *vm, struct
xe_userptr_vma *uvma)
* If it's already in use, or all fences are already signaled,
* proceed directly to invalidation without deferring.
*/
- if (signaled || userptr->finish_inuse) {
- xe_vma_userptr_do_inval(vm, uvma, false);
- return NULL;
- }
+ if (signaled || userptr->finish_inuse)
+ return xe_vma_userptr_do_inval(vm, uvma, false);
+ /* Defer: the notifier core will call invalidate_finish once done. */
userptr->finish_inuse = true;
-
return &userptr->finish;
}
@@ -193,7 +221,15 @@ static void xe_vma_userptr_invalidate_finish(struct
mmu_interval_notifier_finish
xe_vma_start(vma), xe_vma_size(vma));
down_write(&vm->svm.gpusvm.notifier_lock);
- xe_vma_userptr_do_inval(vm, uvma, true);
+ /*
+ * If a TLB invalidation was previously submitted (deferred from the
+ * synchronous pass1 fallback), wait for it and unmap pages.
+ * Otherwise, fences have now completed: invalidate the TLB and unmap.
+ */
+ if (uvma->userptr.tlb_inval_submitted)
+ xe_vma_userptr_complete_tlb_inval(vm, uvma);
+ else
+ xe_vma_userptr_do_inval(vm, uvma, true);
up_write(&vm->svm.gpusvm.notifier_lock);
trace_xe_vma_userptr_invalidate_complete(vma);
}
@@ -231,7 +267,9 @@ void xe_vma_userptr_force_invalidate(struct xe_userptr_vma
*uvma)
finish = xe_vma_userptr_invalidate_pass1(vm, uvma);
if (finish)
- xe_vma_userptr_do_inval(vm, uvma, true);
+ finish = xe_vma_userptr_do_inval(vm, uvma, true);
+ if (finish)
+ xe_vma_userptr_complete_tlb_inval(vm, uvma);
}
#endif
diff --git a/drivers/gpu/drm/xe/xe_userptr.h b/drivers/gpu/drm/xe/xe_userptr.h
index 4f42db61fd62..7477009651c2 100644
--- a/drivers/gpu/drm/xe/xe_userptr.h
+++ b/drivers/gpu/drm/xe/xe_userptr.h
@@ -14,6 +14,8 @@
#include <drm/drm_gpusvm.h>
+#include "xe_tlb_inval_types.h"
+
struct xe_vm;
struct xe_vma;
struct xe_userptr_vma;
@@ -63,6 +65,15 @@ struct xe_userptr {
* Protected by @vm::svm.gpusvm.notifier_lock.
*/
struct mmu_interval_notifier_finish finish;
+
+ /**
+ * @inval_batch: TLB invalidation batch for deferred completion.
+ * Stores an in-flight TLB invalidation submitted during a two-pass
+ * notifier so the wait can be deferred to a subsequent pass, allowing
+ * multiple GPUs to be signalled before any of them are waited on.
+ * Protected by @vm::svm.gpusvm.notifier_lock.
+ */
+ struct xe_tlb_inval_batch inval_batch;
/**
* @finish_inuse: Whether @finish is currently in use by an in-progress
* two-pass invalidation.
@@ -70,6 +81,13 @@ struct xe_userptr {
*/
bool finish_inuse;
+ /**
+ * @tlb_inval_submitted: Whether a TLB invalidation has been submitted
+ * via @inval_batch and is pending completion. When set, the next pass
+ * must call xe_tlb_inval_batch_wait() before reusing @inval_batch.
+ * Protected by @vm::svm.gpusvm.notifier_lock.
+ */
+ bool tlb_inval_submitted;
/**
* @initial_bind: user pointer has been bound at least once.
* write: vm->svm.gpusvm.notifier_lock in read mode and vm->resv held.
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 7f29d2b2972d..fdad9329dfb4 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -3967,20 +3967,23 @@ void xe_vm_unlock(struct xe_vm *vm)
}
/**
- * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
+ * xe_vm_invalidate_vma_submit - Submit a job to invalidate GPU mappings for
+ * VMA.
* @vma: VMA to invalidate
+ * @batch: TLB invalidation batch to populate; caller must later call
+ * xe_tlb_inval_batch_wait() on it to wait for completion
*
* Walks a list of page tables leaves which it memset the entries owned by this
- * VMA to zero, invalidates the TLBs, and block until TLBs invalidation is
- * complete.
+ * VMA to zero, invalidates the TLBs, but doesn't block waiting for TLB flush
+ * to complete, but instead populates @batch which can be waited on using
+ * xe_tlb_inval_batch_wait().
*
* Returns 0 for success, negative error code otherwise.
*/
-int xe_vm_invalidate_vma(struct xe_vma *vma)
+int xe_vm_invalidate_vma_submit(struct xe_vma *vma, struct xe_tlb_inval_batch
*batch)
{
struct xe_device *xe = xe_vma_vm(vma)->xe;
struct xe_vm *vm = xe_vma_vm(vma);
- struct xe_tlb_inval_batch _batch;
struct xe_tile *tile;
u8 tile_mask = 0;
int ret = 0;
@@ -4023,14 +4026,33 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
ret = xe_tlb_inval_range_tilemask_submit(xe, xe_vma_vm(vma)->usm.asid,
xe_vma_start(vma),
xe_vma_end(vma),
- tile_mask, &_batch);
+ tile_mask, batch);
/* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */
WRITE_ONCE(vma->tile_invalidated, vma->tile_mask);
+ return ret;
+}
+
+/**
+ * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
+ * @vma: VMA to invalidate
+ *
+ * Walks a list of page tables leaves which it memset the entries owned by this
+ * VMA to zero, invalidates the TLBs, and block until TLBs invalidation is
+ * complete.
+ *
+ * Returns 0 for success, negative error code otherwise.
+ */
+int xe_vm_invalidate_vma(struct xe_vma *vma)
+{
+ struct xe_tlb_inval_batch batch;
+ int ret;
- if (!ret)
- xe_tlb_inval_batch_wait(&_batch);
+ ret = xe_vm_invalidate_vma_submit(vma, &batch);
+ if (ret)
+ return ret;
+ xe_tlb_inval_batch_wait(&batch);
return ret;
}
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index 62f4b6fec0bc..0bc7ed23eeae 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -242,6 +242,8 @@ struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm,
int xe_vm_invalidate_vma(struct xe_vma *vma);
+int xe_vm_invalidate_vma_submit(struct xe_vma *vma, struct xe_tlb_inval_batch
*batch);
+
int xe_vm_validate_protected(struct xe_vm *vm);
static inline void xe_vm_queue_rebind_worker(struct xe_vm *vm)
--
2.53.0