Allocating at the pool's beneficial order normally uses
__GFP_RETRY_MAYFAIL to push hard for a contiguous high-order page. When
beneficial-order allocations have recently been failing, this retry just
burns CPU cycles in direct reclaim/compaction before falling back to a
smaller order anyway.

Add a beneficial_reclaim_backoff flag to struct ttm_operation_ctx and
plumb it through to ttm_pool_alloc_page(). When set, the beneficial
order is treated like the orders we already consider not worth stalling
for: __GFP_RECLAIM is cleared so the allocation skips direct
reclaim/compaction entirely, and the __GFP_NORETRY clearing /
__GFP_RETRY_MAYFAIL promotion is skipped. Together this makes the
allocator back off quickly to a smaller order instead of stalling.

This is a no-op until a caller opts in.

Cc: Carlos Santa <[email protected]>
Cc: Ryan Neph <[email protected]>
Cc: Christian Koenig <[email protected]>
Cc: Huang Rui <[email protected]>
Cc: Matthew Auld <[email protected]>
Cc: Maarten Lankhorst <[email protected]>
Cc: Maxime Ripard <[email protected]>
Cc: Thomas Zimmermann <[email protected]>
Cc: David Airlie <[email protected]>
Cc: Simona Vetter <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: Thomas Hellström <[email protected]>
Assisted-by: GitHub_Copilot:claude-opus-4.8
Signed-off-by: Matthew Brost <[email protected]>
---
 drivers/gpu/drm/ttm/ttm_pool.c | 12 ++++++++----
 include/drm/ttm/ttm_bo.h       |  6 ++++++
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index 682ae4f40424..5b95f8f938f7 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -145,7 +145,8 @@ static int ttm_pool_nid(struct ttm_pool *pool)
 
 /* Allocate pages of size 1 << order with the given gfp_flags */
 static struct page *ttm_pool_alloc_page(struct ttm_pool *pool, gfp_t gfp_flags,
-                                       unsigned int order)
+                                       unsigned int order,
+                                       bool beneficial_reclaim_backoff)
 {
        const unsigned int beneficial_order = ttm_pool_beneficial_order(pool);
        unsigned long attr = DMA_ATTR_FORCE_CONTIGUOUS;
@@ -165,10 +166,12 @@ static struct page *ttm_pool_alloc_page(struct ttm_pool *pool, gfp_t gfp_flags,
         * Do not add latency to the allocation path for allocations orders
         * device tolds us do not bring them additional performance gains.
         */
-       if (order && beneficial_order && order != beneficial_order)
+       if (order && (beneficial_reclaim_backoff ||
+                     (beneficial_order && order != beneficial_order)))
                gfp_flags &= ~__GFP_RECLAIM;
 
-       if (beneficial_order && order == beneficial_order) {
+       if (!beneficial_reclaim_backoff &&
+           beneficial_order && order == beneficial_order) {
                gfp_flags &= ~__GFP_NORETRY;
                gfp_flags |= __GFP_RETRY_MAYFAIL;
        }
@@ -814,7 +817,8 @@ static int __ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
                if (!p) {
                        page_caching = ttm_cached;
                        allow_pools = false;
-                       p = ttm_pool_alloc_page(pool, gfp_flags, order);
+                       p = ttm_pool_alloc_page(pool, gfp_flags, order,
+                                               ctx->beneficial_reclaim_backoff);
                }
                /* If that fails, lower the order if possible and retry. */
                if (!p) {
diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h
index 8310bc3d55f9..f59c175622d4 100644
--- a/include/drm/ttm/ttm_bo.h
+++ b/include/drm/ttm/ttm_bo.h
@@ -187,6 +187,12 @@ struct ttm_operation_ctx {
         * when multiple BOs share the same reservation object @resv.
         */
        bool allow_res_evict;
+       /**
+        * @beneficial_reclaim_backoff: Back off from direct reclaim at the
+        * pool's beneficial order. Useful when beneficial-order allocations
+        * have recently failed, to avoid wasting CPU cycles.
+        */
+       bool beneficial_reclaim_backoff;
        /**
         * @resv: Reservation object to be used together with
         * @allow_res_evict.
-- 
2.34.1

Reply via email to