Hello,

I want to prepend that I've never done kernel programming and am not familiar 
with the amdgpu driver. That said, I read through the issues on Mesa regarding 
its behavior when RADV runs out of VRAM [1] and the related information.

Thereby I stumbled upon Christian König mentioning that it would be buggy 
behavior if the preferred domain included GTT when the initial domain was 
updated [2]. I read some of the related code in the kernel driver, trying to 
understand it, and got the impression that this is actually the case. When 
amdgpu_gem_object_create() fails in amdgpu_gem_create_ioctl(), 
"AMDGPU_GEM_DOMAIN_GTT" is added to "initial_domain" which during the retry 
gets written into both, "domain" and "preferred_domain" in 
amdgpu_gem_object_create(). Is this intended or a bug?

I've attached a minimal proof-of-concept patch (applied to kernel 6.14-rc7) 
that runs fine for me. I don't know though if this has any real effect on the 
driver's memory management. So please excuse me if this is a dumb 
question/patch and I did steal your time. Any insight would still be 
appreciated.

Thanks and best regards
Malte

[1] https://gitlab.freedesktop.org/mesa/mesa/-/issues/8763
[2] https://lists.freedesktop.org/archives/amd-gfx/2024-April/107401.html
______________________________________________________________________________
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c	2025-03-16 23:55:17.000000000 +0100
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c	2025-03-20 22:58:59.849195645 +0100
@@ -93,7 +93,7 @@
 }
 
 int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
-			     int alignment, u32 initial_domain,
+			     int alignment, u32 initial_domain, u32 domain,
 			     u64 flags, enum ttm_bo_type type,
 			     struct dma_resv *resv,
 			     struct drm_gem_object **obj, int8_t xcp_id_plus1)
@@ -113,7 +113,7 @@
 	bp.resv = resv;
 	bp.preferred_domain = initial_domain;
 	bp.flags = flags;
-	bp.domain = initial_domain;
+	bp.domain = domain;
 	bp.bo_ptr_size = sizeof(struct amdgpu_bo);
 	bp.xcp_id_plus1 = xcp_id_plus1;
 
@@ -318,7 +318,7 @@
 	uint64_t size = args->in.bo_size;
 	struct dma_resv *resv = NULL;
 	struct drm_gem_object *gobj;
-	uint32_t handle, initial_domain;
+	uint32_t handle, initial_domain, domain;
 	int r;
 
 	/* reject DOORBELLs until userspace code to use it is available */
@@ -371,9 +371,9 @@
 	}
 
-	initial_domain = (u32)(0xffffffff & args->in.domains);
+	domain = initial_domain = (u32)(0xffffffff & args->in.domains);
 retry:
 	r = amdgpu_gem_object_create(adev, size, args->in.alignment,
-				     initial_domain,
+				     initial_domain, domain,
 				     flags, ttm_bo_type_device, resv, &gobj, fpriv->xcp_id + 1);
 	if (r && r != -ERESTARTSYS) {
 		if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) {
@@ -381,8 +382,8 @@
 			goto retry;
 		}
 
-		if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
-			initial_domain |= AMDGPU_GEM_DOMAIN_GTT;
+		if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
+			domain |= AMDGPU_GEM_DOMAIN_GTT;
 			goto retry;
 		}
 		DRM_DEBUG("Failed to allocate GEM object (%llu, %d, %llu, %d)\n",
@@ -443,7 +444,7 @@
 	}
 
 	/* create a gem object to contain this object in */
-	r = amdgpu_gem_object_create(adev, args->size, 0, AMDGPU_GEM_DOMAIN_CPU,
+	r = amdgpu_gem_object_create(adev, args->size, 0, AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_DOMAIN_CPU,
 				     0, ttm_bo_type_device, NULL, &gobj, fpriv->xcp_id + 1);
 	if (r)
 		return r;
@@ -967,7 +968,7 @@
 	args->size = ALIGN(args->size, PAGE_SIZE);
 	domain = amdgpu_bo_get_preferred_domain(adev,
 				amdgpu_display_supported_domains(adev, flags));
-	r = amdgpu_gem_object_create(adev, args->size, 0, domain, flags,
+	r = amdgpu_gem_object_create(adev, args->size, 0, domain, domain, flags,
 				     ttm_bo_type_device, NULL, &gobj, fpriv->xcp_id + 1);
 	if (r)
 		return -ENOMEM;
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h	2025-03-16 23:55:17.000000000 +0100
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h	2025-03-20 21:48:44.416738211 +0100
@@ -42,7 +42,7 @@
  */
 void amdgpu_gem_force_release(struct amdgpu_device *adev);
 int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
-			     int alignment, u32 initial_domain,
+			     int alignment, u32 initial_domain, u32 domain,
 			     u64 flags, enum ttm_bo_type type,
 			     struct dma_resv *resv,
 			     struct drm_gem_object **obj, int8_t xcp_id_plus1);
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c	2025-03-16 23:55:17.000000000 +0100
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c	2025-03-20 22:32:53.057709080 +0100
@@ -328,7 +328,7 @@
 					AMDGPU_GEM_CREATE_UNCACHED);
 
 	ret = amdgpu_gem_object_create(adev, mem->bo->tbo.base.size, 1,
-			AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_CREATE_PREEMPTIBLE | flags,
+			AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_CREATE_PREEMPTIBLE | flags,
 			ttm_bo_type_sg, mem->bo->tbo.base.resv, &gem_obj, 0);
 
 	amdgpu_bo_unreserve(mem->bo);
@@ -1815,7 +1815,7 @@
 		 va, (*mem)->aql_queue ? size << 1 : size,
 		 domain_string(alloc_domain), xcp_id);
 
-	ret = amdgpu_gem_object_create(adev, aligned_size, 1, alloc_domain, alloc_flags,
+	ret = amdgpu_gem_object_create(adev, aligned_size, 1, alloc_domain, alloc_domain, alloc_flags,
 				       bo_type, NULL, &gobj, xcp_id + 1);
 	if (ret) {
 		pr_debug("Failed to create BO on domain %s. ret %d\n",
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c	2025-03-20 22:31:38.770603335 +0100
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c	2025-03-20 22:31:51.866509502 +0100
@@ -308,7 +308,7 @@
 	}
 
 	ret = amdgpu_gem_object_create(adev, dma_buf->size, PAGE_SIZE,
-				       AMDGPU_GEM_DOMAIN_CPU, flags,
+				       AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_DOMAIN_CPU, flags,
 				       ttm_bo_type_sg, resv, &gobj, 0);
 	if (ret)
 		goto error;

Reply via email to