Hi,

Attached is a patch, described as follows:

When creating buffer objects, and when reusing already existing ones,
take the placement domains into account.
Use a more fine-grained mapping from usage to placement.

This change does not account for the real location of the
buffer object for buffer objects returned from r600_bomgr.
But it greatly increases the probability to get buffers
that already have the desired placement.

This change avoids plenty of GPU uploads on command
stream emission, which in turn avoids plenty of implicit
synchronization on the upload fences. As a whole, this
turns into a noticeable performance gain.

Please review, apply, suggest improvements ...
Thanks
Mathias
From 79ca3af6dc5c59aba083650126287f38e0b5dd86 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mathias=20Fr=C3=B6hlich?= <mathias.froehl...@web.de>
Date: Sun, 13 Mar 2011 08:38:03 +0100
Subject: [PATCH] r600g: Take care of bo placement domains.

When creating buffer objects, and when reusing already existing ones,
take the placement domains into account.
Use a more fine-grained mapping from usage to placement.

This change does not account for the real location of the
buffer object for buffer objects returned from r600_bomgr.
But it greatly increases the probability to get buffers
that already have the desired placement.

This change avoids plenty of GPU uploads on command
stream emission, which in turn avoids plenty of implicit
synchronization on the upload fences. As a whole, this
turns into a noticeable performance gain.
---
 src/gallium/winsys/r600/drm/r600_bo.c    |   42 ++++++++++++++++++++----------
 src/gallium/winsys/r600/drm/r600_bomgr.c |    8 +++++-
 src/gallium/winsys/r600/drm/r600_priv.h  |    3 +-
 src/gallium/winsys/r600/drm/radeon_bo.c  |    4 +-
 4 files changed, 39 insertions(+), 18 deletions(-)

diff --git a/src/gallium/winsys/r600/drm/r600_bo.c b/src/gallium/winsys/r600/drm/r600_bo.c
index e9c650d..66ca522 100644
--- a/src/gallium/winsys/r600/drm/r600_bo.c
+++ b/src/gallium/winsys/r600/drm/r600_bo.c
@@ -38,15 +38,39 @@ struct r600_bo *r600_bo(struct radeon *radeon,
 {
 	struct r600_bo *bo;
 	struct radeon_bo *rbo;
+	unsigned domains;
+
+	switch (usage) {
+	case PIPE_USAGE_DYNAMIC:
+	case PIPE_USAGE_STREAM:
+		/* Do not put dynamic or stream resources into uncachable vram.
+		 */
+		domains = RADEON_GEM_DOMAIN_GTT;
+		break;
+	case PIPE_USAGE_STAGING:
+		/* Staging resources participate in transfers and blits only
+		 * and are used for uploads and downloads from regular
+		 * resources. We generate them internally for some transfers.
+		 */
+		domains = RADEON_GEM_DOMAIN_CPU | RADEON_GEM_DOMAIN_GTT;
+		break;
+	case PIPE_USAGE_DEFAULT:
+	case PIPE_USAGE_STATIC:
+	case PIPE_USAGE_IMMUTABLE:
+	default:
+		domains = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
+		break;
+	}
 
 	if (binding & (PIPE_BIND_CONSTANT_BUFFER | PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) {
-		bo = r600_bomgr_bo_create(radeon->bomgr, size, alignment, *radeon->cfence);
+		bo = r600_bomgr_bo_create(radeon->bomgr, size, alignment, domains, *radeon->cfence);
 		if (bo) {
 			return bo;
 		}
 	}
 
-	rbo = radeon_bo(radeon, 0, size, alignment);
+
+	rbo = radeon_bo(radeon, 0, size, alignment, domains);
 	if (rbo == NULL) {
 		return NULL;
 	}
@@ -58,17 +82,7 @@ struct r600_bo *r600_bo(struct radeon *radeon,
 	if (binding & (PIPE_BIND_CONSTANT_BUFFER | PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) {
 		r600_bomgr_bo_init(radeon->bomgr, bo);
 	}
-
-	/* Staging resources particpate in transfers and blits only
-	 * and are used for uploads and downloads from regular
-	 * resources.  We generate them internally for some transfers.
-	 */
-	if (usage == PIPE_USAGE_STAGING)
-		bo->domains = RADEON_GEM_DOMAIN_CPU | RADEON_GEM_DOMAIN_GTT;
-	else
-		bo->domains = (RADEON_GEM_DOMAIN_CPU |
-				RADEON_GEM_DOMAIN_GTT |
-				RADEON_GEM_DOMAIN_VRAM);
+	bo->domains = domains;
 
 	pipe_reference_init(&bo->reference, 1);
 	return bo;
@@ -80,7 +94,7 @@ struct r600_bo *r600_bo_handle(struct radeon *radeon,
 	struct r600_bo *bo = calloc(1, sizeof(struct r600_bo));
 	struct radeon_bo *rbo;
 
-	rbo = bo->bo = radeon_bo(radeon, handle, 0, 0);
+	rbo = bo->bo = radeon_bo(radeon, handle, 0, 0, 0);
 	if (rbo == NULL) {
 		free(bo);
 		return NULL;
diff --git a/src/gallium/winsys/r600/drm/r600_bomgr.c b/src/gallium/winsys/r600/drm/r600_bomgr.c
index 446ef0f..c6ed737 100644
--- a/src/gallium/winsys/r600/drm/r600_bomgr.c
+++ b/src/gallium/winsys/r600/drm/r600_bomgr.c
@@ -53,12 +53,17 @@ static INLINE int r600_bo_is_compat(struct r600_bomgr *mgr,
 					struct r600_bo *bo,
 					unsigned size,
 					unsigned alignment,
+					unsigned domains,
 					unsigned cfence)
 {
 	if(bo->size < size) {
 		return 0;
 	}
 
+	if(bo->domains != domains) {
+		return 0;
+	}
+
 	/* be lenient with size */
 	if(bo->size >= 2*size) {
 		return 0;
@@ -78,6 +83,7 @@ static INLINE int r600_bo_is_compat(struct r600_bomgr *mgr,
 struct r600_bo *r600_bomgr_bo_create(struct r600_bomgr *mgr,
 					unsigned size,
 					unsigned alignment,
+					unsigned domains,
 					unsigned cfence)
 {
 	struct r600_bo *bo, *tmp;
@@ -88,7 +94,7 @@ struct r600_bo *r600_bomgr_bo_create(struct r600_bomgr *mgr,
 
 	now = os_time_get();
 	LIST_FOR_EACH_ENTRY_SAFE(bo, tmp, &mgr->delayed, list) {
-		if(r600_bo_is_compat(mgr, bo, size, alignment, cfence)) {
+		if(r600_bo_is_compat(mgr, bo, size, alignment, domains, cfence)) {
 			LIST_DEL(&bo->list);
 			--mgr->num_delayed;
 			r600_bomgr_timeout_flush(mgr);
diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h
index 27bdf2b..ca04131 100644
--- a/src/gallium/winsys/r600/drm/r600_priv.h
+++ b/src/gallium/winsys/r600/drm/r600_priv.h
@@ -131,7 +131,7 @@ unsigned radeon_family_from_device(unsigned device);
  * radeon_bo.c
  */
 struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle,
-			    unsigned size, unsigned alignment);
+			    unsigned size, unsigned alignment, unsigned domains);
 void radeon_bo_reference(struct radeon *radeon, struct radeon_bo **dst,
 			 struct radeon_bo *src);
 int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo);
@@ -177,6 +177,7 @@ void r600_bomgr_bo_init(struct r600_bomgr *mgr, struct r600_bo *bo);
 struct r600_bo *r600_bomgr_bo_create(struct r600_bomgr *mgr,
 					unsigned size,
 					unsigned alignment,
+					unsigned domains,
 					unsigned cfence);
 
 
diff --git a/src/gallium/winsys/r600/drm/radeon_bo.c b/src/gallium/winsys/r600/drm/radeon_bo.c
index 13b1d50..5dc98bf 100644
--- a/src/gallium/winsys/r600/drm/radeon_bo.c
+++ b/src/gallium/winsys/r600/drm/radeon_bo.c
@@ -69,7 +69,7 @@ static void radeon_bo_fixed_unmap(struct radeon *radeon, struct radeon_bo *bo)
 }
 
 struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle,
-			unsigned size, unsigned alignment)
+			    unsigned size, unsigned alignment, unsigned domains)
 {
 	struct radeon_bo *bo;
 	int r;
@@ -113,7 +113,7 @@ struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle,
 
 		args.size = size;
 		args.alignment = alignment;
-		args.initial_domain = RADEON_GEM_DOMAIN_CPU;
+		args.initial_domain = domains;
 		args.flags = 0;
 		args.handle = 0;
 		r = drmCommandWriteRead(radeon->fd, DRM_RADEON_GEM_CREATE,
-- 
1.7.4.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to