On 03/09/2015 09:55 AM, Chris Wilson wrote:
Now with the trimmed memcpy before the command parser, we try to
allocate many different sizes of batches, predominantly one or two
pages. We can therefore speed up searching for a good sized batch by
keeping the objects of buckets of roughly the same size.
Signed-off-by: Chris Wilson <[email protected]>
---
drivers/gpu/drm/i915/i915_debugfs.c | 46 +++++++++++++++++++-----------
drivers/gpu/drm/i915/i915_drv.h | 2 +-
drivers/gpu/drm/i915/i915_gem.c | 2 +-
drivers/gpu/drm/i915/i915_gem_batch_pool.c | 45 +++++++++++++++++------------
drivers/gpu/drm/i915/i915_gem_batch_pool.h | 2 +-
5 files changed, 60 insertions(+), 37 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c
b/drivers/gpu/drm/i915/i915_debugfs.c
index d8ca5c89647c..042ad2fec484 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -378,15 +378,17 @@ static void print_batch_pool_stats(struct seq_file *m,
struct drm_i915_gem_object *obj;
struct file_stats stats;
struct intel_engine_cs *ring;
- int i;
+ int i, j;
memset(&stats, 0, sizeof(stats));
for_each_ring(ring, dev_priv, i) {
- list_for_each_entry(obj,
- &ring->batch_pool.cache_list,
- batch_pool_list)
- per_file_stats(0, obj, &stats);
+ for (j = 0; j < ARRAY_SIZE(ring->batch_pool.cache_list); j++) {
+ list_for_each_entry(obj,
+ &ring->batch_pool.cache_list[j],
+ batch_pool_link)
+ per_file_stats(0, obj, &stats);
+ }
}
print_file_stats(m, "batch pool", stats);
@@ -624,26 +626,38 @@ static int i915_gem_batch_pool_info(struct seq_file *m,
void *data)
struct drm_i915_private *dev_priv = dev->dev_private;
struct drm_i915_gem_object *obj;
struct intel_engine_cs *ring;
- int count = 0;
- int ret, i;
+ int total = 0;
+ int ret, i, j;
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
for_each_ring(ring, dev_priv, i) {
- seq_printf(m, "%s cache:\n", ring->name);
- list_for_each_entry(obj,
- &ring->batch_pool.cache_list,
- batch_pool_list) {
- seq_puts(m, " ");
- describe_obj(m, obj);
- seq_putc(m, '\n');
- count++;
+ for (j = 0; j < ARRAY_SIZE(ring->batch_pool.cache_list); j++) {
+ int count;
+
+ count = 0;
+ list_for_each_entry(obj,
+ &ring->batch_pool.cache_list[j],
+ batch_pool_link)
+ count++;
+ seq_printf(m, "%s cache[%d]: %d objects\n",
+ ring->name, j, count);
+
+ list_for_each_entry(obj,
+ &ring->batch_pool.cache_list[j],
+ batch_pool_link) {
+ seq_puts(m, " ");
+ describe_obj(m, obj);
+ seq_putc(m, '\n');
+ }
+
+ total += count;
}
}
- seq_printf(m, "total: %d\n", count);
+ seq_printf(m, "total: %d\n", total);
mutex_unlock(&dev->struct_mutex);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 9fe1274cfe59..5c7b89bd138d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1893,7 +1893,7 @@ struct drm_i915_gem_object {
/** Used in execbuf to temporarily hold a ref */
struct list_head obj_exec_link;
- struct list_head batch_pool_list;
+ struct list_head batch_pool_link;
/**
* This is set if the object is on the active lists (has pending
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index efb5545251c7..a0c35f80836c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4439,7 +4439,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
INIT_LIST_HEAD(&obj->ring_list);
INIT_LIST_HEAD(&obj->obj_exec_link);
INIT_LIST_HEAD(&obj->vma_list);
- INIT_LIST_HEAD(&obj->batch_pool_list);
+ INIT_LIST_HEAD(&obj->batch_pool_link);
obj->ops = ops;
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
index 1287abf55b84..23374352d8eb 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
@@ -47,8 +47,12 @@
void i915_gem_batch_pool_init(struct drm_device *dev,
struct i915_gem_batch_pool *pool)
{
+ int n;
+
pool->dev = dev;
- INIT_LIST_HEAD(&pool->cache_list);
+
+ for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++)
+ INIT_LIST_HEAD(&pool->cache_list[n]);
}
/**
@@ -59,16 +63,20 @@ void i915_gem_batch_pool_init(struct drm_device *dev,
*/
void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool)
{
+ int n;
+
WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex));
- while (!list_empty(&pool->cache_list)) {
- struct drm_i915_gem_object *obj =
- list_first_entry(&pool->cache_list,
- struct drm_i915_gem_object,
- batch_pool_list);
+ for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) {
+ while (!list_empty(&pool->cache_list[n])) {
+ struct drm_i915_gem_object *obj =
+ list_first_entry(&pool->cache_list[n],
+ struct drm_i915_gem_object,
+ batch_pool_link);
- list_del(&obj->batch_pool_list);
- drm_gem_object_unreference(&obj->base);
+ list_del(&obj->batch_pool_link);
+ drm_gem_object_unreference(&obj->base);
+ }
}
}
@@ -91,28 +99,29 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
{
struct drm_i915_gem_object *obj = NULL;
struct drm_i915_gem_object *tmp, *next;
+ struct list_head *list;
+ int n;
WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex));
- list_for_each_entry_safe(tmp, next,
- &pool->cache_list, batch_pool_list) {
+ n = fls(size >> PAGE_SHIFT) - 1;
Is this better in some way that size / PAGE_SIZE - 1? Or I went from
trusting compiler smartness to little to too much? :)
+ if (n >= ARRAY_SIZE(pool->cache_list))
+ n = ARRAY_SIZE(pool->cache_list) - 1;
+ list = &pool->cache_list[n];
+
+ list_for_each_entry_safe(tmp, next, list, batch_pool_link) {
/* The batches are strictly LRU ordered */
if (tmp->active)
break;
/* While we're looping, do some clean up */
if (tmp->madv == __I915_MADV_PURGED) {
- list_del(&tmp->batch_pool_list);
+ list_del(&tmp->batch_pool_link);
drm_gem_object_unreference(&tmp->base);
continue;
}
- /*
- * Select a buffer that is at least as big as needed
- * but not 'too much' bigger. A better way to do this
- * might be to bucket the pool objects based on size.
- */
- if (tmp->base.size >= size && tmp->base.size <= 2 * size) {
+ if (tmp->base.size >= size) {
obj = tmp;
break;
}
@@ -132,7 +141,7 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
obj->madv = I915_MADV_DONTNEED;
}
- list_move_tail(&obj->batch_pool_list, &pool->cache_list);
+ list_move_tail(&obj->batch_pool_link, list);
i915_gem_object_pin_pages(obj);
return obj;
}
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.h
b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
index 5ed70ef6a887..848e90703eed 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.h
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
@@ -29,7 +29,7 @@
struct i915_gem_batch_pool {
struct drm_device *dev;
- struct list_head cache_list;
+ struct list_head cache_list[4];
};
/* i915_gem_batch_pool.c */
Pretty straightforward and trusting you on bucket sizes,
Reviewed-by: Tvrtko Ursulin <[email protected]>
Regards,
Tvrtko
_______________________________________________
Intel-gfx mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/intel-gfx