From: Marek Olšák <marek.ol...@amd.com>

---
 src/gallium/drivers/radeon/radeon_winsys.h        | 24 --------------------
 src/gallium/winsys/amdgpu/drm/amdgpu_bo.c         | 27 +++++++++--------------
 src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c     |  2 +-
 src/gallium/winsys/radeon/drm/radeon_drm_bo.c     | 27 ++++++++++-------------
 src/gallium/winsys/radeon/drm/radeon_drm_winsys.c |  2 +-
 5 files changed, 25 insertions(+), 57 deletions(-)
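Background for reviewers: radeon_get_pb_cache_bucket_index() squeezed all
cached heaps into 4 pb_cache buckets, so several heaps shared a bucket and
pb_cache had to match a per-heap "usage" bit (1 << heap) on top of the
bucket index to keep heaps apart. With one bucket per heap
(RADEON_MAX_CACHED_HEAPS buckets), the bucket index alone identifies the
heap, the usage argument can simply be 0, and a negative heap marks buffers
that bypass the reusable pool, which is why amdgpu_create_bo() and
radeon_create_bo() now only call pb_cache_init_entry() for heap >= 0. The
standalone program below is only an illustration of that before/after
mapping, not code from this patch, and the heap list is abbreviated:

/* Illustration only -- not part of the patch. Contrasts the old 4-bucket
 * mapping with the new one-bucket-per-heap scheme; the heap list is
 * abbreviated (see radeon_winsys.h for the real one). */
#include <assert.h>
#include <stdio.h>

enum toy_heap {
   TOY_HEAP_VRAM_NO_CPU_ACCESS,
   TOY_HEAP_VRAM_READ_ONLY,
   TOY_HEAP_VRAM,
   TOY_HEAP_GTT_WC,
   TOY_HEAP_GTT,
   TOY_HEAP_COUNT /* stands in for RADEON_MAX_CACHED_HEAPS */
};

/* Old scheme: heaps shared 4 buckets, e.g. both VRAM heaps above landed
 * in bucket 1, so reclaiming also had to compare a usage bit (1 << heap)
 * to avoid handing out a buffer from the wrong heap. */
static unsigned old_bucket_index(enum toy_heap heap)
{
   switch (heap) {
   case TOY_HEAP_VRAM_NO_CPU_ACCESS:
      return 0;
   case TOY_HEAP_VRAM_READ_ONLY:
   case TOY_HEAP_VRAM:
      return 1;
   case TOY_HEAP_GTT_WC:
      return 2;
   case TOY_HEAP_GTT:
   default:
      return 3;
   }
}

/* New scheme: the heap index itself is the bucket index, so the bucket
 * alone identifies the heap and usage can be 0 everywhere. */
static unsigned new_bucket_index(enum toy_heap heap)
{
   return (unsigned)heap;
}

int main(void)
{
   for (int h = 0; h < TOY_HEAP_COUNT; h++) {
      enum toy_heap heap = (enum toy_heap)h;

      printf("heap %d: old bucket %u + usage 0x%x, new bucket %u + usage 0\n",
             h, old_bucket_index(heap), 1u << h, new_bucket_index(heap));
      assert(new_bucket_index(heap) < TOY_HEAP_COUNT);
   }
   return 0;
}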
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
index 9f274b4..7914170 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -716,44 +716,20 @@ static inline unsigned radeon_flags_from_heap(enum radeon_heap heap)
              RADEON_FLAG_32BIT;
    case RADEON_HEAP_VRAM:
    case RADEON_HEAP_GTT_WC:
    case RADEON_HEAP_GTT:
    default:
       return flags;
    }
 }
 
-/* The pb cache bucket is chosen to minimize pb_cache misses.
- * It must be between 0 and 3 inclusive.
- */
-static inline unsigned radeon_get_pb_cache_bucket_index(enum radeon_heap heap)
-{
-   switch (heap) {
-   case RADEON_HEAP_VRAM_NO_CPU_ACCESS:
-      return 0;
-   case RADEON_HEAP_VRAM_READ_ONLY:
-   case RADEON_HEAP_VRAM_READ_ONLY_32BIT:
-   case RADEON_HEAP_VRAM_32BIT:
-   case RADEON_HEAP_VRAM:
-      return 1;
-   case RADEON_HEAP_GTT_WC:
-   case RADEON_HEAP_GTT_WC_READ_ONLY:
-   case RADEON_HEAP_GTT_WC_READ_ONLY_32BIT:
-   case RADEON_HEAP_GTT_WC_32BIT:
-      return 2;
-   case RADEON_HEAP_GTT:
-   default:
-      return 3;
-   }
-}
-
 /* Return the heap index for winsys allocators, or -1 on failure. */
 static inline int radeon_get_heap_index(enum radeon_bo_domain domain,
                                         enum radeon_bo_flag flags)
 {
    /* VRAM implies WC (write combining) */
    assert(!(domain & RADEON_DOMAIN_VRAM) || flags & RADEON_FLAG_GTT_WC);
    /* NO_CPU_ACCESS implies VRAM only. */
    assert(!(flags & RADEON_FLAG_NO_CPU_ACCESS) || domain == RADEON_DOMAIN_VRAM);
 
    /* Resources with interprocess sharing don't use any winsys allocators. */
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index 92c314e..5d565ff 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -366,43 +366,44 @@ static void amdgpu_add_buffer_to_global_list(struct amdgpu_winsys_bo *bo)
       simple_mtx_lock(&ws->global_bo_list_lock);
       LIST_ADDTAIL(&bo->u.real.global_list_item, &ws->global_bo_list);
       ws->num_buffers++;
       simple_mtx_unlock(&ws->global_bo_list_lock);
    }
 }
 
 static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
                                                  uint64_t size,
                                                  unsigned alignment,
-                                                 unsigned usage,
                                                  enum radeon_bo_domain initial_domain,
                                                  unsigned flags,
-                                                 unsigned pb_cache_bucket)
+                                                 int heap)
 {
    struct amdgpu_bo_alloc_request request = {0};
    amdgpu_bo_handle buf_handle;
    uint64_t va = 0;
    struct amdgpu_winsys_bo *bo;
    amdgpu_va_handle va_handle;
    unsigned va_gap_size;
    int r;
 
    /* VRAM or GTT must be specified, but not both at the same time. */
    assert(util_bitcount(initial_domain & RADEON_DOMAIN_VRAM_GTT) == 1);
 
    bo = CALLOC_STRUCT(amdgpu_winsys_bo);
    if (!bo) {
       return NULL;
    }
 
-   pb_cache_init_entry(&ws->bo_cache, &bo->u.real.cache_entry, &bo->base,
-                       pb_cache_bucket);
+   if (heap >= 0) {
+      pb_cache_init_entry(&ws->bo_cache, &bo->u.real.cache_entry, &bo->base,
+                          heap);
+   }
    request.alloc_size = size;
    request.phys_alignment = alignment;
 
    if (initial_domain & RADEON_DOMAIN_VRAM)
       request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;
    if (initial_domain & RADEON_DOMAIN_GTT)
       request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
 
    /* If VRAM is just stolen system memory, allow both VRAM and
     * GTT, whichever has free space. If a buffer is evicted from
@@ -446,21 +447,21 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
    if (!(flags & RADEON_FLAG_READ_ONLY))
       vm_flags |= AMDGPU_VM_PAGE_WRITEABLE;
 
    r = amdgpu_bo_va_op_raw(ws->dev, buf_handle, 0, size, va, vm_flags,
                            AMDGPU_VA_OP_MAP);
    if (r)
       goto error_va_map;
 
    pipe_reference_init(&bo->base.reference, 1);
    bo->base.alignment = alignment;
-   bo->base.usage = usage;
+   bo->base.usage = 0;
    bo->base.size = size;
    bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
    bo->ws = ws;
    bo->bo = buf_handle;
    bo->va = va;
    bo->u.real.va_handle = va_handle;
    bo->initial_domain = initial_domain;
    bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);
    bo->is_local = !!(request.flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID);
@@ -1155,21 +1156,21 @@ static void amdgpu_buffer_set_metadata(struct pb_buffer *_buf,
 
 static struct pb_buffer *
 amdgpu_bo_create(struct radeon_winsys *rws,
                  uint64_t size,
                  unsigned alignment,
                  enum radeon_bo_domain domain,
                  enum radeon_bo_flag flags)
 {
    struct amdgpu_winsys *ws = amdgpu_winsys(rws);
    struct amdgpu_winsys_bo *bo;
-   unsigned usage = 0, pb_cache_bucket = 0;
+   int heap = -1;
 
    /* VRAM implies WC. This is not optional. */
    assert(!(domain & RADEON_DOMAIN_VRAM) || flags & RADEON_FLAG_GTT_WC);
 
    /* NO_CPU_ACCESS is valid with VRAM only. */
    assert(domain == RADEON_DOMAIN_VRAM || !(flags & RADEON_FLAG_NO_CPU_ACCESS));
 
    /* Sparse buffers must have NO_CPU_ACCESS set. */
    assert(!(flags & RADEON_FLAG_SPARSE) || flags & RADEON_FLAG_NO_CPU_ACCESS);
@@ -1214,43 +1215,37 @@ no_slab:
    /* Align size to page size. This is the minimum alignment for normal
    * BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
    * like constant/uniform buffers, can benefit from better and more reuse.
    */
    size = align64(size, ws->info.gart_page_size);
    alignment = align(alignment, ws->info.gart_page_size);
 
    bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING;
 
    if (use_reusable_pool) {
-      int heap = radeon_get_heap_index(domain, flags);
+      heap = radeon_get_heap_index(domain, flags);
       assert(heap >= 0 && heap < RADEON_MAX_CACHED_HEAPS);
-      usage = 1 << heap; /* Only set one usage bit for each heap. */
-
-      pb_cache_bucket = radeon_get_pb_cache_bucket_index(heap);
 
       /* Get a buffer from the cache. */
       bo = (struct amdgpu_winsys_bo*)
-           pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment, usage,
-                                   pb_cache_bucket);
+           pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment, 0, heap);
       if (bo)
          return &bo->base;
    }
 
    /* Create a new one. */
-   bo = amdgpu_create_bo(ws, size, alignment, usage, domain, flags,
-                         pb_cache_bucket);
+   bo = amdgpu_create_bo(ws, size, alignment, domain, flags, heap);
    if (!bo) {
       /* Clear the cache and try again. */
      pb_slabs_reclaim(&ws->bo_slabs);
      pb_cache_release_all_buffers(&ws->bo_cache);
-     bo = amdgpu_create_bo(ws, size, alignment, usage, domain, flags,
-                           pb_cache_bucket);
+     bo = amdgpu_create_bo(ws, size, alignment, domain, flags, heap);
      if (!bo)
         return NULL;
    }
 
    bo->u.real.use_reusable_pool = use_reusable_pool;
    return &bo->base;
 }
 
 static struct pb_buffer *amdgpu_bo_from_handle(struct radeon_winsys *rws,
                                                struct winsys_handle *whandle,
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
index 707ebf9..f4bbd3e 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
@@ -278,21 +278,21 @@ amdgpu_winsys_create(int fd, const struct pipe_screen_config *config,
       goto fail;
 
    ws->dev = dev;
    ws->info.drm_major = drm_major;
    ws->info.drm_minor = drm_minor;
 
    if (!do_winsys_init(ws, fd))
      goto fail_alloc;
 
    /* Create managers. */
-   pb_cache_init(&ws->bo_cache, 4,
+   pb_cache_init(&ws->bo_cache, RADEON_MAX_CACHED_HEAPS,
                  500000, ws->check_vm ? 1.0f : 2.0f, 0,
                  (ws->info.vram_size + ws->info.gart_size) / 8,
                  amdgpu_bo_destroy, amdgpu_bo_can_reclaim);
 
    if (!pb_slabs_init(&ws->bo_slabs,
                       AMDGPU_SLAB_MIN_SIZE_LOG2, AMDGPU_SLAB_MAX_SIZE_LOG2,
                       RADEON_MAX_SLAB_HEAPS, ws,
                       amdgpu_bo_can_reclaim_slab,
                       amdgpu_bo_slab_alloc,
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index 4be6f40..7aef238 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -582,24 +582,23 @@ static void radeon_bo_unmap(struct pb_buffer *_buf)
     mtx_unlock(&bo->u.real.map_mutex);
 }
 
 static const struct pb_vtbl radeon_bo_vtbl = {
     radeon_bo_destroy_or_cache
     /* other functions are never called */
 };
 
 static struct radeon_bo *radeon_create_bo(struct radeon_drm_winsys *rws,
                                           unsigned size, unsigned alignment,
-                                          unsigned usage,
                                           unsigned initial_domains,
                                           unsigned flags,
-                                          unsigned pb_cache_bucket)
+                                          int heap)
 {
     struct radeon_bo *bo;
     struct drm_radeon_gem_create args;
     int r;
 
     memset(&args, 0, sizeof(args));
 
     assert(initial_domains);
     assert((initial_domains &
             ~(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) == 0);
@@ -632,31 +631,34 @@ static struct radeon_bo *radeon_create_bo(struct radeon_drm_winsys *rws,
     }
 
     assert(args.handle != 0);
 
     bo = CALLOC_STRUCT(radeon_bo);
     if (!bo)
         return NULL;
 
     pipe_reference_init(&bo->base.reference, 1);
     bo->base.alignment = alignment;
-    bo->base.usage = usage;
+    bo->base.usage = 0;
     bo->base.size = size;
     bo->base.vtbl = &radeon_bo_vtbl;
     bo->rws = rws;
     bo->handle = args.handle;
     bo->va = 0;
     bo->initial_domain = initial_domains;
     bo->hash = __sync_fetch_and_add(&rws->next_bo_hash, 1);
     (void) mtx_init(&bo->u.real.map_mutex, mtx_plain);
-    pb_cache_init_entry(&rws->bo_cache, &bo->u.real.cache_entry, &bo->base,
-                        pb_cache_bucket);
+
+    if (heap >= 0) {
+        pb_cache_init_entry(&rws->bo_cache, &bo->u.real.cache_entry, &bo->base,
+                            heap);
+    }
 
     if (rws->info.has_virtual_memory) {
         struct drm_radeon_gem_va va;
         unsigned va_gap_size;
 
         va_gap_size = rws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0;
         bo->va = radeon_bomgr_find_va(rws, size + va_gap_size, alignment);
 
         va.handle = bo->handle;
         va.vm_id = 0;
@@ -914,21 +916,21 @@ static void radeon_bo_set_metadata(struct pb_buffer *_buf,
 
 static struct pb_buffer *
 radeon_winsys_bo_create(struct radeon_winsys *rws,
                         uint64_t size,
                         unsigned alignment,
                         enum radeon_bo_domain domain,
                         enum radeon_bo_flag flags)
 {
     struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
     struct radeon_bo *bo;
-    unsigned usage = 0, pb_cache_bucket = 0;
+    int heap = -1;
 
     assert(!(flags & RADEON_FLAG_SPARSE)); /* not supported */
 
     /* Only 32-bit sizes are supported. */
     if (size > UINT_MAX)
         return NULL;
 
     /* VRAM implies WC. This is not optional. */
     if (domain & RADEON_DOMAIN_VRAM)
         flags |= RADEON_FLAG_GTT_WC;
@@ -973,41 +975,36 @@ no_slab:
      * BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
      * like constant/uniform buffers, can benefit from better and more reuse.
      */
     size = align(size, ws->info.gart_page_size);
     alignment = align(alignment, ws->info.gart_page_size);
 
     bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING;
 
     /* Shared resources don't use cached heaps. */
     if (use_reusable_pool) {
-        int heap = radeon_get_heap_index(domain, flags);
+        heap = radeon_get_heap_index(domain, flags);
         assert(heap >= 0 && heap < RADEON_MAX_CACHED_HEAPS);
-        usage = 1 << heap; /* Only set one usage bit for each heap. */
-
-        pb_cache_bucket = radeon_get_pb_cache_bucket_index(heap);
 
         bo = radeon_bo(pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment,
-                                               usage, pb_cache_bucket));
+                                               0, heap));
         if (bo)
             return &bo->base;
     }
 
-    bo = radeon_create_bo(ws, size, alignment, usage, domain, flags,
-                          pb_cache_bucket);
+    bo = radeon_create_bo(ws, size, alignment, domain, flags, heap);
     if (!bo) {
         /* Clear the cache and try again. */
         if (ws->info.has_virtual_memory)
             pb_slabs_reclaim(&ws->bo_slabs);
 
         pb_cache_release_all_buffers(&ws->bo_cache);
-        bo = radeon_create_bo(ws, size, alignment, usage, domain, flags,
-                              pb_cache_bucket);
+        bo = radeon_create_bo(ws, size, alignment, domain, flags, heap);
         if (!bo)
             return NULL;
     }
 
     bo->u.real.use_reusable_pool = use_reusable_pool;
 
     mtx_lock(&ws->bo_handles_mutex);
     util_hash_table_set(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
     mtx_unlock(&ws->bo_handles_mutex);
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index f7d7998..25faa40 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -752,21 +752,21 @@ radeon_drm_winsys_create(int fd, const struct pipe_screen_config *config,
     if (!ws) {
         mtx_unlock(&fd_tab_mutex);
         return NULL;
     }
 
     ws->fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
 
     if (!do_winsys_init(ws))
         goto fail1;
 
-    pb_cache_init(&ws->bo_cache, 4,
+    pb_cache_init(&ws->bo_cache, RADEON_MAX_CACHED_HEAPS,
                   500000, ws->check_vm ? 1.0f : 2.0f, 0,
                   MIN2(ws->info.vram_size, ws->info.gart_size),
                   radeon_bo_destroy, radeon_bo_can_reclaim);
 
     if (ws->info.has_virtual_memory) {
         /* There is no fundamental obstacle to using slab buffer allocation
          * without GPUVM, but enabling it requires making sure that the drivers
          * honor the address offset.
          */
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev