OA = ordered append, a feature of GDS (Global Data Share). -- Marek
On Mon, Nov 26, 2018 at 9:08 PM Mike Lothian <[email protected]> wrote: > Same for OA > > Cheers > > Mike > > On Tue, 27 Nov 2018, 01:57 Marek Olšák, <[email protected]> wrote: > >> From: Marek Olšák <[email protected]> >> >> --- >> src/gallium/drivers/radeon/radeon_winsys.h | 4 +- >> src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 55 +++++++++++++--------- >> 2 files changed, 36 insertions(+), 23 deletions(-) >> >> diff --git a/src/gallium/drivers/radeon/radeon_winsys.h >> b/src/gallium/drivers/radeon/radeon_winsys.h >> index 3d0bb75ef6e..a5dd3e6f9b1 100644 >> --- a/src/gallium/drivers/radeon/radeon_winsys.h >> +++ b/src/gallium/drivers/radeon/radeon_winsys.h >> @@ -45,21 +45,23 @@ enum radeon_bo_layout { >> RADEON_LAYOUT_LINEAR = 0, >> RADEON_LAYOUT_TILED, >> RADEON_LAYOUT_SQUARETILED, >> >> RADEON_LAYOUT_UNKNOWN >> }; >> >> enum radeon_bo_domain { /* bitfield */ >> RADEON_DOMAIN_GTT = 2, >> RADEON_DOMAIN_VRAM = 4, >> - RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT >> + RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT, >> + RADEON_DOMAIN_GDS = 8, >> + RADEON_DOMAIN_OA = 16, >> }; >> >> enum radeon_bo_flag { /* bitfield */ >> RADEON_FLAG_GTT_WC = (1 << 0), >> RADEON_FLAG_NO_CPU_ACCESS = (1 << 1), >> RADEON_FLAG_NO_SUBALLOC = (1 << 2), >> RADEON_FLAG_SPARSE = (1 << 3), >> RADEON_FLAG_NO_INTERPROCESS_SHARING = (1 << 4), >> RADEON_FLAG_READ_ONLY = (1 << 5), >> RADEON_FLAG_32BIT = (1 << 6), >> diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c >> b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c >> index a9170a2bc69..1470c873a6a 100644 >> --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c >> +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c >> @@ -177,22 +177,24 @@ void amdgpu_bo_destroy(struct pb_buffer *_buf) >> simple_mtx_lock(&ws->global_bo_list_lock); >> LIST_DEL(&bo->u.real.global_list_item); >> ws->num_buffers--; >> simple_mtx_unlock(&ws->global_bo_list_lock); >> } >> >> simple_mtx_lock(&ws->bo_export_table_lock); >> 
util_hash_table_remove(ws->bo_export_table, bo->bo); >> simple_mtx_unlock(&ws->bo_export_table_lock); >> >> - amdgpu_bo_va_op(bo->bo, 0, bo->base.size, bo->va, 0, >> AMDGPU_VA_OP_UNMAP); >> - amdgpu_va_range_free(bo->u.real.va_handle); >> + if (bo->initial_domain & RADEON_DOMAIN_VRAM_GTT) { >> + amdgpu_bo_va_op(bo->bo, 0, bo->base.size, bo->va, 0, >> AMDGPU_VA_OP_UNMAP); >> + amdgpu_va_range_free(bo->u.real.va_handle); >> + } >> amdgpu_bo_free(bo->bo); >> >> amdgpu_bo_remove_fences(bo); >> >> if (bo->initial_domain & RADEON_DOMAIN_VRAM) >> ws->allocated_vram -= align64(bo->base.size, >> ws->info.gart_page_size); >> else if (bo->initial_domain & RADEON_DOMAIN_GTT) >> ws->allocated_gtt -= align64(bo->base.size, >> ws->info.gart_page_size); >> >> if (bo->u.real.map_count >= 1) { >> @@ -418,25 +420,26 @@ static struct amdgpu_winsys_bo >> *amdgpu_create_bo(struct amdgpu_winsys *ws, >> unsigned alignment, >> enum radeon_bo_domain >> initial_domain, >> unsigned flags, >> int heap) >> { >> struct amdgpu_bo_alloc_request request = {0}; >> amdgpu_bo_handle buf_handle; >> uint64_t va = 0; >> struct amdgpu_winsys_bo *bo; >> amdgpu_va_handle va_handle; >> - unsigned va_gap_size; >> int r; >> >> /* VRAM or GTT must be specified, but not both at the same time. */ >> - assert(util_bitcount(initial_domain & RADEON_DOMAIN_VRAM_GTT) == 1); >> + assert(util_bitcount(initial_domain & (RADEON_DOMAIN_VRAM_GTT | >> + RADEON_DOMAIN_GDS | >> + RADEON_DOMAIN_OA)) == 1); >> >> /* Gfx9: Overallocate the size to the next power of two for faster >> address >> * translation if we don't waste too much memory. >> */ >> if (ws->info.chip_class >= GFX9) { >> uint64_t next_pot_size = util_next_power_of_two64(size); >> >> /* For slightly lower than 4 GB allocations, at most 32 MB are >> wasted. >> * For slightly lower than 256 MB allocations, at most 2 MB are >> wasted. >> * For slightly lower than 64 MB allocations, at most 512 KB are >> wasted. 
>> @@ -464,20 +467,24 @@ static struct amdgpu_winsys_bo >> *amdgpu_create_bo(struct amdgpu_winsys *ws, >> pb_cache_init_entry(&ws->bo_cache, &bo->u.real.cache_entry, >> &bo->base, >> heap); >> } >> request.alloc_size = size; >> request.phys_alignment = alignment; >> >> if (initial_domain & RADEON_DOMAIN_VRAM) >> request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM; >> if (initial_domain & RADEON_DOMAIN_GTT) >> request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT; >> + if (initial_domain & RADEON_DOMAIN_GDS) >> + request.preferred_heap |= AMDGPU_GEM_DOMAIN_GDS; >> + if (initial_domain & RADEON_DOMAIN_OA) >> + request.preferred_heap |= AMDGPU_GEM_DOMAIN_OA; >> >> /* Since VRAM and GTT have almost the same performance on APUs, we >> could >> * just set GTT. However, in order to decrease GTT(RAM) usage, which >> is >> * shared with the OS, allow VRAM placements too. The idea is not to >> use >> * VRAM usefully, but to use it so that it's not unused and wasted. >> */ >> if (!ws->info.has_dedicated_vram) >> request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT; >> >> if (flags & RADEON_FLAG_NO_CPU_ACCESS) >> @@ -493,41 +500,43 @@ static struct amdgpu_winsys_bo >> *amdgpu_create_bo(struct amdgpu_winsys *ws, >> >> r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle); >> if (r) { >> fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n"); >> fprintf(stderr, "amdgpu: size : %"PRIu64" bytes\n", size); >> fprintf(stderr, "amdgpu: alignment : %u bytes\n", alignment); >> fprintf(stderr, "amdgpu: domains : %u\n", initial_domain); >> goto error_bo_alloc; >> } >> >> - va_gap_size = ws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0; >> + if (initial_domain & RADEON_DOMAIN_VRAM_GTT) { >> + unsigned va_gap_size = ws->check_vm ? MAX2(4 * alignment, 64 * >> 1024) : 0; >> >> - r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, >> - size + va_gap_size, >> - amdgpu_get_optimal_vm_alignment(ws, size, >> alignment), >> - 0, &va, &va_handle, >> - (flags & RADEON_FLAG_32BIT ? 
>> AMDGPU_VA_RANGE_32_BIT : 0) | >> - AMDGPU_VA_RANGE_HIGH); >> - if (r) >> - goto error_va_alloc; >> + r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, >> + size + va_gap_size, >> + amdgpu_get_optimal_vm_alignment(ws, >> size, alignment), >> + 0, &va, &va_handle, >> + (flags & RADEON_FLAG_32BIT ? >> AMDGPU_VA_RANGE_32_BIT : 0) | >> + AMDGPU_VA_RANGE_HIGH); >> + if (r) >> + goto error_va_alloc; >> >> - unsigned vm_flags = AMDGPU_VM_PAGE_READABLE | >> - AMDGPU_VM_PAGE_EXECUTABLE; >> + unsigned vm_flags = AMDGPU_VM_PAGE_READABLE | >> + AMDGPU_VM_PAGE_EXECUTABLE; >> >> - if (!(flags & RADEON_FLAG_READ_ONLY)) >> - vm_flags |= AMDGPU_VM_PAGE_WRITEABLE; >> + if (!(flags & RADEON_FLAG_READ_ONLY)) >> + vm_flags |= AMDGPU_VM_PAGE_WRITEABLE; >> >> - r = amdgpu_bo_va_op_raw(ws->dev, buf_handle, 0, size, va, vm_flags, >> + r = amdgpu_bo_va_op_raw(ws->dev, buf_handle, 0, size, va, vm_flags, >> AMDGPU_VA_OP_MAP); >> - if (r) >> - goto error_va_map; >> + if (r) >> + goto error_va_map; >> + } >> >> pipe_reference_init(&bo->base.reference, 1); >> bo->base.alignment = alignment; >> bo->base.usage = 0; >> bo->base.size = size; >> bo->base.vtbl = &amdgpu_winsys_bo_vtbl; >> bo->ws = ws; >> bo->bo = buf_handle; >> bo->va = va; >> bo->u.real.va_handle = va_handle; >> @@ -1328,22 +1337,24 @@ no_slab: >> return amdgpu_bo_sparse_create(ws, size, domain, flags); >> } >> >> /* This flag is irrelevant for the cache. */ >> flags &= ~RADEON_FLAG_NO_SUBALLOC; >> >> /* Align size to page size. This is the minimum alignment for normal >> * BOs. Aligning this here helps the cached bufmgr. Especially small >> BOs, >> * like constant/uniform buffers, can benefit from better and more >> reuse. 
>> */ >> - size = align64(size, ws->info.gart_page_size); >> - alignment = align(alignment, ws->info.gart_page_size); >> + if (domain & RADEON_DOMAIN_VRAM_GTT) { >> + size = align64(size, ws->info.gart_page_size); >> + alignment = align(alignment, ws->info.gart_page_size); >> + } >> >> bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING; >> >> if (use_reusable_pool) { >> heap = radeon_get_heap_index(domain, flags); >> assert(heap >= 0 && heap < RADEON_MAX_CACHED_HEAPS); >> >> /* Get a buffer from the cache. */ >> bo = (struct amdgpu_winsys_bo*) >> pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment, 0, >> heap); >> -- >> 2.17.1 >> >> _______________________________________________ >> mesa-dev mailing list >> [email protected] >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev >> >
_______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
