I would prefer it if the function were split into two functions instead of using the initialized flag. I think we know at each call site whether it is an initialization or a reinitialization.
- Bas On Thu, Aug 18, 2016 at 9:46 PM, Marek Olšák <[email protected]> wrote: > From: Marek Olšák <[email protected]> > > Invalidated buffers don't have to do this. > --- > src/gallium/drivers/radeon/r600_buffer_common.c | 147 > +++++++++++++----------- > src/gallium/drivers/radeon/r600_pipe_common.h | 2 + > 2 files changed, 80 insertions(+), 69 deletions(-) > > diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c > b/src/gallium/drivers/radeon/r600_buffer_common.c > index 4480293..113a7dc 100644 > --- a/src/gallium/drivers/radeon/r600_buffer_common.c > +++ b/src/gallium/drivers/radeon/r600_buffer_common.c > @@ -98,91 +98,108 @@ void *r600_buffer_map_sync_with_rings(struct > r600_common_context *ctx, > /* Setting the CS to NULL will prevent doing checks we have done > already. */ > return ctx->ws->buffer_map(resource->buf, NULL, usage); > } > > bool r600_init_resource(struct r600_common_screen *rscreen, > struct r600_resource *res, > uint64_t size, unsigned alignment) > { > struct r600_texture *rtex = (struct r600_texture*)res; > struct pb_buffer *old_buf, *new_buf; > - enum radeon_bo_flag flags = 0; > - > - switch (res->b.b.usage) { > - case PIPE_USAGE_STREAM: > - flags = RADEON_FLAG_GTT_WC; > - /* fall through */ > - case PIPE_USAGE_STAGING: > - /* Transfers are likely to occur more often with these > resources. */ > - res->domains = RADEON_DOMAIN_GTT; > - break; > - case PIPE_USAGE_DYNAMIC: > - /* Older kernels didn't always flush the HDP cache before > - * CS execution > - */ > - if (rscreen->info.drm_major == 2 && > - rscreen->info.drm_minor < 40) { > + > + if (!res->initialized) { > + res->flags = 0; > + > + switch (res->b.b.usage) { > + case PIPE_USAGE_STREAM: > + res->flags = RADEON_FLAG_GTT_WC; > + /* fall through */ > + case PIPE_USAGE_STAGING: > + /* Transfers are likely to occur more often with these > + * resources. 
*/ > res->domains = RADEON_DOMAIN_GTT; > - flags |= RADEON_FLAG_GTT_WC; > + break; > + case PIPE_USAGE_DYNAMIC: > + /* Older kernels didn't always flush the HDP cache > before > + * CS execution > + */ > + if (rscreen->info.drm_major == 2 && > + rscreen->info.drm_minor < 40) { > + res->domains = RADEON_DOMAIN_GTT; > + res->flags |= RADEON_FLAG_GTT_WC; > + break; > + } > + res->flags |= RADEON_FLAG_CPU_ACCESS; > + /* fall through */ > + case PIPE_USAGE_DEFAULT: > + case PIPE_USAGE_IMMUTABLE: > + default: > + /* Not listing GTT here improves performance in some > + * apps. */ > + res->domains = RADEON_DOMAIN_VRAM; > + res->flags |= RADEON_FLAG_GTT_WC; > break; > } > - flags |= RADEON_FLAG_CPU_ACCESS; > - /* fall through */ > - case PIPE_USAGE_DEFAULT: > - case PIPE_USAGE_IMMUTABLE: > - default: > - /* Not listing GTT here improves performance in some apps. */ > - res->domains = RADEON_DOMAIN_VRAM; > - flags |= RADEON_FLAG_GTT_WC; > - break; > - } > > - if (res->b.b.target == PIPE_BUFFER && > - res->b.b.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT | > - PIPE_RESOURCE_FLAG_MAP_COHERENT)) { > - /* Use GTT for all persistent mappings with older kernels, > - * because they didn't always flush the HDP cache before CS > - * execution. > - * > - * Write-combined CPU mappings are fine, the kernel ensures > all CPU > - * writes finish before the GPU executes a command stream. > + if (res->b.b.target == PIPE_BUFFER && > + res->b.b.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT | > + PIPE_RESOURCE_FLAG_MAP_COHERENT)) { > + /* Use GTT for all persistent mappings with older > + * kernels, because they didn't always flush the HDP > + * cache before CS execution. > + * > + * Write-combined CPU mappings are fine, the kernel > + * ensures all CPU writes finish before the GPU > + * executes a command stream. 
> + */ > + if (rscreen->info.drm_major == 2 && > + rscreen->info.drm_minor < 40) > + res->domains = RADEON_DOMAIN_GTT; > + else if (res->domains & RADEON_DOMAIN_VRAM) > + res->flags |= RADEON_FLAG_CPU_ACCESS; > + } > + > + /* Tiled textures are unmappable. Always put them in VRAM. */ > + if (res->b.b.target != PIPE_BUFFER && > + rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D) { > + res->domains = RADEON_DOMAIN_VRAM; > + res->flags &= ~RADEON_FLAG_CPU_ACCESS; > + res->flags |= RADEON_FLAG_NO_CPU_ACCESS | > + RADEON_FLAG_GTT_WC; > + } > + > + /* If VRAM is just stolen system memory, allow both VRAM and > + * GTT, whichever has free space. If a buffer is evicted from > + * VRAM to GTT, it will stay there. > */ > - if (rscreen->info.drm_major == 2 && > - rscreen->info.drm_minor < 40) > - res->domains = RADEON_DOMAIN_GTT; > - else if (res->domains & RADEON_DOMAIN_VRAM) > - flags |= RADEON_FLAG_CPU_ACCESS; > - } > + if (!rscreen->info.has_dedicated_vram && > + res->domains == RADEON_DOMAIN_VRAM) > + res->domains = RADEON_DOMAIN_VRAM_GTT; > > - /* Tiled textures are unmappable. Always put them in VRAM. */ > - if (res->b.b.target != PIPE_BUFFER && > - rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D) { > - res->domains = RADEON_DOMAIN_VRAM; > - flags &= ~RADEON_FLAG_CPU_ACCESS; > - flags |= RADEON_FLAG_NO_CPU_ACCESS | > - RADEON_FLAG_GTT_WC; > - } > + if (rscreen->debug_flags & DBG_NO_WC) > + res->flags &= ~RADEON_FLAG_GTT_WC; > > - /* If VRAM is just stolen system memory, allow both VRAM and GTT, > - * whichever has free space. If a buffer is evicted from VRAM to GTT, > - * it will stay there. > - */ > - if (!rscreen->info.has_dedicated_vram && > - res->domains == RADEON_DOMAIN_VRAM) > - res->domains = RADEON_DOMAIN_VRAM_GTT; > + /* Set expected VRAM and GART usage for the buffer. 
*/ > + res->vram_usage = 0; > + res->gart_usage = 0; > > - if (rscreen->debug_flags & DBG_NO_WC) > - flags &= ~RADEON_FLAG_GTT_WC; > + if (res->domains & RADEON_DOMAIN_VRAM) > + res->vram_usage = size; > + else if (res->domains & RADEON_DOMAIN_GTT) > + res->gart_usage = size; > + > + res->initialized = true; > + } > > /* Allocate a new resource. */ > new_buf = rscreen->ws->buffer_create(rscreen->ws, size, alignment, > - res->domains, flags); > + res->domains, res->flags); > if (!new_buf) { > return false; > } > > /* Replace the pointer such that if res->buf wasn't NULL, it won't be > * NULL. This should prevent crashes with multiple contexts using > * the same buffer where one of the contexts invalidates it while > * the others are using it. */ > old_buf = res->buf; > res->buf = new_buf; /* should be atomic */ > @@ -190,29 +207,20 @@ bool r600_init_resource(struct r600_common_screen > *rscreen, > if (rscreen->info.has_virtual_memory) > res->gpu_address = > rscreen->ws->buffer_get_virtual_address(res->buf); > else > res->gpu_address = 0; > > pb_reference(&old_buf, NULL); > > util_range_set_empty(&res->valid_buffer_range); > res->TC_L2_dirty = false; > > - /* Set expected VRAM and GART usage for the buffer. */ > - res->vram_usage = 0; > - res->gart_usage = 0; > - > - if (res->domains & RADEON_DOMAIN_VRAM) > - res->vram_usage = size; > - else if (res->domains & RADEON_DOMAIN_GTT) > - res->gart_usage = size; > - > /* Print debug information. 
*/ > if (rscreen->debug_flags & DBG_VM && res->b.b.target == PIPE_BUFFER) { > fprintf(stderr, "VM start=0x%"PRIX64" end=0x%"PRIX64" | > Buffer %"PRIu64" bytes\n", > res->gpu_address, res->gpu_address + res->buf->size, > res->buf->size); > } > return true; > } > > static void r600_buffer_destroy(struct pipe_screen *screen, > @@ -496,20 +504,21 @@ r600_alloc_buffer_struct(struct pipe_screen *screen, > { > struct r600_resource *rbuffer; > > rbuffer = MALLOC_STRUCT(r600_resource); > > rbuffer->b.b = *templ; > pipe_reference_init(&rbuffer->b.b.reference, 1); > rbuffer->b.b.screen = screen; > rbuffer->b.vtbl = &r600_buffer_vtbl; > rbuffer->buf = NULL; > + rbuffer->initialized = false; > rbuffer->TC_L2_dirty = false; > rbuffer->is_shared = false; > util_range_init(&rbuffer->valid_buffer_range); > return rbuffer; > } > > struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, > const struct pipe_resource *templ, > unsigned alignment) > { > diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h > b/src/gallium/drivers/radeon/r600_pipe_common.h > index 5375044..358d5f4 100644 > --- a/src/gallium/drivers/radeon/r600_pipe_common.h > +++ b/src/gallium/drivers/radeon/r600_pipe_common.h > @@ -169,20 +169,22 @@ struct r600_resource { > > /* Winsys objects. */ > struct pb_buffer *buf; > uint64_t gpu_address; > /* Memory usage if the buffer placement is optimal. */ > uint64_t vram_usage; > uint64_t gart_usage; > > /* Resource state. */ > enum radeon_bo_domain domains; > + enum radeon_bo_flag flags; > + bool initialized; > > /* The buffer range which is initialized (with a write transfer, > * streamout, DMA, or as a random access target). The rest of > * the buffer is considered invalid and can be mapped unsynchronized. > * > * This allows unsychronized mapping of a buffer range which hasn't > * been used yet. It's for applications which forget to use > * the unsynchronized map flag and expect the driver to figure it out. 
> */ > struct util_range valid_buffer_range; > -- > 2.7.4 > > _______________________________________________ > mesa-dev mailing list > [email protected] > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
