On 11 July 2017 at 14:27, Alex Smith <[email protected]> wrote:
> On 10 July 2017 at 05:59, Dave Airlie <[email protected]> wrote: > >> From: Dave Airlie <[email protected]> >> >> This patch uses addrlib to workout the tile swizzles according >> to the surface index. It seems to produce the same values as >> amdgpu-pro for the deferred test. >> >> v2: don't apply swizzle to CMASK. the eg docs don't mention >> it, and we clearly don't align cmask for that. >> v3: disable surf index for dedicated images, as these will >> most likely be shared, and I don't think the metadata has >> space for this info in it yet. >> > > FWIW, disabling this for images marked as dedicated means this won't get > any improvements for render targets on our games. We create all render > targets as dedicated when NV_dedicated_allocation is available since this > gets us significant perf improvement on NVIDIA. > > If it's not currently possible to have this enabled for dedicated images > we could avoid using it on AMD, though I'm curious if there's likely to be > any other perf benefits to marking RTs as dedicated we'd then be missing > out on? I've not done any testing to see if there's any benefit from using > it. > Realised this possibly wasn't clear - what I'm asking is: does using NV_dedicated_allocation give any perf benefit on RADV at all, like it does for NV? If not, we could avoid it to get the benefits from this patch. Alex > > Thanks, > Alex > > >> This gets the deferred demo from 730->950fps on my rx480. 
>> (dcc cmask elim predication patches get it further) >> I'm also seeing some improvements in Mad Max at 4K >> >> Signed-off-by: Dave Airlie <[email protected]> >> >> fixup for dedicate >> --- >> src/amd/common/ac_surface.c | 14 ++++++++++++++ >> src/amd/common/ac_surface.h | 2 ++ >> src/amd/vulkan/radv_device.c | 7 ++++++- >> src/amd/vulkan/radv_image.c | 19 ++++++++++++++++++- >> src/amd/vulkan/radv_private.h | 2 ++ >> 5 files changed, 42 insertions(+), 2 deletions(-) >> >> diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c >> index 23fb66b..0aebacc 100644 >> --- a/src/amd/common/ac_surface.c >> +++ b/src/amd/common/ac_surface.c >> @@ -692,6 +692,20 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib, >> surf->htile_size *= 2; >> >> surf->is_linear = surf->u.legacy.level[0].mode == >> RADEON_SURF_MODE_LINEAR_ALIGNED; >> + >> + /* workout base swizzle */ >> + if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)) { >> + ADDR_COMPUTE_BASE_SWIZZLE_INPUT AddrBaseSwizzleIn = {0}; >> + ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT AddrBaseSwizzleOut = >> {0}; >> + >> + AddrBaseSwizzleIn.surfIndex = config->info.surf_index; >> + AddrBaseSwizzleIn.tileIndex = AddrSurfInfoIn.tileIndex; >> + AddrBaseSwizzleIn.macroModeIndex = >> AddrSurfInfoOut.macroModeIndex; >> + AddrBaseSwizzleIn.pTileInfo = AddrSurfInfoOut.pTileInfo; >> + AddrBaseSwizzleIn.tileMode = AddrSurfInfoOut.tileMode; >> + AddrComputeBaseSwizzle(addrlib, &AddrBaseSwizzleIn, >> &AddrBaseSwizzleOut); >> + surf->u.legacy.combined_swizzle = >> AddrBaseSwizzleOut.tileSwizzle; >> + } >> return 0; >> } >> >> diff --git a/src/amd/common/ac_surface.h b/src/amd/common/ac_surface.h >> index 4d893ff..7901b86 100644 >> --- a/src/amd/common/ac_surface.h >> +++ b/src/amd/common/ac_surface.h >> @@ -97,6 +97,7 @@ struct legacy_surf_layout { >> unsigned depth_adjusted:1; >> unsigned stencil_adjusted:1; >> >> + uint8_t combined_swizzle; >> struct legacy_surf_level level[RADEON_SURF_MAX_LEVELS]; >> struct legacy_surf_level 
stencil_level[RADEON_SURF_MAX_LEVELS]; >> uint8_t tiling_index[RADEON_SURF_MAX_LEVELS]; >> @@ -194,6 +195,7 @@ struct ac_surf_info { >> uint32_t width; >> uint32_t height; >> uint32_t depth; >> + uint32_t surf_index; >> uint8_t samples; >> uint8_t levels; >> uint16_t array_size; >> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c >> index 789c90d..eb77914 100644 >> --- a/src/amd/vulkan/radv_device.c >> +++ b/src/amd/vulkan/radv_device.c >> @@ -2757,7 +2757,8 @@ radv_initialise_color_surface(struct radv_device >> *device, >> } >> >> cb->cb_color_base = va >> 8; >> - >> + if (device->physical_device->rad_info.chip_class < GFX9) >> + cb->cb_color_base |= iview->image->surface.u.legacy >> .combined_swizzle; >> /* CMASK variables */ >> va = device->ws->buffer_get_va(iview->bo) + iview->image->offset; >> va += iview->image->cmask.offset; >> @@ -2766,6 +2767,8 @@ radv_initialise_color_surface(struct radv_device >> *device, >> va = device->ws->buffer_get_va(iview->bo) + iview->image->offset; >> va += iview->image->dcc_offset; >> cb->cb_dcc_base = va >> 8; >> + if (device->physical_device->rad_info.chip_class < GFX9) >> + cb->cb_dcc_base |= iview->image->surface.u.legacy >> .combined_swizzle; >> >> uint32_t max_slice = radv_surface_layer_count(iview); >> cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) | >> @@ -2781,6 +2784,8 @@ radv_initialise_color_surface(struct radv_device >> *device, >> if (iview->image->fmask.size) { >> va = device->ws->buffer_get_va(iview->bo) + >> iview->image->offset + iview->image->fmask.offset; >> cb->cb_color_fmask = va >> 8; >> + if (device->physical_device->rad_info.chip_class < GFX9) >> + cb->cb_color_fmask |= >> iview->image->surface.u.legacy.combined_swizzle; >> } else { >> cb->cb_color_fmask = cb->cb_color_base; >> } >> diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c >> index 115e5a5..28f3874 100644 >> --- a/src/amd/vulkan/radv_image.c >> +++ b/src/amd/vulkan/radv_image.c >> @@ 
-27,10 +27,12 @@ >> >> #include "radv_private.h" >> #include "vk_format.h" >> +#include "vk_util.h" >> #include "radv_radeon_winsys.h" >> #include "sid.h" >> #include "gfx9d.h" >> #include "util/debug.h" >> +#include "util/u_atomic.h" >> static unsigned >> radv_choose_tiling(struct radv_device *Device, >> const struct radv_image_create_info *create_info) >> @@ -209,6 +211,8 @@ si_set_mutable_tex_desc_fields(struct radv_device >> *device, >> va += base_level_info->offset; >> >> state[0] = va >> 8; >> + if (chip_class < GFX9) >> + state[0] |= image->surface.u.legacy.combined_swizzle; >> state[1] &= C_008F14_BASE_ADDRESS_HI; >> state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40); >> state[3] |= S_008F1C_TILING_INDEX(si_tile_mode_index(image, >> base_level, >> @@ -224,7 +228,8 @@ si_set_mutable_tex_desc_fields(struct radv_device >> *device, >> meta_va += base_level_info->dcc_offset; >> state[6] |= S_008F28_COMPRESSION_EN(1); >> state[7] = meta_va >> 8; >> - >> + if (chip_class < GFX9) >> + state[7] |= image->surface.u.legacy.combin >> ed_swizzle; >> } >> } >> >> @@ -472,6 +477,8 @@ si_make_texture_descriptor(struct radv_device >> *device, >> } >> >> fmask_state[0] = va >> 8; >> + if (device->physical_device->rad_info.chip_class < GFX9) >> + fmask_state[0] |= image->surface.u.legacy.combin >> ed_swizzle; >> fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) | >> S_008F14_DATA_FORMAT_GFX6(fmask_format) | >> S_008F14_NUM_FORMAT_GFX6(num_format); >> @@ -752,6 +759,7 @@ radv_image_create(VkDevice _device, >> const VkImageCreateInfo *pCreateInfo = create_info->vk_info; >> struct radv_image *image = NULL; >> bool can_cmask_dcc = false; >> + bool dedicated = false; >> assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE >> _INFO); >> >> radv_assert(pCreateInfo->mipLevels > 0); >> @@ -761,6 +769,11 @@ radv_image_create(VkDevice _device, >> radv_assert(pCreateInfo->extent.height > 0); >> radv_assert(pCreateInfo->extent.depth > 0); >> >> + const 
VkDedicatedAllocationImageCreateInfoNV *dedicate_info = >> + vk_find_struct_const(pCreateInfo->pNext, >> DEDICATED_ALLOCATION_IMAGE_CREATE_INFO_NV); >> + >> + if (dedicate_info && dedicate_info->dedicatedAllocation) >> + dedicated = true; >> image = vk_alloc2(&device->alloc, alloc, sizeof(*image), 8, >> VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); >> if (!image) >> @@ -789,6 +802,10 @@ radv_image_create(VkDevice _device, >> image->queue_family_mask |= 1u << >> pCreateInfo->pQueueFamilyIndices[i]; >> } >> >> + if (!vk_format_is_depth(pCreateInfo->format) && >> !create_info->scanout && !dedicated) { >> + image->info.surf_index = >> p_atomic_inc_return(&device->image_mrt_offset_counter) >> - 1; >> + } >> + >> radv_init_surface(device, &image->surface, create_info); >> >> device->ws->surface_init(device->ws, &image->info, >> &image->surface); >> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private. >> h >> index a167409..b0533bd 100644 >> --- a/src/amd/vulkan/radv_private.h >> +++ b/src/amd/vulkan/radv_private.h >> @@ -547,6 +547,8 @@ struct radv_device { >> >> /* Backup in-memory cache to be used if the app doesn't provide >> one */ >> struct radv_pipeline_cache * mem_cache; >> + >> + uint32_t image_mrt_offset_counter; >> }; >> >> struct radv_device_memory { >> -- >> 2.9.4 >> >> _______________________________________________ >> mesa-dev mailing list >> [email protected] >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev >> > >
_______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
