Re: [Mesa-dev] [PATCH 2/9] ac: change legacy_surf_level::slice_size to dword units

Nicolai Hähnle Thu, 23 Nov 2017 00:18:57 -0800

On 22.11.2017 22:33, Marek Olšák wrote:

On Wed, Nov 22, 2017 at 7:46 PM, Nicolai Hähnle <[email protected]<mailto:[email protected]>> wrote:

 > On 21.11.2017 18:30, Marek Olšák wrote:
 >>
 >> From: Marek Olšák <[email protected] <mailto:[email protected]>>
 >>
 >> The next commit will reduce the size even more.
 >> ---
 >>   src/amd/common/ac_surface.c                        |  2 +-
 >>   src/amd/common/ac_surface.h                        |  2 +-
 >>   src/amd/vulkan/radv_image.c                        |  8 ++++----
 >>   src/gallium/drivers/r600/evergreen_state.c         |  8 ++++----
 >>   src/gallium/drivers/r600/r600_state.c              |  8 ++++----
 >>   src/gallium/drivers/r600/r600_texture.c            | 14 +++++++-------
 >>   src/gallium/drivers/r600/radeon_uvd.c              |  2 +-
 >>   src/gallium/drivers/radeon/r600_texture.c          | 14 +++++++-------
 >>   src/gallium/drivers/radeon/radeon_uvd.c            |  2 +-
 >>   src/gallium/drivers/radeonsi/cik_sdma.c            |  4 ++--
 >>   src/gallium/drivers/radeonsi/si_dma.c              |  8 ++++----
 >>   src/gallium/winsys/radeon/drm/radeon_drm_surface.c |  4 ++--
 >>   12 files changed, 38 insertions(+), 38 deletions(-)
 >>
 >> diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c
 >> index f7600a3..2b6c3fb 100644
 >> --- a/src/amd/common/ac_surface.c
 >> +++ b/src/amd/common/ac_surface.c
 >> @@ -297,21 +297,21 @@ static int gfx6_compute_level(ADDR_HANDLE addrlib,
 >>         ret = AddrComputeSurfaceInfo(addrlib,
 >>                                      AddrSurfInfoIn,
 >>                                      AddrSurfInfoOut);
 >>         if (ret != ADDR_OK) {
 >>                 return ret;
 >>         }
 >>         surf_level = is_stencil ? &surf->u.legacy.stencil_level[level] :
 >> &surf->u.legacy.level[level];
 >>         surf_level->offset = align64(surf->surf_size,
 >> AddrSurfInfoOut->baseAlign);
 >> -       surf_level->slice_size = AddrSurfInfoOut->sliceSize;
 >> +       surf_level->slice_size_dw = AddrSurfInfoOut->sliceSize / 4;
 >>         surf_level->nblk_x = AddrSurfInfoOut->pitch;
 >>         surf_level->nblk_y = AddrSurfInfoOut->height;
 >>         switch (AddrSurfInfoOut->tileMode) {
 >>         case ADDR_TM_LINEAR_ALIGNED:
 >>                 surf_level->mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
 >>                 break;
 >>         case ADDR_TM_1D_TILED_THIN1:
 >>                 surf_level->mode = RADEON_SURF_MODE_1D;
 >>                 break;
 >> diff --git a/src/amd/common/ac_surface.h b/src/amd/common/ac_surface.h
 >> index 1dc95cd..ec89f6b 100644
 >> --- a/src/amd/common/ac_surface.h
 >> +++ b/src/amd/common/ac_surface.h
 >> @@ -64,21 +64,21 @@ enum radeon_micro_mode {
 >>   /* bits 19 and 20 are reserved for libdrm_radeon, don't use them */
 >>   #define RADEON_SURF_FMASK                       (1 << 21)
 >>   #define RADEON_SURF_DISABLE_DCC                 (1 << 22)
 >>   #define RADEON_SURF_TC_COMPATIBLE_HTILE         (1 << 23)
 >>   #define RADEON_SURF_IMPORTED                    (1 << 24)
 >>   #define RADEON_SURF_OPTIMIZE_FOR_SPACE          (1 << 25)
 >>   #define RADEON_SURF_SHAREABLE                   (1 << 26)
 >>     struct legacy_surf_level {
 >>       uint64_t                    offset;
 >> -    uint64_t                    slice_size;

>> + uint32_t slice_size_dw; /* in dwords; max =4GB /

 >> 4. */
 >>       uint32_t                    dcc_offset; /* relative offset within
 >> DCC mip tree */
 >>       uint32_t                    dcc_fast_clear_size;
 >>       uint16_t                    nblk_x;
 >>       uint16_t                    nblk_y;
 >>       enum radeon_surf_mode       mode;
 >>   };
 >>     struct legacy_surf_layout {
 >>       unsigned                    bankw:4;  /* max 8 */
 >>       unsigned                    bankh:4;  /* max 8 */
 >> diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
 >> index b532aa9..fb7bbde 100644
 >> --- a/src/amd/vulkan/radv_image.c
 >> +++ b/src/amd/vulkan/radv_image.c
 >> @@ -1149,25 +1149,25 @@ void radv_GetImageSubresourceLayout(
 >>         if (device->physical_device->rad_info.chip_class >= GFX9) {
 >>                 pLayout->offset = surface->u.gfx9.offset[level] +
 >> surface->u.gfx9.surf_slice_size * layer;
 >>                 pLayout->rowPitch = surface->u.gfx9.surf_pitch *
 >> surface->bpe;
 >>                 pLayout->arrayPitch = surface->u.gfx9.surf_slice_size;
 >>                 pLayout->depthPitch = surface->u.gfx9.surf_slice_size;
 >>                 pLayout->size = surface->u.gfx9.surf_slice_size;
 >>                 if (image->type == VK_IMAGE_TYPE_3D)
 >>                         pLayout->size *= u_minify(image->info.depth,
 >> level);
 >>         } else {

>> - pLayout->offset =surface->u.legacy.level[level].offset +

 >> surface->u.legacy.level[level].slice_size * layer;

>> + pLayout->offset =surface->u.legacy.level[level].offset +

 >> surface->u.legacy.level[level].slice_size_dw * 4 * layer;
 >
 >
 > I believe the maximum slice size in bytes is (with an RGBA32 texture)
 >
 > 16384 * 16384 * 16 = 2^14 * 2^14 * 2^4 = 2^32
 >
 > The problem with this code is that the multiplication is now performed as
 > uint32_t and can therefore wrap-around. So an explicit cast to 64-bits is
 > required.
 >

> In practice, I guess this rather becomes an issue with smaller slicesizes

 > but larger layer indices. We really need test case to exercise >= 4 GB
 > textures...


This should do it:

diff --git a/src/amd/common/ac_surface.h b/src/amd/common/ac_surface.h
index f18548f..fa17b34 100644
--- a/src/amd/common/ac_surface.h
+++ b/src/amd/common/ac_surface.h
@@ -71,7 +71,8 @@ enum radeon_micro_mode {

  struct legacy_surf_level {
      uint64_t                    offset;

- uint32_t slice_size_dw; /* in dwords; max = 4GB/ 4. */+ /* Declare 32 bits of uint64_t, so that multiplication results in64 bits. */+ uint64_t slice_size_dw:32; /* in dwords; max =4GB / 4. */ uint32_t dcc_offset; /* relative offset withinDCC mip tree */

      uint32_t                    dcc_fast_clear_size;
      unsigned                    nblk_x:15;


Congratulations, you found a compiler bug! :)

I like this idea very much; however:

nha@capella:~/tmp$ cat foo.c
#include <stdint.h>

struct s {
    uint64_t foo:32;
};

uint64_t foo(uint32_t x, struct s* p)
{
    return x * p->foo;
}

uint64_t foo2(uint32_t x, struct s* p)
{
    return x * (uint64_t)p->foo;
}
nha@capella:~/tmp$ gcc -Wall -Wextra -O2 -S foo.c
nha@capella:~/tmp$ cat foo.s
        .file   "foo.c"
        .text
        .p2align 4,,15
        .globl  foo
        .type   foo, @function
foo:
.LFB0:
        .cfi_startproc
        movl    %edi, %eax
        imull   (%rsi), %eax
        ret
        .cfi_endproc
.LFE0:
        .size   foo, .-foo
        .p2align 4,,15
        .globl  foo2
        .type   foo2, @function
foo2:
.LFB1:
        .cfi_startproc
        movl    (%rsi), %eax
        movl    %edi, %edi
        imulq   %rdi, %rax
        ret
        .cfi_endproc
.LFE1:
        .size   foo2, .-foo2
        .ident  "GCC: (Ubuntu 6.3.0-12ubuntu2) 6.3.0 20170406"
        .section        .note.GNU-stack,"",@progbits

This is with gcc-7.2.

Some more observations:

- Clang 5.0 produces the same code

- gcc produces entirely ridiculous code when the bitfield has 33 or morebits (it ends up producing what is essentially a 33-bit wide multiplication)- Clang actually produces the expected code when the bitfield has 33 ormore bits

- gcc produces the expected code when compiling it as C++

Could you please file bugs against both gcc and clang for this?

I mean, it's entirely possible that the C standard is crazy enough thatit says this behavior is technically correct -- though I doubt it giventhe behavior for 33 bits and the inconsistency between the compilers forC++. In any case, *if* treating the value as less than 64 bits iscorrect, they should really produce a warning since the resulting codeis dangerously different from what you'd expect.


Cheers,
Nicolai


Marek



--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/9] ac: change legacy_surf_level::slice_size to dword units

Reply via email to