From: Marek Olšák <marek.ol...@amd.com> Reviewed-by: Alex Deucher <alexander.deuc...@amd.com> --- src/amd/addrlib/addrinterface.cpp | 32 ++++- src/amd/addrlib/addrinterface.h | 62 ++++++---- src/amd/addrlib/addrtypes.h | 6 +- src/amd/addrlib/amdgpu_asic_addr.h | 3 + src/amd/addrlib/core/addrlib.cpp | 80 +++++++++++-- src/amd/addrlib/core/addrlib.h | 36 +++++- src/amd/addrlib/core/addrlib1.cpp | 14 ++- src/amd/addrlib/core/addrlib2.cpp | 10 ++ src/amd/addrlib/core/addrlib2.h | 6 - src/amd/addrlib/gfx9/gfx9addrlib.cpp | 224 +++++++++++++++++++++++++---------- src/amd/addrlib/gfx9/gfx9addrlib.h | 21 ++-- src/amd/addrlib/r800/ciaddrlib.cpp | 40 +++++-- src/amd/addrlib/r800/ciaddrlib.h | 4 +- src/amd/addrlib/r800/egbaddrlib.cpp | 8 +- src/amd/addrlib/r800/siaddrlib.cpp | 35 ++++-- src/amd/addrlib/r800/siaddrlib.h | 4 +- src/amd/common/ac_surface.c | 2 +- 17 files changed, 439 insertions(+), 148 deletions(-)
diff --git a/src/amd/addrlib/addrinterface.cpp b/src/amd/addrlib/addrinterface.cpp index 5fdf7fc3c65..112431e2cb4 100644 --- a/src/amd/addrlib/addrinterface.cpp +++ b/src/amd/addrlib/addrinterface.cpp @@ -1047,38 +1047,68 @@ ADDR_E_RETURNCODE ADDR_API AddrComputePrtInfo( * * @brief * Convert maximum alignments * * @return * ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE **************************************************************************************************** */ ADDR_E_RETURNCODE ADDR_API AddrGetMaxAlignments( ADDR_HANDLE hLib, ///< address lib handle - ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut) ///< [out] output structure + ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut) ///< [out] output structure { Addr::Lib* pLib = Lib::GetLib(hLib); ADDR_E_RETURNCODE returnCode = ADDR_OK; if (pLib != NULL) { returnCode = pLib->GetMaxAlignments(pOut); } else { returnCode = ADDR_ERROR; } return returnCode; } +/** +**************************************************************************************************** +* AddrGetMaxMetaAlignments +* +* @brief +* Convert maximum alignments for metadata +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrGetMaxMetaAlignments( + ADDR_HANDLE hLib, ///< address lib handle + ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut) ///< [out] output structure +{ + Addr::Lib* pLib = Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->GetMaxMetaAlignments(pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} //////////////////////////////////////////////////////////////////////////////////////////////////// // Surface functions for Addr2 //////////////////////////////////////////////////////////////////////////////////////////////////// /** 
**************************************************************************************************** * Addr2ComputeSurfaceInfo * diff --git a/src/amd/addrlib/addrinterface.h b/src/amd/addrlib/addrinterface.h index 8124b745f21..be9e5c2b81e 100644 --- a/src/amd/addrlib/addrinterface.h +++ b/src/amd/addrlib/addrinterface.h @@ -521,21 +521,22 @@ typedef union _ADDR_SURFACE_FLAGS UINT_32 needEquation : 1; ///< Make the surface tile setting equation compatible. /// This flag indicates we need to override tile /// mode to PRT_* tile mode to disable slice rotation, /// which is needed by swizzle pattern equation. UINT_32 skipIndicesOutput : 1; ///< Skipping indices in output. UINT_32 rotateDisplay : 1; ///< Rotate micro tile type UINT_32 minimizeAlignment : 1; ///< Minimize alignment UINT_32 preferEquation : 1; ///< Return equation index without adjusting tile mode UINT_32 matchStencilTileCfg : 1; ///< Select tile index of stencil as well as depth surface /// to make sure they share same tile config parameters - UINT_32 reserved : 2; ///< Reserved bits + UINT_32 disallowLargeThickDegrade : 1; ///< Disallow large thick tile degrade + UINT_32 reserved : 1; ///< Reserved bits }; UINT_32 value; } ADDR_SURFACE_FLAGS; /** **************************************************************************************************** * ADDR_COMPUTE_SURFACE_INFO_INPUT * * @brief @@ -2266,21 +2267,21 @@ typedef struct _ADDR_COMPUTE_DCCINFO_INPUT **************************************************************************************************** * ADDR_COMPUTE_DCCINFO_OUTPUT * * @brief * Output structure of AddrComputeDccInfo **************************************************************************************************** */ typedef struct _ADDR_COMPUTE_DCCINFO_OUTPUT { UINT_32 size; ///< Size of this structure in bytes - UINT_64 dccRamBaseAlign; ///< Base alignment of dcc key + UINT_32 dccRamBaseAlign; ///< Base alignment of dcc key UINT_64 dccRamSize; ///< Size of dcc key UINT_64 
dccFastClearSize; ///< Size of dcc key portion that can be fast cleared BOOL_32 subLvlCompressible; ///< Whether sub resource is compressiable BOOL_32 dccRamSizeAligned; ///< Whether the dcc key size is aligned } ADDR_COMPUTE_DCCINFO_OUTPUT; /** **************************************************************************************************** * AddrComputeDccInfo * @@ -2291,45 +2292,55 @@ typedef struct _ADDR_COMPUTE_DCCINFO_OUTPUT */ ADDR_E_RETURNCODE ADDR_API AddrComputeDccInfo( ADDR_HANDLE hLib, const ADDR_COMPUTE_DCCINFO_INPUT* pIn, ADDR_COMPUTE_DCCINFO_OUTPUT* pOut); /** **************************************************************************************************** -* ADDR_GET_MAX_ALIGNMENTS_OUTPUT +* ADDR_GET_MAX_ALINGMENTS_OUTPUT * * @brief * Output structure of AddrGetMaxAlignments **************************************************************************************************** */ -typedef struct _ADDR_GET_MAX_ALIGNMENTS_OUTPUT +typedef struct _ADDR_GET_MAX_ALINGMENTS_OUTPUT { UINT_32 size; ///< Size of this structure in bytes - UINT_64 baseAlign; ///< Maximum base alignment in bytes -} ADDR_GET_MAX_ALIGNMENTS_OUTPUT; + UINT_32 baseAlign; ///< Maximum base alignment in bytes +} ADDR_GET_MAX_ALINGMENTS_OUTPUT; /** **************************************************************************************************** * AddrGetMaxAlignments * * @brief * Gets maximnum alignments **************************************************************************************************** */ ADDR_E_RETURNCODE ADDR_API AddrGetMaxAlignments( ADDR_HANDLE hLib, - ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut); - + ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut); +/** +**************************************************************************************************** +* AddrGetMaxMetaAlignments +* +* @brief +* Gets maximum alignments for metadata +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API 
AddrGetMaxMetaAlignments( + ADDR_HANDLE hLib, + ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut); /** **************************************************************************************************** * Address library interface version 2 * available from Gfx9 hardware **************************************************************************************************** * Addr2ComputeSurfaceInfo() * Addr2ComputeSurfaceAddrFromCoord() * Addr2ComputeSurfaceCoordFromAddr() @@ -2359,36 +2370,39 @@ ADDR_E_RETURNCODE ADDR_API AddrGetMaxAlignments( * ADDR2_SURFACE_FLAGS * * @brief * Surface flags **************************************************************************************************** */ typedef union _ADDR2_SURFACE_FLAGS { struct { - UINT_32 color : 1; ///< This resource is a color buffer, can be used with RTV - UINT_32 depth : 1; ///< Thie resource is a depth buffer, can be used with DSV - UINT_32 stencil : 1; ///< Thie resource is a stencil buffer, can be used with DSV - UINT_32 fmask : 1; ///< This is an fmask surface - UINT_32 overlay : 1; ///< This is an overlay surface - UINT_32 display : 1; ///< This resource is displable, can be used with DRV - UINT_32 prt : 1; ///< This is a partially resident texture - UINT_32 qbStereo : 1; ///< This is a quad buffer stereo surface - UINT_32 interleaved : 1; ///< Special flag for interleaved YUV surface padding - UINT_32 texture : 1; ///< This resource can be used with SRV - UINT_32 unordered : 1; ///< This resource can be used with UAV - UINT_32 rotated : 1; ///< This resource is rotated and displable - UINT_32 needEquation : 1; ///< This resource needs equation to be generated if possible - UINT_32 opt4space : 1; ///< This resource should be optimized for space - UINT_32 minimizeAlign : 1; ///< This resource should use minimum alignment - UINT_32 reserved : 17; ///< Reserved bits + UINT_32 color : 1; ///< This resource is a color buffer, can be used with RTV + UINT_32 depth : 1; ///< Thie resource is a depth buffer, can be 
used with DSV + UINT_32 stencil : 1; ///< Thie resource is a stencil buffer, can be used with DSV + UINT_32 fmask : 1; ///< This is an fmask surface + UINT_32 overlay : 1; ///< This is an overlay surface + UINT_32 display : 1; ///< This resource is displable, can be used with DRV + UINT_32 prt : 1; ///< This is a partially resident texture + UINT_32 qbStereo : 1; ///< This is a quad buffer stereo surface + UINT_32 interleaved : 1; ///< Special flag for interleaved YUV surface padding + UINT_32 texture : 1; ///< This resource can be used with SRV + UINT_32 unordered : 1; ///< This resource can be used with UAV + UINT_32 rotated : 1; ///< This resource is rotated and displable + UINT_32 needEquation : 1; ///< This resource needs equation to be generated if possible + UINT_32 opt4space : 1; ///< This resource should be optimized for space + UINT_32 minimizeAlign : 1; ///< This resource should use minimum alignment + UINT_32 noMetadata : 1; ///< This resource has no metadata + UINT_32 metaRbUnaligned : 1; ///< This resource has rb unaligned metadata + UINT_32 metaPipeUnaligned : 1; ///< This resource has pipe unaligned metadata + UINT_32 reserved : 14; ///< Reserved bits }; UINT_32 value; } ADDR2_SURFACE_FLAGS; /** **************************************************************************************************** * ADDR2_COMPUTE_SURFACE_INFO_INPUT * * @brief diff --git a/src/amd/addrlib/addrtypes.h b/src/amd/addrlib/addrtypes.h index f8f96d54a10..c63ad96ff97 100644 --- a/src/amd/addrlib/addrtypes.h +++ b/src/amd/addrlib/addrtypes.h @@ -69,32 +69,34 @@ typedef int INT; #ifndef ADDR_CDECL #if defined(__GNUC__) #define ADDR_CDECL __attribute__((cdecl)) #else #define ADDR_CDECL __cdecl #endif #endif #ifndef ADDR_STDCALL #if defined(__GNUC__) - #if defined(__AMD64__) + #if defined(__amd64__) || defined(__x86_64__) #define ADDR_STDCALL #else #define ADDR_STDCALL __attribute__((stdcall)) #endif #else #define ADDR_STDCALL __stdcall #endif #endif #ifndef ADDR_FASTCALL - #if 
defined(__GNUC__) + #if defined(BRAHMA_ARM) + #define ADDR_FASTCALL + #elif defined(__GNUC__) #if defined(__i386__) #define ADDR_FASTCALL __attribute__((regparm(0))) #else #define ADDR_FASTCALL #endif #else #define ADDR_FASTCALL __fastcall #endif #endif diff --git a/src/amd/addrlib/amdgpu_asic_addr.h b/src/amd/addrlib/amdgpu_asic_addr.h index ea957a88b4d..d7232ba14a2 100644 --- a/src/amd/addrlib/amdgpu_asic_addr.h +++ b/src/amd/addrlib/amdgpu_asic_addr.h @@ -78,20 +78,21 @@ #define AMDGPU_POLARIS10_RANGE 0x50, 0x5A #define AMDGPU_POLARIS11_RANGE 0x5A, 0x64 #define AMDGPU_POLARIS12_RANGE 0x64, 0x6E #define AMDGPU_CARRIZO_RANGE 0x01, 0x21 #define AMDGPU_BRISTOL_RANGE 0x10, 0x21 #define AMDGPU_STONEY_RANGE 0x61, 0xFF #define AMDGPU_VEGA10_RANGE 0x01, 0x14 +#define AMDGPU_VEGA12_RANGE 0x14, 0x28 #define AMDGPU_RAVEN_RANGE 0x01, 0x81 #define AMDGPU_EXPAND_FIX(x) x #define AMDGPU_RANGE_HELPER(val, min, max) ((val >= min) && (val < max)) #define AMDGPU_IN_RANGE(val, ...) AMDGPU_EXPAND_FIX(AMDGPU_RANGE_HELPER(val, __VA_ARGS__)) // ASICREV_IS(eRevisionId, revisionName) #define ASICREV_IS(r, rn) AMDGPU_IN_RANGE(r, AMDGPU_##rn##_RANGE) @@ -116,14 +117,16 @@ #define ASICREV_IS_POLARIS10_P(r) ASICREV_IS(r, POLARIS10) #define ASICREV_IS_POLARIS11_M(r) ASICREV_IS(r, POLARIS11) #define ASICREV_IS_POLARIS12_V(r) ASICREV_IS(r, POLARIS12) #define ASICREV_IS_CARRIZO(r) ASICREV_IS(r, CARRIZO) #define ASICREV_IS_CARRIZO_BRISTOL(r) ASICREV_IS(r, BRISTOL) #define ASICREV_IS_STONEY(r) ASICREV_IS(r, STONEY) #define ASICREV_IS_VEGA10_M(r) ASICREV_IS(r, VEGA10) #define ASICREV_IS_VEGA10_P(r) ASICREV_IS(r, VEGA10) +#define ASICREV_IS_VEGA12_P(r) ASICREV_IS(r, VEGA12) +#define ASICREV_IS_VEGA12_p(r) ASICREV_IS(r, VEGA12) #define ASICREV_IS_RAVEN(r) ASICREV_IS(r, RAVEN) #endif // _AMDGPU_ASIC_ADDR_H diff --git a/src/amd/addrlib/core/addrlib.cpp b/src/amd/addrlib/core/addrlib.cpp index a6ac5ecf836..5af6dd1e339 100644 --- a/src/amd/addrlib/core/addrlib.cpp +++ b/src/amd/addrlib/core/addrlib.cpp @@ 
-278,24 +278,26 @@ ADDR_E_RETURNCODE Lib::Create( } } pCreateOut->hLib = pLib; if ((pLib != NULL) && (returnCode == ADDR_OK)) { pCreateOut->numEquations = pLib->HwlGetEquationTableInfo(&pCreateOut->pEquationTable); - } - if ((pLib == NULL) && - (returnCode == ADDR_OK)) + pLib->SetMaxAlignments(); + + } + else if ((pLib == NULL) && + (returnCode == ADDR_OK)) { // Unknown failures, we return the general error code returnCode = ADDR_ERROR; } return returnCode; } /** **************************************************************************************************** @@ -329,20 +331,37 @@ VOID Lib::SetChipFamily( * @return * N/A **************************************************************************************************** */ VOID Lib::SetMinPitchAlignPixels( UINT_32 minPitchAlignPixels) ///< [in] minmum pitch alignment in pixels { m_minPitchAlignPixels = (minPitchAlignPixels == 0) ? 1 : minPitchAlignPixels; } +/** +**************************************************************************************************** +* Lib::SetMaxAlignments +* +* @brief +* Set max alignments +* +* @return +* N/A +**************************************************************************************************** +*/ +VOID Lib::SetMaxAlignments() +{ + m_maxBaseAlign = HwlComputeMaxBaseAlignments(); + m_maxMetaBaseAlign = HwlComputeMaxMetaBaseAlignments(); +} + /** **************************************************************************************************** * Lib::GetLib * * @brief * Get AddrLib pointer * * @return * An AddrLib class pointer **************************************************************************************************** @@ -351,43 +370,90 @@ Lib* Lib::GetLib( ADDR_HANDLE hLib) ///< [in] handle of ADDR_HANDLE { return static_cast<Addr::Lib*>(hLib); } /** **************************************************************************************************** * Lib::GetMaxAlignments * * @brief -* Gets maximum alignments +* Gets maximum alignments for data surface 
(include FMask) * * @return * ADDR_E_RETURNCODE **************************************************************************************************** */ ADDR_E_RETURNCODE Lib::GetMaxAlignments( - ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut ///< [out] output structure + ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut ///< [out] output structure ) const { ADDR_E_RETURNCODE returnCode = ADDR_OK; if (GetFillSizeFieldsFlags() == TRUE) { - if (pOut->size != sizeof(ADDR_GET_MAX_ALIGNMENTS_OUTPUT)) + if (pOut->size != sizeof(ADDR_GET_MAX_ALINGMENTS_OUTPUT)) { returnCode = ADDR_PARAMSIZEMISMATCH; } } if (returnCode == ADDR_OK) { - returnCode = HwlGetMaxAlignments(pOut); + if (m_maxBaseAlign != 0) + { + pOut->baseAlign = m_maxBaseAlign; + } + else + { + returnCode = ADDR_NOTIMPLEMENTED; + } + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::GetMaxMetaAlignments +* +* @brief +* Gets maximum alignments for metadata (CMask, DCC and HTile) +* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::GetMaxMetaAlignments( + ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if (pOut->size != sizeof(ADDR_GET_MAX_ALINGMENTS_OUTPUT)) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + if (m_maxMetaBaseAlign != 0) + { + pOut->baseAlign = m_maxMetaBaseAlign; + } + else + { + returnCode = ADDR_NOTIMPLEMENTED; + } } return returnCode; } /** **************************************************************************************************** * Lib::Bits2Number * * @brief diff --git a/src/amd/addrlib/core/addrlib.h b/src/amd/addrlib/core/addrlib.h index 8db65a61c87..0cbb4e0186f 100644 --- a/src/amd/addrlib/core/addrlib.h +++ b/src/amd/addrlib/core/addrlib.h @@ 
-275,28 +275,52 @@ public: ADDR_E_RETURNCODE Flt32ToDepthPixel( const ELEM_FLT32TODEPTHPIXEL_INPUT* pIn, ELEM_FLT32TODEPTHPIXEL_OUTPUT* pOut) const; ADDR_E_RETURNCODE Flt32ToColorPixel( const ELEM_FLT32TOCOLORPIXEL_INPUT* pIn, ELEM_FLT32TOCOLORPIXEL_OUTPUT* pOut) const; BOOL_32 GetExportNorm(const ELEM_GETEXPORTNORM_INPUT* pIn) const; - ADDR_E_RETURNCODE GetMaxAlignments(ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut) const; + ADDR_E_RETURNCODE GetMaxAlignments(ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE GetMaxMetaAlignments(ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut) const; protected: Lib(); // Constructor is protected Lib(const Client* pClient); - /// Pure virtual function to get max alignments - virtual ADDR_E_RETURNCODE HwlGetMaxAlignments(ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut) const = 0; + /// Pure virtual function to get max base alignments + virtual UINT_32 HwlComputeMaxBaseAlignments() const = 0; + + /// Gets maximum alignments for metadata + virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const + { + ADDR_NOT_IMPLEMENTED(); + + return 0; + } + + VOID ValidBaseAlignments(UINT_32 alignment) const + { +#if DEBUG + ADDR_ASSERT(alignment <= m_maxBaseAlign); +#endif + } + + VOID ValidMetaBaseAlignments(UINT_32 metaAlignment) const + { +#if DEBUG + ADDR_ASSERT(metaAlignment <= m_maxMetaBaseAlign); +#endif + } // // Initialization // /// Pure Virtual function for Hwl computing internal global parameters from h/w registers virtual BOOL_32 HwlInitGlobalParams(const ADDR_CREATE_INPUT* pCreateIn) = 0; /// Pure Virtual function for Hwl converting chip family virtual ChipFamily HwlConvertChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision) = 0; @@ -334,20 +358,22 @@ private: // Disallow the copy constructor Lib(const Lib& a); // Disallow the assignment operator Lib& operator=(const Lib& a); VOID SetChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision); VOID SetMinPitchAlignPixels(UINT_32 minPitchAlignPixels); + VOID SetMaxAlignments(); + protected: LibClass 
m_class; ///< Store class type (HWL type) ChipFamily m_chipFamily; ///< Chip family translated from the one in atiid.h UINT_32 m_chipRevision; ///< Revision id from xxx_id.h UINT_32 m_version; ///< Current version // @@ -363,20 +389,24 @@ protected: UINT_32 m_pipeInterleaveBytes; ///< Specifies the size of contiguous address space /// within each tiling pipe when making linear /// accesses. (Formerly Group Size) UINT_32 m_rowSize; ///< DRAM row size, in bytes UINT_32 m_minPitchAlignPixels; ///< Minimum pitch alignment in pixels UINT_32 m_maxSamples; ///< Max numSamples + + UINT_32 m_maxBaseAlign; ///< Max base alignment for data surface + UINT_32 m_maxMetaBaseAlign; ///< Max base alignment for metadata + private: ElemLib* m_pElemLib; ///< Element Lib pointer }; Lib* SiHwlInit (const Client* pClient); Lib* CiHwlInit (const Client* pClient); Lib* Gfx9HwlInit (const Client* pClient); } // Addr diff --git a/src/amd/addrlib/core/addrlib1.cpp b/src/amd/addrlib/core/addrlib1.cpp index c796a63436c..9c1d84289b3 100644 --- a/src/amd/addrlib/core/addrlib1.cpp +++ b/src/amd/addrlib/core/addrlib1.cpp @@ -421,20 +421,22 @@ ADDR_E_RETURNCODE Lib::ComputeSurfaceInfo( } } } pOut->pitchTileMax = pOut->pitch / 8 - 1; pOut->heightTileMax = pOut->height / 8 - 1; pOut->sliceTileMax = pOut->pitch * pOut->height / 64 - 1; } } + ValidBaseAlignments(pOut->baseAlign); + return returnCode; } /** **************************************************************************************************** * Lib::ComputeSurfaceInfo * * @brief * Interface function stub of AddrComputeSurfaceInfo. 
* @@ -888,20 +890,22 @@ ADDR_E_RETURNCODE Lib::ComputeFmaskInfo( } else { memset(pOut, 0, sizeof(ADDR_COMPUTE_FMASK_INFO_OUTPUT)); returnCode = ADDR_INVALIDPARAMS; } } } + ValidBaseAlignments(pOut->baseAlign); + return returnCode; } /** **************************************************************************************************** * Lib::ComputeFmaskAddrFromCoord * * @brief * Interface function stub of ComputeFmaskAddrFromCoord. * @@ -1326,20 +1330,22 @@ ADDR_E_RETURNCODE Lib::ComputeHtileInfo( &pOut->height, &pOut->htileBytes, &pOut->macroWidth, &pOut->macroHeight, &pOut->sliceSize, &pOut->baseAlign); } } } + ValidMetaBaseAlignments(pOut->baseAlign); + return returnCode; } /** **************************************************************************************************** * Lib::ComputeCmaskInfo * * @brief * Interface function stub of AddrComputeCmaskInfo * @@ -1392,20 +1398,22 @@ ADDR_E_RETURNCODE Lib::ComputeCmaskInfo( &pOut->height, &pOut->cmaskBytes, &pOut->macroWidth, &pOut->macroHeight, &pOut->sliceSize, &pOut->baseAlign, &pOut->blockMax); } } + ValidMetaBaseAlignments(pOut->baseAlign); + return returnCode; } /** **************************************************************************************************** * Lib::ComputeDccInfo * * @brief * Interface function to compute DCC key info * @@ -1436,23 +1444,25 @@ ADDR_E_RETURNCODE Lib::ComputeDccInfo( if (UseTileIndex(pIn->tileIndex)) { input = *pIn; ret = HwlSetupTileCfg(input.bpp, input.tileIndex, input.macroModeIndex, &input.tileInfo, &input.tileMode); pIn = &input; } - if (ADDR_OK == ret) + if (ret == ADDR_OK) { ret = HwlComputeDccInfo(pIn, pOut); + + ValidMetaBaseAlignments(pOut->dccRamBaseAlign); } } return ret; } /** **************************************************************************************************** * Lib::ComputeHtileAddrFromCoord * @@ -3645,21 +3655,21 @@ VOID Lib::OptimizeTileMode( { tileMode = ADDR_TM_LINEAR_ALIGNED; } else if (IsMacroTiled(tileMode) && 
(pInOut->flags.tcCompatible == FALSE)) { if (DegradeTo1D(width, height, macroWidthAlign, macroHeightAlign)) { tileMode = (thickness == 1) ? ADDR_TM_1D_TILED_THIN1 : ADDR_TM_1D_TILED_THICK; } - else if (thickness > 1) + else if ((thickness > 1) && (pInOut->flags.disallowLargeThickDegrade == 0)) { // As in the following HwlComputeSurfaceInfo, thick modes may be degraded to // thinner modes, we should re-evaluate whether the corresponding // thinner modes should be degraded. If so, we choose 1D thick mode instead. tileMode = DegradeLargeThickTile(pInOut->tileMode, pInOut->bpp); if (tileMode != pInOut->tileMode) { // Get thickness again after large thick degrade thickness = Thickness(tileMode); diff --git a/src/amd/addrlib/core/addrlib2.cpp b/src/amd/addrlib/core/addrlib2.cpp index ddaf597f9dd..fc9b71f3ee4 100644 --- a/src/amd/addrlib/core/addrlib2.cpp +++ b/src/amd/addrlib/core/addrlib2.cpp @@ -288,20 +288,22 @@ ADDR_E_RETURNCODE Lib::ComputeSurfaceInfo( if (pOut->pStereoInfo != NULL) { ComputeQbStereoInfo(pOut); } } } } ADDR_ASSERT(pOut->surfSize != 0); + ValidBaseAlignments(pOut->baseAlign); + return returnCode; } /** ************************************************************************************************************************ * Lib::ComputeSurfaceInfo * * @brief * Interface function stub of AddrComputeSurfaceInfo. 
* @@ -440,20 +442,22 @@ ADDR_E_RETURNCODE Lib::ComputeHtileInfo( if ((GetFillSizeFieldsFlags() == TRUE) && ((pIn->size != sizeof(ADDR2_COMPUTE_HTILE_INFO_INPUT)) || (pOut->size != sizeof(ADDR2_COMPUTE_HTILE_INFO_OUTPUT)))) { returnCode = ADDR_INVALIDPARAMS; } else { returnCode = HwlComputeHtileInfo(pIn, pOut); + + ValidMetaBaseAlignments(pOut->baseAlign); } return returnCode; } /** ************************************************************************************************************************ * Lib::ComputeHtileAddrFromCoord * * @brief @@ -538,20 +542,22 @@ ADDR_E_RETURNCODE Lib::ComputeCmaskInfo( { returnCode = ADDR_INVALIDPARAMS; } else if (pIn->cMaskFlags.linear) { returnCode = ADDR_INVALIDPARAMS; } else { returnCode = HwlComputeCmaskInfo(pIn, pOut); + + ValidMetaBaseAlignments(pOut->baseAlign); } return returnCode; } /** ************************************************************************************************************************ * Lib::ComputeCmaskAddrFromCoord * * @brief @@ -681,20 +687,22 @@ ADDR_E_RETURNCODE Lib::ComputeFmaskInfo( pOut->height = localOut.height; pOut->baseAlign = localOut.baseAlign; pOut->numSlices = localOut.numSlices; pOut->fmaskBytes = static_cast<UINT_32>(localOut.surfSize); pOut->sliceSize = static_cast<UINT_32>(localOut.sliceSize); pOut->bpp = localIn.bpp; pOut->numSamples = 1; } } + ValidBaseAlignments(pOut->baseAlign); + return returnCode; } /** ************************************************************************************************************************ * Lib::ComputeFmaskAddrFromCoord * * @brief * Interface function stub of ComputeFmaskAddrFromCoord. 
* @@ -757,20 +765,22 @@ ADDR_E_RETURNCODE Lib::ComputeDccInfo( if ((GetFillSizeFieldsFlags() == TRUE) && ((pIn->size != sizeof(ADDR2_COMPUTE_DCCINFO_INPUT)) || (pOut->size != sizeof(ADDR2_COMPUTE_DCCINFO_OUTPUT)))) { returnCode = ADDR_INVALIDPARAMS; } else { returnCode = HwlComputeDccInfo(pIn, pOut); + + ValidMetaBaseAlignments(pOut->dccRamBaseAlign); } return returnCode; } /** ************************************************************************************************************************ * Lib::ComputeDccAddrFromCoord * * @brief diff --git a/src/amd/addrlib/core/addrlib2.h b/src/amd/addrlib/core/addrlib2.h index bea2a485a61..d82e6c0984b 100644 --- a/src/amd/addrlib/core/addrlib2.h +++ b/src/amd/addrlib/core/addrlib2.h @@ -473,26 +473,20 @@ protected: return ADDR_INVALID_EQUATION_INDEX; } UINT_32 GetEquationIndex( const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const { return HwlGetEquationIndex(pIn, pOut); } - virtual UINT_32 HwlComputeSurfaceBaseAlign(AddrSwizzleMode swizzleMode) const - { - ADDR_NOT_IMPLEMENTED(); - return 0; - } - virtual ADDR_E_RETURNCODE HwlComputePipeBankXor( const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const { ADDR_NOT_IMPLEMENTED(); return ADDR_NOTSUPPORTED; } virtual ADDR_E_RETURNCODE HwlComputeSlicePipeBankXor( const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, diff --git a/src/amd/addrlib/gfx9/gfx9addrlib.cpp b/src/amd/addrlib/gfx9/gfx9addrlib.cpp index e06f13c0afe..b88d3243228 100644 --- a/src/amd/addrlib/gfx9/gfx9addrlib.cpp +++ b/src/amd/addrlib/gfx9/gfx9addrlib.cpp @@ -182,24 +182,24 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo( numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2); } else { numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10; } } numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2; - Dim3d metaBlkDim = {8, 8, 1}; + Dim3d metaBlkDim = {8, 8, 1}; UINT_32 totalAmpBits = 
numCompressBlkPerMetaBlkLog2; - UINT_32 widthAmp = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits); - UINT_32 heightAmp = totalAmpBits - widthAmp; + UINT_32 widthAmp = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits); + UINT_32 heightAmp = totalAmpBits - widthAmp; metaBlkDim.w <<= widthAmp; metaBlkDim.h <<= heightAmp; #if DEBUG Dim3d metaBlkDimDbg = {8, 8, 1}; for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++) { if ((metaBlkDimDbg.h < metaBlkDimDbg.w) || ((pIn->numMipLevels > 1) && (metaBlkDimDbg.h == metaBlkDimDbg.w))) { @@ -214,53 +214,56 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo( #endif UINT_32 numMetaBlkX; UINT_32 numMetaBlkY; UINT_32 numMetaBlkZ; GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, FALSE, pOut->pMipInfo, pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices, &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ); - UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes; + const UINT_32 metaBlkSize = numCompressBlkPerMetaBlk << 2; + UINT_32 align = numPipeTotal * numRbTotal * m_pipeInterleaveBytes; - if (m_settings.htileAlignFix) + if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2)) { - sizeAlign <<= 1; + align *= (numPipeTotal >> 1); } - pOut->pitch = numMetaBlkX * metaBlkDim.w; - pOut->height = numMetaBlkY * metaBlkDim.h; - pOut->sliceSize = numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk * 4; - - pOut->metaBlkWidth = metaBlkDim.w; - pOut->metaBlkHeight = metaBlkDim.h; - pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY; - - pOut->baseAlign = Max(numCompressBlkPerMetaBlk * 4, sizeAlign); + align = Max(align, metaBlkSize); if (m_settings.metaBaseAlignFix) { - pOut->baseAlign = Max(pOut->baseAlign, GetBlockSize(pIn->swizzleMode)); + align = Max(align, GetBlockSize(pIn->swizzleMode)); } - if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2)) + if (m_settings.htileAlignFix) { - UINT_32 additionalAlign = numPipeTotal * numCompressBlkPerMetaBlk * 2; 
+ const INT_32 metaBlkSizeLog2 = numCompressBlkPerMetaBlkLog2 + 2; + const INT_32 htileCachelineSizeLog2 = 11; + const INT_32 maxNumOfRbMaskBits = 1 + Log2(numPipeTotal) + Log2(numRbTotal); - if (additionalAlign > sizeAlign) - { - sizeAlign = additionalAlign; - } + INT_32 rbMaskPadding = Max(0, htileCachelineSizeLog2 - (metaBlkSizeLog2 - maxNumOfRbMaskBits)); + + align <<= rbMaskPadding; } - pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign); + pOut->pitch = numMetaBlkX * metaBlkDim.w; + pOut->height = numMetaBlkY * metaBlkDim.h; + pOut->sliceSize = numMetaBlkX * numMetaBlkY * metaBlkSize; + + pOut->metaBlkWidth = metaBlkDim.w; + pOut->metaBlkHeight = metaBlkDim.h; + pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY; + + pOut->baseAlign = align; + pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, align); return ADDR_OK; } /** ************************************************************************************************************************ * Gfx9Lib::HwlComputeCmaskInfo * * @brief * Interface function stub of AddrComputeCmaskInfo @@ -326,31 +329,31 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo( } ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h)); #endif UINT_32 numMetaBlkX = (pIn->unalignedWidth + metaBlkDim.w - 1) / metaBlkDim.w; UINT_32 numMetaBlkY = (pIn->unalignedHeight + metaBlkDim.h - 1) / metaBlkDim.h; UINT_32 numMetaBlkZ = Max(pIn->numSlices, 1u); UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes; + if (m_settings.metaBaseAlignFix) + { + sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode)); + } + pOut->pitch = numMetaBlkX * metaBlkDim.w; pOut->height = numMetaBlkY * metaBlkDim.h; pOut->sliceSize = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1; pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign); pOut->baseAlign = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign); - if (m_settings.metaBaseAlignFix) - { - pOut->baseAlign = 
Max(pOut->baseAlign, GetBlockSize(pIn->swizzleMode)); - } - pOut->metaBlkWidth = metaBlkDim.w; pOut->metaBlkHeight = metaBlkDim.h; pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY; return ADDR_OK; } /** ************************************************************************************************************************ @@ -631,30 +634,30 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo( pIn->unalignedWidth, pIn->unalignedHeight, numSlices, &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ); UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes; if (numFrags > m_maxCompFrag) { sizeAlign *= (numFrags / m_maxCompFrag); } + if (m_settings.metaBaseAlignFix) + { + sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode)); + } + pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ * numCompressBlkPerMetaBlk * numFrags; pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign); pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign); - if (m_settings.metaBaseAlignFix) - { - pOut->dccRamBaseAlign = Max(pOut->dccRamBaseAlign, GetBlockSize(pIn->swizzleMode)); - } - pOut->pitch = numMetaBlkX * metaBlkDim.w; pOut->height = numMetaBlkY * metaBlkDim.h; pOut->depth = numMetaBlkZ * metaBlkDim.d; pOut->compressBlkWidth = compressBlkDim.w; pOut->compressBlkHeight = compressBlkDim.h; pOut->compressBlkDepth = compressBlkDim.d; pOut->metaBlkWidth = metaBlkDim.w; pOut->metaBlkHeight = metaBlkDim.h; @@ -663,35 +666,92 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo( pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY; pOut->fastClearSizePerSlice = pOut->metaBlkNumPerSlice * numCompressBlkPerMetaBlk * Min(numFrags, m_maxCompFrag); } return ADDR_OK; } /** ************************************************************************************************************************ -* Gfx9Lib::HwlGetMaxAlignments +* Gfx9Lib::HwlComputeMaxBaseAlignments * * @brief * Gets maximum alignments * @return -* ADDR_E_RETURNCODE +* maximum alignments 
************************************************************************************************************************ */ -ADDR_E_RETURNCODE Gfx9Lib::HwlGetMaxAlignments( - ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut ///< [out] output structure - ) const +UINT_32 Gfx9Lib::HwlComputeMaxBaseAlignments() const { - pOut->baseAlign = HwlComputeSurfaceBaseAlign(ADDR_SW_64KB); + return ComputeSurfaceBaseAlignTiled(ADDR_SW_64KB); +} - return ADDR_OK; +/** +************************************************************************************************************************ +* Gfx9Lib::HwlComputeMaxMetaBaseAlignments +* +* @brief +* Gets maximum alignments for metadata +* @return +* maximum alignments for metadata +************************************************************************************************************************ +*/ +UINT_32 Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const +{ + // Max base alignment for Htile + const UINT_32 maxNumPipeTotal = GetPipeNumForMetaAddressing(TRUE, ADDR_SW_64KB_Z); + const UINT_32 maxNumRbTotal = m_se * m_rbPerSe; + + // If applyAliasFix was set, the extra bits should be MAX(10u, m_pipeInterleaveLog2), + // but we never saw any ASIC whose m_pipeInterleaveLog2 != 8, so just put an assertion and simplify the logic.
+ ADDR_ASSERT((m_settings.applyAliasFix == FALSE) || (m_pipeInterleaveLog2 <= 10u)); + const UINT_32 maxNumCompressBlkPerMetaBlk = 1u << (m_seLog2 + m_rbPerSeLog2 + 10u); + + UINT_32 maxBaseAlignHtile = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes; + + if (maxNumPipeTotal > 2) + { + maxBaseAlignHtile *= (maxNumPipeTotal >> 1); + } + + maxBaseAlignHtile = Max(maxNumCompressBlkPerMetaBlk << 2, maxBaseAlignHtile); + + if (m_settings.metaBaseAlignFix) + { + maxBaseAlignHtile = Max(maxBaseAlignHtile, GetBlockSize(ADDR_SW_64KB)); + } + + if (m_settings.htileAlignFix) + { + maxBaseAlignHtile *= maxNumPipeTotal; + } + + // Max base alignment for Cmask will not be larger than that for Htile, no need to calculate + + // Max base alignment for 2D Dcc will not be larger than that for 3D, no need to calculate + UINT_32 maxBaseAlignDcc3D = 65536; + + if ((maxNumPipeTotal > 1) || (maxNumRbTotal > 1)) + { + maxBaseAlignDcc3D = Min(m_se * m_rbPerSe * 262144, 65536 * 128u); + } + + // Max base alignment for Msaa Dcc + UINT_32 maxBaseAlignDccMsaa = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes * (8 / m_maxCompFrag); + + if (m_settings.metaBaseAlignFix) + { + maxBaseAlignDccMsaa = Max(maxBaseAlignDccMsaa, GetBlockSize(ADDR_SW_64KB)); + } + + return Max(maxBaseAlignHtile, Max(maxBaseAlignDccMsaa, maxBaseAlignDcc3D)); } /** ************************************************************************************************************************ * Gfx9Lib::HwlComputeCmaskAddrFromCoord * * @brief * Interface function stub of AddrComputeCmaskAddrFromCoord * * @return @@ -717,23 +777,25 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord( ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output); if (returnCode == ADDR_OK) { UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags); UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3); UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth); UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight); - const 
CoordEq* pMetaEq = GetMetaEquation({0, fmaskElementBytesLog2, 0, pIn->cMaskFlags, - Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType, - metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0}); + MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags, + Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType, + metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0}; + + const CoordEq* pMetaEq = GetMetaEquation(metaEqParams); UINT_32 xb = pIn->x / output.metaBlkWidth; UINT_32 yb = pIn->y / output.metaBlkHeight; UINT_32 zb = pIn->slice; UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth; UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock; UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb; UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex); @@ -791,23 +853,25 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord( returnCode = ComputeHtileInfo(&input, &output); if (returnCode == ADDR_OK) { UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3); UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth); UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight); UINT_32 numSamplesLog2 = Log2(pIn->numSamples); - const CoordEq* pMetaEq = GetMetaEquation({0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags, - Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D, - metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0}); + MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags, + Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D, + metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0}; + + const CoordEq* pMetaEq = GetMetaEquation(metaEqParams); UINT_32 xb = pIn->x / output.metaBlkWidth; UINT_32 yb = pIn->y / output.metaBlkHeight; UINT_32 zb = pIn->slice; UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth; UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock; UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb; UINT_64 address 
= pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex); @@ -863,23 +927,25 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr( returnCode = ComputeHtileInfo(&input, &output); if (returnCode == ADDR_OK) { UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3); UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth); UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight); UINT_32 numSamplesLog2 = Log2(pIn->numSamples); - const CoordEq* pMetaEq = GetMetaEquation({0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags, - Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D, - metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0}); + MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags, + Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D, + metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0}; + + const CoordEq* pMetaEq = GetMetaEquation(metaEqParams); UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned, pIn->swizzleMode); UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1)); UINT_64 nibbleAddress = (pIn->addr ^ (pipeXor << m_pipeInterleaveLog2)) << 1; UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth; UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock; @@ -941,24 +1007,26 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord( { UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3); UINT_32 numSamplesLog2 = Log2(pIn->numFrags); UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth); UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight); UINT_32 metaBlkDepthLog2 = Log2(output.metaBlkDepth); UINT_32 compBlkWidthLog2 = Log2(output.compressBlkWidth); UINT_32 compBlkHeightLog2 = Log2(output.compressBlkHeight); UINT_32 compBlkDepthLog2 = Log2(output.compressBlkDepth); - const CoordEq* pMetaEq = GetMetaEquation({pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags, - Gfx9DataColor, pIn->swizzleMode, pIn->resourceType, - metaBlkWidthLog2, 
metaBlkHeightLog2, metaBlkDepthLog2, - compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2}); + MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags, + Gfx9DataColor, pIn->swizzleMode, pIn->resourceType, + metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2, + compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2}; + + const CoordEq* pMetaEq = GetMetaEquation(metaEqParams); UINT_32 xb = pIn->x / output.metaBlkWidth; UINT_32 yb = pIn->y / output.metaBlkHeight; UINT_32 zb = pIn->slice / output.metaBlkDepth; UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth; UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock; UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb; UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex); @@ -1048,20 +1116,24 @@ BOOL_32 Gfx9Lib::HwlInitGlobalParams( break; case ADDR_CONFIG_PIPE_INTERLEAVE_2KB: m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB; m_pipeInterleaveLog2 = 11; break; default: ADDR_ASSERT_ALWAYS(); break; } + // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits, + // and any larger value requires a post-process (left shift) on the output pipeBankXor bits. 
+ ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B); + switch (gbAddrConfig.bits.NUM_BANKS) { case ADDR_CONFIG_1_BANK: m_banks = 1; m_banksLog2 = 0; break; case ADDR_CONFIG_2_BANK: m_banks = 2; m_banksLog2 = 1; break; @@ -1144,20 +1216,33 @@ BOOL_32 Gfx9Lib::HwlInitGlobalParams( break; default: ADDR_ASSERT_ALWAYS(); break; } m_blockVarSizeLog2 = pCreateIn->regValue.blockVarSizeLog2; ADDR_ASSERT((m_blockVarSizeLog2 == 0) || ((m_blockVarSizeLog2 >= 17u) && (m_blockVarSizeLog2 <= 20u))); m_blockVarSizeLog2 = Min(Max(17u, m_blockVarSizeLog2), 20u); + + if ((m_rbPerSeLog2 == 1) && + (((m_pipesLog2 == 1) && ((m_seLog2 == 2) || (m_seLog2 == 3))) || + ((m_pipesLog2 == 2) && ((m_seLog2 == 1) || (m_seLog2 == 2))))) + { + ADDR_ASSERT(m_settings.isVega10 == FALSE); + ADDR_ASSERT(m_settings.isRaven == FALSE); + + if (m_settings.isVega12) + { + m_settings.htileCacheRbConflict = 1; + } + } } else { valid = FALSE; ADDR_NOT_IMPLEMENTED(); } if (valid) { InitEquationTable(); @@ -1180,20 +1265,21 @@ ChipFamily Gfx9Lib::HwlConvertChipFamily( UINT_32 uChipFamily, ///< [in] chip family defined in atiih.h UINT_32 uChipRevision) ///< [in] chip revision defined in "asic_family"_id.h { ChipFamily family = ADDR_CHIP_FAMILY_AI; switch (uChipFamily) { case FAMILY_AI: m_settings.isArcticIsland = 1; m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision); + m_settings.isVega12 = ASICREV_IS_VEGA12_P(uChipRevision); m_settings.isDce12 = 1; if (m_settings.isVega10 == 0) { m_settings.htileAlignFix = 1; m_settings.applyAliasFix = 1; } m_settings.metaBaseAlignFix = 1; @@ -3272,31 +3358,42 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting( pOut->canXor = (pIn->flags.prt == FALSE) && (pIn->noXor == FALSE); // Filter out improper swType and blockSet by HW restriction if (pIn->flags.fmask || pIn->flags.depth || pIn->flags.stencil) { ADDR_ASSERT(IsTex2d(pOut->resourceType)); blockSet.value = AddrBlockSetMacro; addrPreferredSwSet.value = AddrSwSetZ; addrValidSwSet.value = AddrSwSetZ; 
- if (pIn->flags.depth && pIn->flags.texture) + if (pIn->flags.noMetadata == FALSE) { - if (((bpp == 16) && (numFrags >= 4)) || - ((bpp == 32) && (numFrags >= 2))) + if (pIn->flags.depth && + pIn->flags.texture && + (((bpp == 16) && (numFrags >= 4)) || ((bpp == 32) && (numFrags >= 2)))) { // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane // equation from wrong address within memory range a tile covered and use the // garbage data for compressed Z reading which finally leads to corruption. pOut->canXor = FALSE; prtXor = FALSE; } + + if (m_settings.htileCacheRbConflict && + (pIn->flags.depth || pIn->flags.stencil) && + (slice > 1) && + (pIn->flags.metaRbUnaligned == FALSE) && + (pIn->flags.metaPipeUnaligned == FALSE)) + { + // Z_X 2D array with Rb/Pipe aligned HTile won't have metadata cache coherency + pOut->canXor = FALSE; + } } } else if (ElemLib::IsBlockCompressed(pIn->format)) { // block compressed formats (BCx, ASTC, ETC2) must be either S or D modes. // Not sure under what circumstances "_D" would be appropriate as these formats // are not displayable. blockSet.value = AddrBlockSetMacro; // This isn't to be used as texture and caller doesn't allow macro tiled. 
@@ -3395,26 +3492,26 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting( prtXor = FALSE; addrValidSwSet.value = AddrSwSetD | AddrSwSetR; } else if (m_settings.isDcn1) { // _R is not supported by Dcn1 if (pIn->bpp == 64) { addrPreferredSwSet.value = AddrSwSetD; - addrValidSwSet.value = AddrSwSetD; + addrValidSwSet.value = AddrSwSetS | AddrSwSetD; } else { addrPreferredSwSet.value = AddrSwSetS; - addrValidSwSet.value = AddrSwSetS | AddrSwSetD; + addrValidSwSet.value = AddrSwSetS; } blockSet.micro = FALSE; } else { ADDR_NOT_IMPLEMENTED(); returnCode = ADDR_NOTSUPPORTED; } } @@ -4030,21 +4127,21 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled( { pOut->pMipInfo[0].pitch = pOut->pitch; pOut->pMipInfo[0].height = pOut->height; pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType)? pOut->numSlices : 1; pOut->pMipInfo[0].offset = 0; } pOut->sliceSize = static_cast<UINT_64>(pOut->mipChainPitch) * pOut->mipChainHeight * (pIn->bpp >> 3) * pIn->numFrags; pOut->surfSize = pOut->sliceSize * pOut->mipChainSlice; - pOut->baseAlign = HwlComputeSurfaceBaseAlign(pIn->swizzleMode); + pOut->baseAlign = ComputeSurfaceBaseAlignTiled(pIn->swizzleMode); if (pIn->flags.prt) { pOut->baseAlign = Max(pOut->baseAlign, PrtAlignment); } } } return returnCode; } @@ -4755,29 +4852,26 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled( } returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor, bankBits, pipeBits, &blockOffset); blockOffset %= (1 << log2blkSize); UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth; UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight; UINT_32 sliceSizeInMacroBlock = pitchInMacroBlock * paddedHeightInMacroBlock; - UINT_32 macroBlockIndex = + UINT_64 macroBlockIndex = (pIn->slice + mipStartPos.d) * sliceSizeInMacroBlock + ((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock + ((pIn->x / localOut.blockWidth) + mipStartPos.w); - UINT_64 macroBlockOffset = 
(static_cast<UINT_64>(macroBlockIndex) << - GetBlockSizeLog2(pIn->swizzleMode)); - - pOut->addr = blockOffset | macroBlockOffset; + pOut->addr = blockOffset | (macroBlockIndex << log2blkSize); } else { UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode); Dim3d microBlockDim = Block1K_3d[log2ElementBytes]; UINT_32 blockOffset = MortonGen3d((pIn->x / microBlockDim.w), (pIn->y / microBlockDim.h), (pIn->slice / microBlockDim.d), @@ -4828,21 +4922,21 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled( blockOffset %= (1 << log2blkSize); UINT_32 xb = pIn->x / localOut.blockWidth + mipStartPos.w; UINT_32 yb = pIn->y / localOut.blockHeight + mipStartPos.h; UINT_32 zb = pIn->slice / localOut.blockSlices + + mipStartPos.d; UINT_32 pitchInBlock = localOut.mipChainPitch / localOut.blockWidth; UINT_32 sliceSizeInBlock = (localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock; - UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb; + UINT_64 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb; pOut->addr = blockOffset | (blockIndex << log2blkSize); } } else { returnCode = ADDR_INVALIDPARAMS; } return returnCode; diff --git a/src/amd/addrlib/gfx9/gfx9addrlib.h b/src/amd/addrlib/gfx9/gfx9addrlib.h index 1f233a4ff91..7c61a40880e 100644 --- a/src/amd/addrlib/gfx9/gfx9addrlib.h +++ b/src/amd/addrlib/gfx9/gfx9addrlib.h @@ -48,33 +48,33 @@ namespace V2 ************************************************************************************************************************ */ struct Gfx9ChipSettings { struct { // Asic/Generation name UINT_32 isArcticIsland : 1; UINT_32 isVega10 : 1; UINT_32 isRaven : 1; - UINT_32 reserved0 : 29; + UINT_32 isVega12 : 1; // Display engine IP version name UINT_32 isDce12 : 1; UINT_32 isDcn1 : 1; - UINT_32 reserved1 : 29; // Misc configuration bits UINT_32 metaBaseAlignFix : 1; UINT_32 depthPipeXorDisable : 1; UINT_32 htileAlignFix : 1; UINT_32 applyAliasFix : 1; - UINT_32 reserved2 : 28; + UINT_32 
htileCacheRbConflict: 1; + UINT_32 reserved2 : 27; }; }; /** ************************************************************************************************************************ * @brief GFX9 data surface type. ************************************************************************************************************************ */ enum Gfx9DataType { @@ -114,23 +114,20 @@ struct MetaEqParams class Gfx9Lib : public Lib { public: /// Creates Gfx9Lib object static Addr::Lib* CreateObj(const Client* pClient) { VOID* pMem = Object::ClientAlloc(sizeof(Gfx9Lib), pClient); return (pMem != NULL) ? new (pMem) Gfx9Lib(pClient) : NULL; } - virtual BOOL_32 IsValidDisplaySwizzleMode( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const; - protected: Gfx9Lib(const Client* pClient); virtual ~Gfx9Lib(); virtual BOOL_32 HwlIsStandardSwizzle( AddrResourceType resourceType, AddrSwizzleMode swizzleMode) const { return m_swizzleModeTable[swizzleMode].isStd || (IsTex3d(resourceType) && m_swizzleModeTable[swizzleMode].isDisp); @@ -217,21 +214,21 @@ protected: *ppEquationTable = m_equationTable; return m_numEquations; } virtual BOOL_32 IsEquationSupported( AddrResourceType rsrcType, AddrSwizzleMode swMode, UINT_32 elementBytesLog2) const; - virtual UINT_32 HwlComputeSurfaceBaseAlign(AddrSwizzleMode swizzleMode) const + UINT_32 ComputeSurfaceBaseAlignTiled(AddrSwizzleMode swizzleMode) const { UINT_32 baseAlign; if (IsXor(swizzleMode)) { baseAlign = GetBlockSize(swizzleMode); } else { baseAlign = 256; @@ -393,25 +390,25 @@ protected: ADDR_EQUATION m_equationTable[EquationTableSize]; // Number of equation entries in the table UINT_32 m_numEquations; // Equation lookup table according to bpp and tile index UINT_32 m_equationLookupTable[MaxRsrcType][MaxSwMode][MaxElementBytesLog2]; static const UINT_32 MaxCachedMetaEq = 2; private: - virtual ADDR_E_RETURNCODE HwlGetMaxAlignments( - ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut) const; + virtual UINT_32 HwlComputeMaxBaseAlignments() const; - 
virtual BOOL_32 HwlInitGlobalParams( - const ADDR_CREATE_INPUT* pCreateIn); + virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const; + + virtual BOOL_32 HwlInitGlobalParams(const ADDR_CREATE_INPUT* pCreateIn); VOID GetRbEquation(CoordEq* pRbEq, UINT_32 rbPerSeLog2, UINT_32 seLog2) const; VOID GetDataEquation(CoordEq* pDataEq, Gfx9DataType dataSurfaceType, AddrSwizzleMode swizzleMode, AddrResourceType resourceType, UINT_32 elementBytesLog2, UINT_32 numSamplesLog2) const; VOID GetPipeEquation(CoordEq* pPipeEq, CoordEq* pDataEq, UINT_32 pipeInterleaveLog2, UINT_32 numPipesLog2, UINT_32 numSamplesLog2, Gfx9DataType dataSurfaceType, @@ -427,20 +424,22 @@ private: const CoordEq* GetMetaEquation(const MetaEqParams& metaEqParams); virtual ChipFamily HwlConvertChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision); VOID GetMetaMipInfo(UINT_32 numMipLevels, Dim3d* pMetaBlkDim, BOOL_32 dataThick, ADDR2_META_MIP_INFO* pInfo, UINT_32 mip0Width, UINT_32 mip0Height, UINT_32 mip0Depth, UINT_32* pNumMetaBlkX, UINT_32* pNumMetaBlkY, UINT_32* pNumMetaBlkZ) const; + BOOL_32 IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const; + ADDR_E_RETURNCODE ComputeSurfaceLinearPadding( const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, UINT_32* pMipmap0PaddedWidth, UINT_32* pSlice0PaddedHeight, ADDR2_MIP_INFO* pMipInfo = NULL) const; Gfx9ChipSettings m_settings; CoordEq m_cachedMetaEq[MaxCachedMetaEq]; MetaEqParams m_cachedMetaEqKey[MaxCachedMetaEq]; diff --git a/src/amd/addrlib/r800/ciaddrlib.cpp b/src/amd/addrlib/r800/ciaddrlib.cpp index 322dcf64ffd..1b982c5c08b 100644 --- a/src/amd/addrlib/r800/ciaddrlib.cpp +++ b/src/amd/addrlib/r800/ciaddrlib.cpp @@ -729,21 +729,21 @@ ADDR_E_RETURNCODE CiLib::HwlComputeSurfaceInfo( { pOut->macroModeIndex = TileIndexInvalid; ADDR_COMPUTE_SURFACE_INFO_INPUT localIn = *pIn; localIn.tileIndex = TileIndexInvalid; localIn.pTileInfo = NULL; localIn.flags.tcCompatible = FALSE; SiLib::HwlComputeSurfaceInfo(&localIn, pOut); - 
ADDR_ASSERT(((MinDepth2DThinIndex <= pOut->tileIndex) && (MaxDepth2DThinIndex >= pOut->tileIndex)) || pOut->tileIndex == Depth1DThinIndex); + ADDR_ASSERT((MinDepth2DThinIndex <= pOut->tileIndex) && (MaxDepth2DThinIndex >= pOut->tileIndex)); depthStencil2DTileConfigMatch = DepthStencilTileCfgMatch(pIn, pOut); } if ((depthStencil2DTileConfigMatch == FALSE) && (pIn->numSamples <= 1)) { pOut->macroModeIndex = TileIndexInvalid; ADDR_COMPUTE_SURFACE_INFO_INPUT localIn = *pIn; @@ -2150,57 +2150,75 @@ VOID CiLib::HwlPadDimensions( *pPitchAlign = dccFastClearPitchAlignInPixels; } } } } } /** **************************************************************************************************** -* CiLib::HwlGetMaxAlignments +* CiLib::HwlComputeMaxBaseAlignments * * @brief * Gets maximum alignments * @return -* ADDR_E_RETURNCODE +* maximum alignments **************************************************************************************************** */ -ADDR_E_RETURNCODE CiLib::HwlGetMaxAlignments( - ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut ///< [out] output structure - ) const +UINT_32 CiLib::HwlComputeMaxBaseAlignments() const { const UINT_32 pipes = HwlGetPipes(&m_tileTable[0].info); // Initial size is 64 KiB for PRT. - UINT_64 maxBaseAlign = 64 * 1024; + UINT_32 maxBaseAlign = 64 * 1024; for (UINT_32 i = 0; i < m_noOfMacroEntries; i++) { // The maximum tile size is 16 byte-per-pixel and either 8-sample or 8-slice. 
UINT_32 tileSize = m_macroTileTable[i].tileSplitBytes; - UINT_64 baseAlign = tileSize * pipes * m_macroTileTable[i].banks * + UINT_32 baseAlign = tileSize * pipes * m_macroTileTable[i].banks * m_macroTileTable[i].bankWidth * m_macroTileTable[i].bankHeight; if (baseAlign > maxBaseAlign) { maxBaseAlign = baseAlign; } } - if (pOut != NULL) + return maxBaseAlign; +} + +/** +**************************************************************************************************** +* CiLib::HwlComputeMaxMetaBaseAlignments +* +* @brief +* Gets maximum alignments for metadata +* @return +* maximum alignments for metadata +**************************************************************************************************** +*/ +UINT_32 CiLib::HwlComputeMaxMetaBaseAlignments() const +{ + UINT_32 maxBank = 1; + + for (UINT_32 i = 0; i < m_noOfMacroEntries; i++) { - pOut->baseAlign = maxBaseAlign; + if ((m_settings.isVolcanicIslands) && IsMacroTiled(m_tileTable[i].mode)) + { + maxBank = Max(maxBank, m_macroTileTable[i].banks); + } } - return ADDR_OK; + return SiLib::HwlComputeMaxMetaBaseAlignments() * maxBank; } /** **************************************************************************************************** * CiLib::DepthStencilTileCfgMatch * * @brief * Try to find a tile index for stencil which makes its tile config parameters matches to depth * @return * TRUE if such tile index for stencil can be found diff --git a/src/amd/addrlib/r800/ciaddrlib.h b/src/amd/addrlib/r800/ciaddrlib.h index c11b678574f..28c19f06031 100644 --- a/src/amd/addrlib/r800/ciaddrlib.h +++ b/src/amd/addrlib/r800/ciaddrlib.h @@ -130,21 +130,23 @@ protected: ADDR_COMPUTE_DCCINFO_OUTPUT* pOut) const; virtual ADDR_E_RETURNCODE HwlComputeCmaskAddrFromCoord( const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) const; virtual ADDR_E_RETURNCODE HwlComputeHtileAddrFromCoord( const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* 
pOut) const; - virtual ADDR_E_RETURNCODE HwlGetMaxAlignments(ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut) const; + virtual UINT_32 HwlComputeMaxBaseAlignments() const; + + virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const; virtual VOID HwlPadDimensions( AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples, ADDR_TILEINFO* pTileInfo, UINT_32 mipLevel, UINT_32* pPitch, UINT_32 *PitchAlign, UINT_32 height, UINT_32 heightAlign) const; virtual VOID HwlComputeSurfaceAlignmentsMacroTiled( AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 mipLevel, UINT_32 numSamples, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; diff --git a/src/amd/addrlib/r800/egbaddrlib.cpp b/src/amd/addrlib/r800/egbaddrlib.cpp index 99aa6cf4cdb..3947cfda2fd 100644 --- a/src/amd/addrlib/r800/egbaddrlib.cpp +++ b/src/amd/addrlib/r800/egbaddrlib.cpp @@ -93,25 +93,27 @@ BOOL_32 EgBasedLib::DispatchComputeSurfaceInfo( UINT_32 numSamples = pIn->numSamples; UINT_32 numFrags = ((pIn->numFrags == 0) ? 
numSamples : pIn->numFrags); UINT_32 pitch = pIn->width; UINT_32 height = pIn->height; UINT_32 numSlices = pIn->numSlices; UINT_32 mipLevel = pIn->mipLevel; ADDR_SURFACE_FLAGS flags = pIn->flags; ADDR_TILEINFO tileInfoDef = {0}; ADDR_TILEINFO* pTileInfo = &tileInfoDef; - - UINT_32 padDims = 0; + UINT_32 padDims = 0; BOOL_32 valid; - tileMode = DegradeLargeThickTile(tileMode, bpp); + if (pIn->flags.disallowLargeThickDegrade == 0) + { + tileMode = DegradeLargeThickTile(tileMode, bpp); + } // Only override numSamples for NI above if (m_chipFamily >= ADDR_CHIP_FAMILY_NI) { if (numFrags != numSamples) // This means EQAA { // The real surface size needed is determined by number of fragments numSamples = numFrags; } diff --git a/src/amd/addrlib/r800/siaddrlib.cpp b/src/amd/addrlib/r800/siaddrlib.cpp index 0fb5c2befdc..3c17a7aa8d7 100644 --- a/src/amd/addrlib/r800/siaddrlib.cpp +++ b/src/amd/addrlib/r800/siaddrlib.cpp @@ -3461,62 +3461,77 @@ VOID SiLib::HwlSelectTileMode( pInOut->flags.opt4Space = TRUE; // Optimize tile mode if possible OptimizeTileMode(pInOut); HwlOverrideTileMode(pInOut); } /** **************************************************************************************************** -* SiLib::HwlGetMaxAlignments +* SiLib::HwlComputeMaxBaseAlignments * * @brief * Gets maximum alignments * @return -* ADDR_E_RETURNCODE +* maximum alignments **************************************************************************************************** */ -ADDR_E_RETURNCODE SiLib::HwlGetMaxAlignments( - ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut ///< [out] output structure - ) const +UINT_32 SiLib::HwlComputeMaxBaseAlignments() const { const UINT_32 pipes = HwlGetPipes(&m_tileTable[0].info); // Initial size is 64 KiB for PRT. 
- UINT_64 maxBaseAlign = 64 * 1024; + UINT_32 maxBaseAlign = 64 * 1024; for (UINT_32 i = 0; i < m_noOfEntries; i++) { if ((IsMacroTiled(m_tileTable[i].mode) == TRUE) && (IsPrtTileMode(m_tileTable[i].mode) == FALSE)) { // The maximum tile size is 16 byte-per-pixel and either 8-sample or 8-slice. UINT_32 tileSize = Min(m_tileTable[i].info.tileSplitBytes, MicroTilePixels * 8 * 16); - UINT_64 baseAlign = tileSize * pipes * m_tileTable[i].info.banks * + UINT_32 baseAlign = tileSize * pipes * m_tileTable[i].info.banks * m_tileTable[i].info.bankWidth * m_tileTable[i].info.bankHeight; if (baseAlign > maxBaseAlign) { maxBaseAlign = baseAlign; } } } - if (pOut != NULL) + return maxBaseAlign; +} + +/** +**************************************************************************************************** +* SiLib::HwlComputeMaxMetaBaseAlignments +* +* @brief +* Gets maximum alignments for metadata +* @return +* maximum alignments for metadata +**************************************************************************************************** +*/ +UINT_32 SiLib::HwlComputeMaxMetaBaseAlignments() const +{ + UINT_32 maxPipe = 1; + + for (UINT_32 i = 0; i < m_noOfEntries; i++) { - pOut->baseAlign = maxBaseAlign; + maxPipe = Max(maxPipe, HwlGetPipes(&m_tileTable[i].info)); } - return ADDR_OK; + return m_pipeInterleaveBytes * maxPipe; } /** **************************************************************************************************** * SiLib::HwlComputeSurfaceAlignmentsMacroTiled * * @brief * Hardware layer function to compute alignment request for macro tile mode * * @return diff --git a/src/amd/addrlib/r800/siaddrlib.h b/src/amd/addrlib/r800/siaddrlib.h index f07fc31a57d..9c879fe6c36 100644 --- a/src/amd/addrlib/r800/siaddrlib.h +++ b/src/amd/addrlib/r800/siaddrlib.h @@ -256,21 +256,23 @@ protected: UINT_32* pNumSamples) const; virtual BOOL_32 HwlReduceBankWidthHeight( UINT_32 tileSize, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples, UINT_32 bankHeightAlign, 
UINT_32 pipes, ADDR_TILEINFO* pTileInfo) const { return TRUE; } - virtual ADDR_E_RETURNCODE HwlGetMaxAlignments(ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut) const; + virtual UINT_32 HwlComputeMaxBaseAlignments() const; + + virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const; virtual VOID HwlComputeSurfaceAlignmentsMacroTiled( AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 mipLevel, UINT_32 numSamples, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; // Get equation table pointer and number of equations virtual UINT_32 HwlGetEquationTableInfo(const ADDR_EQUATION** ppEquationTable) const { *ppEquationTable = m_equationTable; diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c index 92bdf1dedec..603b7058bdc 100644 --- a/src/amd/common/ac_surface.c +++ b/src/amd/common/ac_surface.c @@ -156,21 +156,21 @@ static ADDR_E_RETURNCODE ADDR_API freeSysMem(const ADDR_FREESYSMEM_INPUT * pInpu } ADDR_HANDLE amdgpu_addr_create(const struct radeon_info *info, const struct amdgpu_gpu_info *amdinfo, uint64_t *max_alignment) { ADDR_CREATE_INPUT addrCreateInput = {0}; ADDR_CREATE_OUTPUT addrCreateOutput = {0}; ADDR_REGISTER_VALUE regValue = {0}; ADDR_CREATE_FLAGS createFlags = {{0}}; - ADDR_GET_MAX_ALIGNMENTS_OUTPUT addrGetMaxAlignmentsOutput = {0}; + ADDR_GET_MAX_ALINGMENTS_OUTPUT addrGetMaxAlignmentsOutput = {0}; ADDR_E_RETURNCODE addrRet; addrCreateInput.size = sizeof(ADDR_CREATE_INPUT); addrCreateOutput.size = sizeof(ADDR_CREATE_OUTPUT); regValue.gbAddrConfig = amdinfo->gb_addr_cfg; createFlags.value = 0; addrlib_family_rev_id(info->family, &addrCreateInput.chipFamily, &addrCreateInput.chipRevision); if (addrCreateInput.chipFamily == FAMILY_UNKNOWN) -- 2.15.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev