On Wed, Nov 19, 2025 at 6:59 PM Tianci Yin <[email protected]> wrote:
>
> From: tiancyin <[email protected]>
>
> [Why]
> On some servers equipped with huge system memory at multi-terabyte scale,
> the PCI bus physical address alignment policy may assign GPUs very large
> bus addresses that exceed 44 bits. This causes DMA address overflow errors:
>
> [   83.216803] amdgpu 0000:43:00.0: DMA addr 0x0000210b39000000+8388608 overflow (mask fffffffffff, bus limit 0).
>
> [How]
> Enlarge the DMA mask from 44-bit to 48-bit to accommodate larger physical
> addresses.

The GPU only has 44 bits of DMA addressing, so you can't increase the
DMA mask. You'll need to use the IOMMU if you have more address space
than the GPU can access.

Alex

>
> Signed-off-by: tiancyin <[email protected]>
> ---
>  drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 24 +++++++++++++++++++-----
>  drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 26 +++++++++++++++++++++-----
>  2 files changed, 40 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
> index a1f8141f28c9..60393e311537 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
> @@ -21,6 +21,7 @@
>   *
>   */
>  #include <linux/firmware.h>
> +#include <linux/processor.h>
>  #include <linux/pci.h>
>
>  #include <drm/drm_cache.h>
> @@ -726,7 +727,7 @@ static int gmc_v11_0_gart_init(struct amdgpu_device *adev)
>
>  static int gmc_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
>  {
> -       int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
> +       int r, vram_width = 0, vram_type = 0, vram_vendor = 0, dma_mask;
>         struct amdgpu_device *adev = ip_block->adev;
>
>         adev->mmhub.funcs->init(adev);
> @@ -805,13 +806,26 @@ static int gmc_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
>          */
>         adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
>
> -       r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
> +#if defined CONFIG_X86 && defined CONFIG_PHYS_ADDR_T_64BIT
> +       dma_mask = boot_cpu_data.x86_phys_bits >= 48 ? 48 : 44;
> +#else
> +       dma_mask = 44;
> +#endif
> +       r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(dma_mask));
>         if (r) {
> -               dev_warn(adev->dev, "amdgpu: No suitable DMA available.\n");
> -               return r;
> +               dev_notice(adev->dev,
> +                       "amdgpu: %d bit DMA is not available, fallback to 44 bit.\n",
> +                       dma_mask);
> +               dma_mask = 44;
> +               r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(dma_mask));
> +               if (r) {
> +                       dev_warn(adev->dev,
> +                                "amdgpu: No suitable DMA available.\n");
> +                       return r;
> +               }
>         }
>
> -       adev->need_swiotlb = drm_need_swiotlb(44);
> +       adev->need_swiotlb = drm_need_swiotlb(dma_mask);
>
>         r = gmc_v11_0_mc_init(adev);
>         if (r)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> index f4a19357ccbc..5ca3d1141cb3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> @@ -21,6 +21,7 @@
>   *
>   */
>  #include <linux/firmware.h>
> +#include <linux/processor.h>
>  #include <linux/pci.h>
>
>  #include <drm/drm_cache.h>
> @@ -742,7 +743,7 @@ static int gmc_v12_0_gart_init(struct amdgpu_device *adev)
>
>  static int gmc_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
>  {
> -       int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
> +       int r, vram_width = 0, vram_type = 0, vram_vendor = 0, dma_mask;
>         struct amdgpu_device *adev = ip_block->adev;
>
>         adev->mmhub.funcs->init(adev);
> @@ -802,13 +803,28 @@ static int gmc_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
>          */
>         adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
>
> -       r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
> +#if defined CONFIG_X86 && defined CONFIG_PHYS_ADDR_T_64BIT
> +       dma_mask = boot_cpu_data.x86_phys_bits >= 48 ? 48 : 44;
> +#else
> +       dma_mask = 44;
> +#endif
> +       r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(dma_mask));
>         if (r) {
> -               printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
> -               return r;
> +               printk(KERN_NOTICE
> +                       "amdgpu: %d bit DMA is not available, fallback to 44 bit.\n",
> +                       dma_mask);
> +               dma_mask = 44;
> +               r = dma_set_mask_and_coherent(adev->dev,
> +                                             DMA_BIT_MASK(dma_mask));
> +               if (r) {
> +                       printk(KERN_WARNING
> +                              "amdgpu: No suitable DMA available.\n");
> +                       return r;
> +               }
> +
>         }
>
> -       adev->need_swiotlb = drm_need_swiotlb(44);
> +       adev->need_swiotlb = drm_need_swiotlb(dma_mask);
>
>         r = gmc_v12_0_mc_init(adev);
>         if (r)
> --
> 2.34.1
>

Reply via email to