On Thu, Jun 06, 2019 at 11:41:07PM +0200, Mark Kettenis wrote:
> As a result of a recent discussion with jsg@, I realized that the
> graphics drivers are (mostly) allocating memory from the dma region.
> Since the graphics stack can potentially gobble up large amounts
> of memory, this means we can run out of dma memory which makes other
> parts of our kernel quite unhappy.  Most of the supported hardware
> actually supports 64-bit DMA just fine, and the drivers already have
> code to handle the exceptions.  The diff below makes use of this
> knowledge to (hopefully) safely allocate from "high" memory when
> possible.  One big change is that this makes bus_dma(9) 64-bit DMA
> aware in the sense that if the BUS_DMA_64BIT flag is used, we skip the
> "not dma-reachable" panic.
> 
> It seems to work fine on my Intel Broadwell laptop.  I haven't tested
> this on radeon(4) yet.  So further testing, especially on systems with
> 4GB of memory or more is necessary.
> 
> Please test.

One of the ways dma32 is set in radeon/amdgpu is along the lines of

dma_bits = rdev->need_dma32 ? 32 : 40;
r = pci_set_dma_mask(rdev->pdev, DMA_BIT_MASK(dma_bits));
if (r) {
        rdev->need_dma32 = true;
        dma_bits = 32;
        ...
}

which we don't handle.  Is that for the case where the card supports
64-bit DMA but the system doesn't?  Not something we should be
concerned about?
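
For context, and going purely from memory of the Linux code this is
based on (so take it as a sketch, not gospel): need_dma32 gets passed
to ttm_bo_device_init(), and ttm_tt_create() then turns it into the
page flag your ttm_tt.c change keys off, roughly:

/* paraphrased from my reading of Linux ttm_tt.c, illustration only */
int ttm_tt_create(struct ttm_buffer_object *bo, bool zero_alloc)
{
        struct ttm_bo_device *bdev = bo->bdev;
        uint32_t page_flags = 0;

        if (bdev->need_dma32)
                page_flags |= TTM_PAGE_FLAG_DMA32;
        ...
}

So if that failure path ever fired, I'd expect it to just end up
setting TTM_PAGE_FLAG_DMA32, which the diff below keeps honouring.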

Having __GFP_DMA32 set the sign bit on the flags seems like it is asking
for trouble.  How about 0x40000000 (bit 30) instead of
0x80000000 (bit 31)?
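
i.e. something like this in gfp.h (sketch only, same layout as the
diff, just a different bit):

#define __GFP_DMA32             0x40000000      /* bit 30, clear of the sign bit */

which avoids surprises if a gfp mask ever ends up in a signed int.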

Otherwise looks good.

> 
> 
> Index: arch/amd64/amd64/bus_dma.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/amd64/bus_dma.c,v
> retrieving revision 1.50
> diff -u -p -r1.50 bus_dma.c
> --- arch/amd64/amd64/bus_dma.c        14 Oct 2017 04:44:43 -0000      1.50
> +++ arch/amd64/amd64/bus_dma.c        6 Jun 2019 21:19:21 -0000
> @@ -319,7 +319,8 @@ _bus_dmamap_load_raw(bus_dma_tag_t t, bu
>                       if (plen < sgsize)
>                               sgsize = plen;
>  
> -                     if (paddr > dma_constraint.ucr_high)
> +                     if (paddr > dma_constraint.ucr_high &&
> +                         (map->_dm_flags & BUS_DMA_64BIT) == 0)
>                               panic("Non dma-reachable buffer at paddr %#lx(raw)",
>                                   paddr);
>  
> @@ -583,7 +584,8 @@ _bus_dmamap_load_buffer(bus_dma_tag_t t,
>                */
>               pmap_extract(pmap, vaddr, (paddr_t *)&curaddr);
>  
> -             if (curaddr > dma_constraint.ucr_high)
> +             if (curaddr > dma_constraint.ucr_high &&
> +                 (map->_dm_flags & BUS_DMA_64BIT) == 0)
>                       panic("Non dma-reachable buffer at curaddr %#lx(raw)",
>                           curaddr);
>  
> Index: dev/pci/drm/drm_linux.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/drm/drm_linux.c,v
> retrieving revision 1.37
> diff -u -p -r1.37 drm_linux.c
> --- dev/pci/drm/drm_linux.c   4 Jun 2019 12:08:22 -0000       1.37
> +++ dev/pci/drm/drm_linux.c   6 Jun 2019 21:19:21 -0000
> @@ -293,16 +293,19 @@ struct vm_page *
>  alloc_pages(unsigned int gfp_mask, unsigned int order)
>  {
>       int flags = (gfp_mask & M_NOWAIT) ? UVM_PLA_NOWAIT : UVM_PLA_WAITOK;
> +     struct uvm_constraint_range *constraint = &no_constraint;
>       struct pglist mlist;
>  
>       if (gfp_mask & M_CANFAIL)
>               flags |= UVM_PLA_FAILOK;
>       if (gfp_mask & M_ZERO)
>               flags |= UVM_PLA_ZERO;
> +     if (gfp_mask & __GFP_DMA32)
> +             constraint = &dma_constraint;
>  
>       TAILQ_INIT(&mlist);
> -     if (uvm_pglistalloc(PAGE_SIZE << order, dma_constraint.ucr_low,
> -         dma_constraint.ucr_high, PAGE_SIZE, 0, &mlist, 1, flags))
> +     if (uvm_pglistalloc(PAGE_SIZE << order, constraint->ucr_low,
> +         constraint->ucr_high, PAGE_SIZE, 0, &mlist, 1, flags))
>               return NULL;
>       return TAILQ_FIRST(&mlist);
>  }
> Index: dev/pci/drm/include/linux/gfp.h
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/drm/include/linux/gfp.h,v
> retrieving revision 1.1
> diff -u -p -r1.1 gfp.h
> --- dev/pci/drm/include/linux/gfp.h   14 Apr 2019 10:14:53 -0000      1.1
> +++ dev/pci/drm/include/linux/gfp.h   6 Jun 2019 21:19:21 -0000
> @@ -7,24 +7,25 @@
>  #include <sys/malloc.h>
>  #include <uvm/uvm_extern.h>
>  
> -#define GFP_ATOMIC   M_NOWAIT
> -#define GFP_NOWAIT   M_NOWAIT
> -#define GFP_KERNEL   (M_WAITOK | M_CANFAIL)
> -#define GFP_USER     (M_WAITOK | M_CANFAIL)
> -#define GFP_TEMPORARY        (M_WAITOK | M_CANFAIL)
> -#define GFP_HIGHUSER 0
> -#define GFP_DMA32    0
> -#define __GFP_NOWARN 0
> -#define __GFP_NORETRY        0
> -#define __GFP_ZERO   M_ZERO
> +#define __GFP_ZERO           M_ZERO
> +#define __GFP_DMA32          0x80000000
> +#define __GFP_NOWARN         0
> +#define __GFP_NORETRY                0
>  #define __GFP_RETRY_MAYFAIL  0
>  #define __GFP_MOVABLE                0
>  #define __GFP_COMP           0
> -#define GFP_TRANSHUGE_LIGHT  0
>  #define __GFP_KSWAPD_RECLAIM 0
>  #define __GFP_HIGHMEM                0
>  #define __GFP_RECLAIMABLE    0
> -#define __GFP_DMA32          0
> +
> +#define GFP_ATOMIC           M_NOWAIT
> +#define GFP_NOWAIT           M_NOWAIT
> +#define GFP_KERNEL           (M_WAITOK | M_CANFAIL)
> +#define GFP_USER             (M_WAITOK | M_CANFAIL)
> +#define GFP_TEMPORARY                (M_WAITOK | M_CANFAIL)
> +#define GFP_HIGHUSER         0
> +#define GFP_DMA32            __GFP_DMA32
> +#define GFP_TRANSHUGE_LIGHT  0
>  
>  static inline bool
>  gfpflags_allow_blocking(const unsigned int flags)
> Index: dev/pci/drm/ttm/ttm_tt.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/drm/ttm/ttm_tt.c,v
> retrieving revision 1.8
> diff -u -p -r1.8 ttm_tt.c
> --- dev/pci/drm/ttm/ttm_tt.c  14 Apr 2019 10:14:54 -0000      1.8
> +++ dev/pci/drm/ttm/ttm_tt.c  6 Jun 2019 21:19:22 -0000
> @@ -261,6 +261,7 @@ int ttm_dma_tt_init(struct ttm_dma_tt *t
>                   uint32_t page_flags)
>  {
>       struct ttm_tt *ttm = &ttm_dma->ttm;
> +     int flags = BUS_DMA_WAITOK;
>  
>       ttm_tt_init_fields(ttm, bo, page_flags);
>  
> @@ -276,8 +277,10 @@ int ttm_dma_tt_init(struct ttm_dma_tt *t
>  
>       ttm_dma->dmat = bo->bdev->dmat;
>  
> +     if ((page_flags & TTM_PAGE_FLAG_DMA32) == 0)
> +             flags |= BUS_DMA_64BIT;
>       if (bus_dmamap_create(ttm_dma->dmat, ttm->num_pages << PAGE_SHIFT,
> -         ttm->num_pages, ttm->num_pages << PAGE_SHIFT, 0, BUS_DMA_WAITOK,
> +         ttm->num_pages, ttm->num_pages << PAGE_SHIFT, 0, flags,
>           &ttm_dma->map)) {
>               free(ttm_dma->segs, M_DRM, 0);
>               ttm_tt_destroy(ttm);
> @@ -293,6 +296,7 @@ int ttm_sg_tt_init(struct ttm_dma_tt *tt
>                  uint32_t page_flags)
>  {
>       struct ttm_tt *ttm = &ttm_dma->ttm;
> +     int flags = BUS_DMA_WAITOK;
>       int ret;
>  
>       ttm_tt_init_fields(ttm, bo, page_flags);
> @@ -313,8 +317,10 @@ int ttm_sg_tt_init(struct ttm_dma_tt *tt
>  
>       ttm_dma->dmat = bo->bdev->dmat;
>  
> +     if ((page_flags & TTM_PAGE_FLAG_DMA32) == 0)
> +             flags |= BUS_DMA_64BIT;
>       if (bus_dmamap_create(ttm_dma->dmat, ttm->num_pages << PAGE_SHIFT,
> -         ttm->num_pages, ttm->num_pages << PAGE_SHIFT, 0, BUS_DMA_WAITOK,
> +         ttm->num_pages, ttm->num_pages << PAGE_SHIFT, 0, flags,
>           &ttm_dma->map)) {
>               free(ttm_dma->segs, M_DRM, 0);
>               ttm_tt_destroy(ttm);
> 
