On Thu, Jun 06, 2019 at 11:41:07PM +0200, Mark Kettenis wrote: > As a result of a recent discussion with jsg@, I realized that the > graphics drivers are (mostly) allocating memory from the dma region. > Since the graphics stack can potentially gobble up large amounts > of memory, this means we can run out of dma memory which makes other > parts of our kernel quite unhappy. Most of the supported hardware > actually supports 64-bit DMA just fine, and the drivers already have > code to handle the exceptions. The diff below makes use of this > knowledge to (hopefully) safely allocate from "high" memory when > possible. One big change is that this makes bus_dma(9) 64-bit DMA > aware in the sense that if the BUS_DMA_64BIT flag is used, we skip the > "not dma-reachable" panic. > > It seems to work fine on my Intel Broadwell laptop. I haven't tested > this on radeon(4) yet. So further testing, especially on systems with > 4GB of memory or more, is necessary. > > Please test.
One of the ways dma32 is set in radeon/amdgpu is along the lines of dma_bits = rdev->need_dma32 ? 32 : 40; r = pci_set_dma_mask(rdev->pdev, DMA_BIT_MASK(dma_bits)); if (r) { rdev->need_dma32 = true; dma_bits = 32; which we don't handle. That is for the case where the card supports 64 bit dma but the system doesn't? Not something we should be concerned about? Having __GFP_DMA32 set the sign bit on the flags seems like it is asking for trouble. How about 0x40000000 / bit 30 instead of 0x80000000 / bit 31. Otherwise looks good. > > > Index: arch/amd64/amd64/bus_dma.c > =================================================================== > RCS file: /cvs/src/sys/arch/amd64/amd64/bus_dma.c,v > retrieving revision 1.50 > diff -u -p -r1.50 bus_dma.c > --- arch/amd64/amd64/bus_dma.c 14 Oct 2017 04:44:43 -0000 1.50 > +++ arch/amd64/amd64/bus_dma.c 6 Jun 2019 21:19:21 -0000 > @@ -319,7 +319,8 @@ _bus_dmamap_load_raw(bus_dma_tag_t t, bu > if (plen < sgsize) > sgsize = plen; > > - if (paddr > dma_constraint.ucr_high) > + if (paddr > dma_constraint.ucr_high && > + (map->_dm_flags & BUS_DMA_64BIT) == 0) > panic("Non dma-reachable buffer at paddr > %#lx(raw)", > paddr); > > @@ -583,7 +584,8 @@ _bus_dmamap_load_buffer(bus_dma_tag_t t, > */ > pmap_extract(pmap, vaddr, (paddr_t *)&curaddr); > > - if (curaddr > dma_constraint.ucr_high) > + if (curaddr > dma_constraint.ucr_high && > + (map->_dm_flags & BUS_DMA_64BIT) == 0) > panic("Non dma-reachable buffer at curaddr %#lx(raw)", > curaddr); > > Index: dev/pci/drm/drm_linux.c > =================================================================== > RCS file: /cvs/src/sys/dev/pci/drm/drm_linux.c,v > retrieving revision 1.37 > diff -u -p -r1.37 drm_linux.c > --- dev/pci/drm/drm_linux.c 4 Jun 2019 12:08:22 -0000 1.37 > +++ dev/pci/drm/drm_linux.c 6 Jun 2019 21:19:21 -0000 > @@ -293,16 +293,19 @@ struct vm_page * > alloc_pages(unsigned int gfp_mask, unsigned int order) > { > int flags = (gfp_mask & M_NOWAIT) ? 
UVM_PLA_NOWAIT : UVM_PLA_WAITOK; > + struct uvm_constraint_range *constraint = &no_constraint; > struct pglist mlist; > > if (gfp_mask & M_CANFAIL) > flags |= UVM_PLA_FAILOK; > if (gfp_mask & M_ZERO) > flags |= UVM_PLA_ZERO; > + if (gfp_mask & __GFP_DMA32) > + constraint = &dma_constraint; > > TAILQ_INIT(&mlist); > - if (uvm_pglistalloc(PAGE_SIZE << order, dma_constraint.ucr_low, > - dma_constraint.ucr_high, PAGE_SIZE, 0, &mlist, 1, flags)) > + if (uvm_pglistalloc(PAGE_SIZE << order, constraint->ucr_low, > + constraint->ucr_high, PAGE_SIZE, 0, &mlist, 1, flags)) > return NULL; > return TAILQ_FIRST(&mlist); > } > Index: dev/pci/drm/include/linux/gfp.h > =================================================================== > RCS file: /cvs/src/sys/dev/pci/drm/include/linux/gfp.h,v > retrieving revision 1.1 > diff -u -p -r1.1 gfp.h > --- dev/pci/drm/include/linux/gfp.h 14 Apr 2019 10:14:53 -0000 1.1 > +++ dev/pci/drm/include/linux/gfp.h 6 Jun 2019 21:19:21 -0000 > @@ -7,24 +7,25 @@ > #include <sys/malloc.h> > #include <uvm/uvm_extern.h> > > -#define GFP_ATOMIC M_NOWAIT > -#define GFP_NOWAIT M_NOWAIT > -#define GFP_KERNEL (M_WAITOK | M_CANFAIL) > -#define GFP_USER (M_WAITOK | M_CANFAIL) > -#define GFP_TEMPORARY (M_WAITOK | M_CANFAIL) > -#define GFP_HIGHUSER 0 > -#define GFP_DMA32 0 > -#define __GFP_NOWARN 0 > -#define __GFP_NORETRY 0 > -#define __GFP_ZERO M_ZERO > +#define __GFP_ZERO M_ZERO > +#define __GFP_DMA32 0x80000000 > +#define __GFP_NOWARN 0 > +#define __GFP_NORETRY 0 > #define __GFP_RETRY_MAYFAIL 0 > #define __GFP_MOVABLE 0 > #define __GFP_COMP 0 > -#define GFP_TRANSHUGE_LIGHT 0 > #define __GFP_KSWAPD_RECLAIM 0 > #define __GFP_HIGHMEM 0 > #define __GFP_RECLAIMABLE 0 > -#define __GFP_DMA32 0 > + > +#define GFP_ATOMIC M_NOWAIT > +#define GFP_NOWAIT M_NOWAIT > +#define GFP_KERNEL (M_WAITOK | M_CANFAIL) > +#define GFP_USER (M_WAITOK | M_CANFAIL) > +#define GFP_TEMPORARY (M_WAITOK | M_CANFAIL) > +#define GFP_HIGHUSER 0 > +#define GFP_DMA32 __GFP_DMA32 > +#define 
GFP_TRANSHUGE_LIGHT 0 > > static inline bool > gfpflags_allow_blocking(const unsigned int flags) > Index: dev/pci/drm/ttm/ttm_tt.c > =================================================================== > RCS file: /cvs/src/sys/dev/pci/drm/ttm/ttm_tt.c,v > retrieving revision 1.8 > diff -u -p -r1.8 ttm_tt.c > --- dev/pci/drm/ttm/ttm_tt.c 14 Apr 2019 10:14:54 -0000 1.8 > +++ dev/pci/drm/ttm/ttm_tt.c 6 Jun 2019 21:19:22 -0000 > @@ -261,6 +261,7 @@ int ttm_dma_tt_init(struct ttm_dma_tt *t > uint32_t page_flags) > { > struct ttm_tt *ttm = &ttm_dma->ttm; > + int flags = BUS_DMA_WAITOK; > > ttm_tt_init_fields(ttm, bo, page_flags); > > @@ -276,8 +277,10 @@ int ttm_dma_tt_init(struct ttm_dma_tt *t > > ttm_dma->dmat = bo->bdev->dmat; > > + if ((page_flags & TTM_PAGE_FLAG_DMA32) == 0) > + flags |= BUS_DMA_64BIT; > if (bus_dmamap_create(ttm_dma->dmat, ttm->num_pages << PAGE_SHIFT, > - ttm->num_pages, ttm->num_pages << PAGE_SHIFT, 0, BUS_DMA_WAITOK, > + ttm->num_pages, ttm->num_pages << PAGE_SHIFT, 0, flags, > &ttm_dma->map)) { > free(ttm_dma->segs, M_DRM, 0); > ttm_tt_destroy(ttm); > @@ -293,6 +296,7 @@ int ttm_sg_tt_init(struct ttm_dma_tt *tt > uint32_t page_flags) > { > struct ttm_tt *ttm = &ttm_dma->ttm; > + int flags = BUS_DMA_WAITOK; > int ret; > > ttm_tt_init_fields(ttm, bo, page_flags); > @@ -313,8 +317,10 @@ int ttm_sg_tt_init(struct ttm_dma_tt *tt > > ttm_dma->dmat = bo->bdev->dmat; > > + if ((page_flags & TTM_PAGE_FLAG_DMA32) == 0) > + flags |= BUS_DMA_64BIT; > if (bus_dmamap_create(ttm_dma->dmat, ttm->num_pages << PAGE_SHIFT, > - ttm->num_pages, ttm->num_pages << PAGE_SHIFT, 0, BUS_DMA_WAITOK, > + ttm->num_pages, ttm->num_pages << PAGE_SHIFT, 0, flags, > &ttm_dma->map)) { > free(ttm_dma->segs, M_DRM, 0); > ttm_tt_destroy(ttm); >