As a result of a recent discussion with jsg@, I realized that the graphics drivers are (mostly) allocating memory from the dma region. Since the graphics stack can potentially gobble up large amounts of memory, we can run out of dma memory, which makes other parts of our kernel quite unhappy. Most of the supported hardware actually supports 64-bit DMA just fine, and the drivers already have code to handle the exceptions. The diff below makes use of this knowledge to (hopefully) safely allocate from "high" memory when possible. One big change is that this makes bus_dma(9) 64-bit DMA aware in the sense that if the BUS_DMA_64BIT flag is used, we skip the "not dma-reachable" panic.
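For what it's worth, the way a driver opts in is simply to pass BUS_DMA_64BIT when it creates its DMA map, roughly like this (a sketch only, not part of the diff; "sc", "sc_dmat" and "size" are made up):

        error = bus_dmamap_create(sc->sc_dmat, size, 1, size, 0,
            BUS_DMA_WAITOK | BUS_DMA_64BIT, &sc->sc_dmamap);
        if (error)
                return (error);

With that flag set, bus_dma(9) no longer panics when it sees a page above dma_constraint.ucr_high for that map.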
It seems to work fine on my Intel Broadwell laptop. I haven't tested this on radeon(4) yet. So further testing, especially on systems with 4 GB of memory or more, is necessary. Please test.

Index: arch/amd64/amd64/bus_dma.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/bus_dma.c,v
retrieving revision 1.50
diff -u -p -r1.50 bus_dma.c
--- arch/amd64/amd64/bus_dma.c	14 Oct 2017 04:44:43 -0000	1.50
+++ arch/amd64/amd64/bus_dma.c	6 Jun 2019 21:19:21 -0000
@@ -319,7 +319,8 @@ _bus_dmamap_load_raw(bus_dma_tag_t t, bu
 		if (plen < sgsize)
 			sgsize = plen;
 
-		if (paddr > dma_constraint.ucr_high)
+		if (paddr > dma_constraint.ucr_high &&
+		    (map->_dm_flags & BUS_DMA_64BIT) == 0)
 			panic("Non dma-reachable buffer at paddr %#lx(raw)",
 			    paddr);
 
@@ -583,7 +584,8 @@ _bus_dmamap_load_buffer(bus_dma_tag_t t,
 		 */
 		pmap_extract(pmap, vaddr, (paddr_t *)&curaddr);
 
-		if (curaddr > dma_constraint.ucr_high)
+		if (curaddr > dma_constraint.ucr_high &&
+		    (map->_dm_flags & BUS_DMA_64BIT) == 0)
 			panic("Non dma-reachable buffer at curaddr %#lx(raw)",
 			    curaddr);
 
Index: dev/pci/drm/drm_linux.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/drm/drm_linux.c,v
retrieving revision 1.37
diff -u -p -r1.37 drm_linux.c
--- dev/pci/drm/drm_linux.c	4 Jun 2019 12:08:22 -0000	1.37
+++ dev/pci/drm/drm_linux.c	6 Jun 2019 21:19:21 -0000
@@ -293,16 +293,19 @@ struct vm_page *
 alloc_pages(unsigned int gfp_mask, unsigned int order)
 {
 	int flags = (gfp_mask & M_NOWAIT) ? UVM_PLA_NOWAIT : UVM_PLA_WAITOK;
+	struct uvm_constraint_range *constraint = &no_constraint;
 	struct pglist mlist;
 
 	if (gfp_mask & M_CANFAIL)
 		flags |= UVM_PLA_FAILOK;
 	if (gfp_mask & M_ZERO)
 		flags |= UVM_PLA_ZERO;
+	if (gfp_mask & __GFP_DMA32)
+		constraint = &dma_constraint;
 
 	TAILQ_INIT(&mlist);
-	if (uvm_pglistalloc(PAGE_SIZE << order, dma_constraint.ucr_low,
-	    dma_constraint.ucr_high, PAGE_SIZE, 0, &mlist, 1, flags))
+	if (uvm_pglistalloc(PAGE_SIZE << order, constraint->ucr_low,
+	    constraint->ucr_high, PAGE_SIZE, 0, &mlist, 1, flags))
 		return NULL;
 	return TAILQ_FIRST(&mlist);
 }
Index: dev/pci/drm/include/linux/gfp.h
===================================================================
RCS file: /cvs/src/sys/dev/pci/drm/include/linux/gfp.h,v
retrieving revision 1.1
diff -u -p -r1.1 gfp.h
--- dev/pci/drm/include/linux/gfp.h	14 Apr 2019 10:14:53 -0000	1.1
+++ dev/pci/drm/include/linux/gfp.h	6 Jun 2019 21:19:21 -0000
@@ -7,24 +7,25 @@
 #include <sys/malloc.h>
 #include <uvm/uvm_extern.h>
 
-#define GFP_ATOMIC	M_NOWAIT
-#define GFP_NOWAIT	M_NOWAIT
-#define GFP_KERNEL	(M_WAITOK | M_CANFAIL)
-#define GFP_USER	(M_WAITOK | M_CANFAIL)
-#define GFP_TEMPORARY	(M_WAITOK | M_CANFAIL)
-#define GFP_HIGHUSER	0
-#define GFP_DMA32	0
-#define __GFP_NOWARN	0
-#define __GFP_NORETRY	0
-#define __GFP_ZERO	M_ZERO
+#define __GFP_ZERO		M_ZERO
+#define __GFP_DMA32		0x80000000
+#define __GFP_NOWARN		0
+#define __GFP_NORETRY		0
 #define __GFP_RETRY_MAYFAIL	0
 #define __GFP_MOVABLE		0
 #define __GFP_COMP		0
-#define GFP_TRANSHUGE_LIGHT	0
 #define __GFP_KSWAPD_RECLAIM	0
 #define __GFP_HIGHMEM		0
 #define __GFP_RECLAIMABLE	0
-#define __GFP_DMA32		0
+
+#define GFP_ATOMIC		M_NOWAIT
+#define GFP_NOWAIT		M_NOWAIT
+#define GFP_KERNEL		(M_WAITOK | M_CANFAIL)
+#define GFP_USER		(M_WAITOK | M_CANFAIL)
+#define GFP_TEMPORARY		(M_WAITOK | M_CANFAIL)
+#define GFP_HIGHUSER		0
+#define GFP_DMA32		__GFP_DMA32
+#define GFP_TRANSHUGE_LIGHT	0
 
 static inline bool
 gfpflags_allow_blocking(const unsigned int flags)
Index: dev/pci/drm/ttm/ttm_tt.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/drm/ttm/ttm_tt.c,v
retrieving revision 1.8
diff -u -p -r1.8 ttm_tt.c
--- dev/pci/drm/ttm/ttm_tt.c	14 Apr 2019 10:14:54 -0000	1.8
+++ dev/pci/drm/ttm/ttm_tt.c	6 Jun 2019 21:19:22 -0000
@@ -261,6 +261,7 @@ int ttm_dma_tt_init(struct ttm_dma_tt *t
 	uint32_t page_flags)
 {
 	struct ttm_tt *ttm = &ttm_dma->ttm;
+	int flags = BUS_DMA_WAITOK;
 
 	ttm_tt_init_fields(ttm, bo, page_flags);
 
@@ -276,8 +277,10 @@ int ttm_dma_tt_init(struct ttm_dma_tt *t
 
 	ttm_dma->dmat = bo->bdev->dmat;
 
+	if ((page_flags & TTM_PAGE_FLAG_DMA32) == 0)
+		flags |= BUS_DMA_64BIT;
 	if (bus_dmamap_create(ttm_dma->dmat, ttm->num_pages << PAGE_SHIFT,
-	    ttm->num_pages, ttm->num_pages << PAGE_SHIFT, 0, BUS_DMA_WAITOK,
+	    ttm->num_pages, ttm->num_pages << PAGE_SHIFT, 0, flags,
 	    &ttm_dma->map)) {
 		free(ttm_dma->segs, M_DRM, 0);
 		ttm_tt_destroy(ttm);
@@ -293,6 +296,7 @@ int ttm_sg_tt_init(struct ttm_dma_tt *tt
 	uint32_t page_flags)
 {
 	struct ttm_tt *ttm = &ttm_dma->ttm;
+	int flags = BUS_DMA_WAITOK;
 	int ret;
 
 	ttm_tt_init_fields(ttm, bo, page_flags);
@@ -313,8 +317,10 @@ int ttm_sg_tt_init(struct ttm_dma_tt *tt
 
 	ttm_dma->dmat = bo->bdev->dmat;
 
+	if ((page_flags & TTM_PAGE_FLAG_DMA32) == 0)
+		flags |= BUS_DMA_64BIT;
 	if (bus_dmamap_create(ttm_dma->dmat, ttm->num_pages << PAGE_SHIFT,
-	    ttm->num_pages, ttm->num_pages << PAGE_SHIFT, 0, BUS_DMA_WAITOK,
+	    ttm->num_pages, ttm->num_pages << PAGE_SHIFT, 0, flags,
 	    &ttm_dma->map)) {
 		free(ttm_dma->segs, M_DRM, 0);
 		ttm_tt_destroy(ttm);
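As an aside, with the gfp.h change above a drm call site that still needs pages the device can only reach below 4 GB has to ask for them explicitly; a hypothetical example (not part of the diff):

        /* one zeroed page from the dma_constraint range, i.e. below 4 GB */
        struct vm_page *pg = alloc_pages(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO, 0);

Everything else now comes from the no_constraint range unless GFP_DMA32 is passed.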