Oops, ignore this - diff in the wrong directory.
On 7 July 2011 15:53, Bob Beck <b...@cvs.openbsd.org> wrote: > Want more buffer cache? please have a try with this. > > This diff breaks the buffer cache into the dma'able region, and the > above dma-able region of memory. buffers are always allocated in > the dma'able region, and as they age they are moved above the dma'able > region if such memory exists. I/O operations on buffers in high > memory flip the buffer back into dma-able memory first. > > With this diff you can have huge tracts of buffer cache on amd64 but this > also needs testing on all arch's. > > > Index: kern_sysctl.c > =================================================================== > RCS file: /cvs/src/sys/kern/kern_sysctl.c,v > retrieving revision 1.206 > diff -u -p -r1.206 kern_sysctl.c > --- kern_sysctl.c 5 Jul 2011 04:48:02 -0000 1.206 > +++ kern_sysctl.c 7 Jul 2011 21:09:33 -0000 > @@ -112,6 +112,7 @@ extern struct disklist_head disklist; > extern fixpt_t ccpu; > extern long numvnodes; > extern u_int mcllivelocks; > +extern psize_t b_dmapages_total, b_highpages_total, b_dmamaxpages; > > extern void nmbclust_update(void); > > @@ -566,8 +567,8 @@ kern_sysctl(int *name, u_int namelen, vo > return (sysctl_int(oldp, oldlenp, newp, newlen, > &rthreads_enabled)); > case KERN_CACHEPCT: { > - u_int64_t dmapages; > - int opct, pgs; > + psize_t pgs; > + int opct; > opct = bufcachepercent; > error = sysctl_int(oldp, oldlenp, newp, newlen, > &bufcachepercent); > @@ -577,11 +578,13 @@ kern_sysctl(int *name, u_int namelen, vo > bufcachepercent = opct; > return (EINVAL); > } > - dmapages = uvm_pagecount(&dma_constraint); > if (bufcachepercent != opct) { > - pgs = bufcachepercent * dmapages / 100; > + pgs = (b_highpages_total + b_dmapages_total) > + * bufcachepercent / 100; > + b_dmamaxpages = b_dmapages_total * bufcachepercent > + / 100; > bufadjust(pgs); /* adjust bufpages */ > - bufhighpages = bufpages; /* set high water mark */ > + bufhighpages = bufpages; > } > return(0); > } > Index: spec_vnops.c > =================================================================== > RCS file: /cvs/src/sys/kern/spec_vnops.c,v > retrieving revision 1.67 > diff -u -p -r1.67 spec_vnops.c > --- spec_vnops.c 5 Jul 2011 05:37:07 -0000 1.67 > +++ spec_vnops.c 6 Jul 2011 22:44:00 -0000 > @@ -457,7 +457,9 @@ spec_strategy(void *v) > struct vop_strategy_args *ap = v; > struct buf *bp = ap->a_bp; > int maj = major(bp->b_dev); > - > + > + if (!ISSET(bp->b_flags, B_DAQ) && ISSET(bp->b_flags, B_BC)) > + panic("bogus buf passed to spec_strategy"); > if (LIST_FIRST(&bp->b_dep) != NULL) > buf_start(bp); > > Index: vfs_bio.c > =================================================================== > RCS file: /cvs/src/sys/kern/vfs_bio.c,v > retrieving revision 1.133 > diff -u -p -r1.133 vfs_bio.c > --- vfs_bio.c 6 Jul 2011 20:50:05 -0000 1.133 > +++ vfs_bio.c 7 Jul 2011 21:34:52 -0000 > @@ -68,9 +68,13 @@ > #define BQ_DIRTY 0 /* LRU queue with dirty buffers */ > #define BQ_CLEAN 1 /* LRU queue with clean buffers */ > > -TAILQ_HEAD(bqueues, buf) bufqueues[BQUEUES]; > -int needbuffer; > +struct uvm_constraint_range high_constraint; > struct bio_ops bioops; > +TAILQ_HEAD(bqueues, buf) bufqueues[BQUEUES]; > +TAILQ_HEAD(bqda, buf) bufqueue_da; > +psize_t b_dmapages_total, b_highpages_total, b_dmamaxpages; > +int needbuffer,needda; > +int needda; > > /* > * Buffer pool for I/O buffers. 
> @@ -87,12 +91,13 @@ void buf_put(struct buf *); > > struct buf *bio_doread(struct vnode *, daddr64_t, int, int); > struct buf *buf_get(struct vnode *, daddr64_t, size_t); > +struct buf *buf_remove_from_freelist(struct buf *); > void bread_cluster_callback(struct buf *); > > /* > * We keep a few counters to monitor the utilization of the buffer cache > * > - * numbufpages - number of pages totally allocated. > + * numbufpages - number of pages totally allocated. > * numdirtypages - number of pages on BQ_DIRTY queue. > * lodirtypages - low water mark for buffer cleaning daemon. > * hidirtypages - high water mark for buffer cleaning daemon. > @@ -110,14 +115,112 @@ long hicleanpages; > long maxcleanpages; > long backoffpages; /* backoff counter for page allocations */ > long buflowpages; /* bufpages low water mark */ > -long bufhighpages; /* bufpages high water mark */ > -long bufbackpages; /* number of pages we back off when asked to shrink */ > +long bufhighpages; /* bufpages high water mark */ > +long bufbackpages; /* number of pages we back off when asked to shrink */ > + > +/* XXX - should be defined here but we have md issues */ > +extern int bufcachepercent; > > vsize_t bufkvm; > > struct proc *cleanerproc; > int bd_req; /* Sleep point for cleaner daemon. */ > > +/* nuke a buf off it's freelist - returns next buf. skips busy buffers */ > +struct buf * > +buf_remove_from_freelist(struct buf * bp) > +{ > + struct buf * nbp; > + nbp = TAILQ_NEXT(bp, b_freelist); > + /* skip busy buffers */ > + if (!ISSET(bp->b_flags, B_BUSY)) { > + bremfree(bp); > + if (bp->b_vp) { > + RB_REMOVE(buf_rb_bufs, > + &bp->b_vp->v_bufs_tree, bp); > + brelvp(bp); > + } > + buf_put(bp); > + } > + return(nbp); > +} > +/* > + * Add buf to the head of the dma reachable queue > + * and ensure that it is dma reachable. > + */ > +void > +buf_daq_add(struct buf *buf) > +{ > + struct buf *b; > + int s; > + > +start: > + KASSERT(ISSET(buf->b_flags, B_BC)); > + KASSERT(ISSET(buf->b_flags, B_BUSY)); > + KASSERT(buf->b_pobj != NULL); > + s = splbio(); > + /* > + * if we are adding to the queue, ensure we free down below the > + * max > + */ > + while (b_highpages_total && > + (!ISSET(buf->b_flags, B_DAQ)) && (!ISSET(buf->b_flags, B_DMA)) && > + (bcstats.dmapages > (b_dmamaxpages - atop(buf->b_bufsize)))) { > + b = TAILQ_FIRST(&bufqueue_da); > + /* find first non-busy buffer */ > + while (b && ISSET(b->b_flags, B_BUSY)) > + b = TAILQ_NEXT(b, b_qda); > + if (b == NULL) { > + /* no non-busy buffers. */ > + needda++; > + tsleep(&needda, PRIBIO, "needda", 0); > + needda--; > + splx(s); > + goto start; > + } else { > + if (b_highpages_total) { > + buf_acquire_unmapped(b); > + /* move buffer to above dma reachable memory */ > + TAILQ_REMOVE(&bufqueue_da, b, b_qda); > + buf_realloc_pages(b, &high_constraint); > + if (ISSET(b->b_flags, B_DMA)) > + panic("B_DMA after high flip %p", b); > + CLR(b->b_flags, B_DAQ); > + buf_release(b); > + splx(s); > + goto start; > + } else { > + /* no high pages to flip to. 
*/ > + needda++; > + tsleep(&needda, PRIBIO, "needda", 0); > + needda--; > + splx(s); > + goto start; > + } > + } > + } > + /* don't copy it if it's already in dma reachable memory */ > + if (ISSET(buf->b_flags, B_DMA)) { > + /* buf already there, just move it to the end */ > + if (ISSET(buf->b_flags, B_DAQ)) > + TAILQ_REMOVE(&bufqueue_da, buf, b_qda); > + TAILQ_INSERT_TAIL(&bufqueue_da, buf, b_qda); > + SET(buf->b_flags, B_DAQ); > + } else { > + if (ISSET(buf->b_flags, B_DAQ)) > + panic("non-dma buffer on dma queue %p\n", buf); > + /* move buf to dma reachable memory */ > + buf_realloc_pages(buf, &dma_constraint); > + if (!ISSET(buf->b_flags, B_DMA)) > + panic("non-dma buffer after dma move %p\n", buf); > + TAILQ_INSERT_TAIL(&bufqueue_da, buf, b_qda); > + SET(buf->b_flags, B_DAQ); > + } > + splx(s); > + return; > + > +} > + > void > bremfree(struct buf *bp) > { > @@ -139,11 +242,10 @@ bremfree(struct buf *bp) > if (dp == &bufqueues[BQUEUES]) > panic("bremfree: lost tail"); > } > - if (!ISSET(bp->b_flags, B_DELWRI)) { > + if (!ISSET(bp->b_flags, B_DELWRI)) > bcstats.numcleanpages -= atop(bp->b_bufsize); > - } else { > + else > bcstats.numdirtypages -= atop(bp->b_bufsize); > - } > TAILQ_REMOVE(dp, bp, b_freelist); > bcstats.freebufs--; > } > @@ -175,7 +277,10 @@ buf_put(struct buf *bp) > if (backoffpages < 0) > backoffpages = 0; > } > - > + if (ISSET(bp->b_flags, B_DAQ)) { > + TAILQ_REMOVE(&bufqueue_da, bp, b_qda); > + CLR(bp->b_flags, B_DAQ); > + } > if (buf_dealloc_mem(bp) != 0) > return; > pool_put(&bufpool, bp); > @@ -187,10 +292,22 @@ buf_put(struct buf *bp) > void > bufinit(void) > { > - u_int64_t dmapages; > struct bqueues *dp; > > - dmapages = uvm_pagecount(&dma_constraint); > + bufhighpages = buflowpages = bufpages = bufcachepercent = bufkvm = 0; > + /* > + * XXX note this really is "high" - i.e. *above* dma_constraint > + */ > + high_constraint.ucr_low = dma_constraint.ucr_high; > + high_constraint.ucr_high = no_constraint.ucr_high; > + > + /* do we have memory above dma_constraint, or not? */ > + if (high_constraint.ucr_low != high_constraint.ucr_high) { > + high_constraint.ucr_low++; > + b_highpages_total = uvm_pagecount(&high_constraint); > + } else > + b_highpages_total = 0; > + b_dmapages_total = uvm_pagecount(&dma_constraint); > > /* > * If MD code doesn't say otherwise, use 10% of kvm for mappings and > @@ -199,25 +316,31 @@ bufinit(void) > if (bufcachepercent == 0) > bufcachepercent = 10; > if (bufpages == 0) > - bufpages = dmapages * bufcachepercent / 100; > + bufpages = (b_highpages_total + b_dmapages_total) > + * bufcachepercent / 100; > > bufhighpages = bufpages; > + b_dmamaxpages = b_dmapages_total * bufcachepercent / 100; > + > + printf("buffer cache from %d dma pages and %d high pages\n", > + b_dmapages_total, b_highpages_total); > > /* > * set the base backoff level for the buffer cache to bufpages. > * we will not allow uvm to steal back more than this number of > * pages > */ > - buflowpages = dmapages * 10 / 100; > + buflowpages = b_dmapages_total * 10 / 100; > > /* > - * set bufbackpages to 100 pages, or 10 percent of the low water mark > - * if we don't have that many pages. > + * set bufbackpages to 1 MB worth or pages, or 10 percent of > + * the low water mark if we don't have that many pages. 
> */ > > bufbackpages = buflowpages * 10 / 100; > - if (bufbackpages > 100) > - bufbackpages = 100; > + > + if (bufbackpages > (1048576 / PAGE_SIZE)) > + bufbackpages = (1048576 / PAGE_SIZE); > > if (bufkvm == 0) > bufkvm = (VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) / 10; > @@ -238,15 +361,16 @@ bufinit(void) > pool_setipl(&bufpool, IPL_BIO); > for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++) > TAILQ_INIT(dp); > + TAILQ_INIT(&bufqueue_da); > > /* > * hmm - bufkvm is an argument because it's static, while > * bufpages is global because it can change while running. > - */ > + */ > buf_mem_init(bufkvm); > > - hidirtypages = (bufpages / 4) * 3; > - lodirtypages = bufpages / 2; > + hidirtypages = (b_dmamaxpages / 4) * 3; > + lodirtypages = b_dmamaxpages / 2; > > /* > * When we hit 95% of pages being clean, we bring them down to > @@ -259,6 +383,39 @@ bufinit(void) > } > > /* > + * Flip some dma reachable cache pages high > + */ > +void > +bufhigh(int delta) > +{ > + psize_t newdmapages; > + struct buf *b; > + int s; > + > + if (!b_highpages_total) > + return; > + s = splbio(); > + newdmapages = bcstats.dmapages - delta; > + while ((bcstats.dmapages > newdmapages) && > + (b = TAILQ_FIRST(&bufqueue_da))) { > + while (ISSET(b->b_flags, B_BUSY)) > + b = TAILQ_NEXT(b, b_qda); > + if (b != NULL) { > + buf_acquire_unmapped(b); > + /* move buffer to above dma reachable memory */ > + buf_realloc_pages(b, &high_constraint); > + if (ISSET(b->b_flags, B_DMA)) > + panic("DMA flagged buffer after high flip %p", b); > + TAILQ_REMOVE(&bufqueue_da, b, b_qda); > + CLR(b->b_flags, B_DAQ); > + buf_release(b); > + } > + } > + wakeup(&needda); > + splx(s); > +} > + > +/* > * Change cachepct > */ > void > @@ -272,10 +429,19 @@ bufadjust(int newbufpages) > int s; > > s = splbio(); > + /* XXX for hibernate - throw away everything we can.*/ > + if (newbufpages == 0) { > + bp = TAILQ_FIRST(&bufqueues[BQ_CLEAN]); > + while (bp) > + bp = buf_remove_from_freelist(bp); > + splx(s); > + return; > + } > + > bufpages = newbufpages; > > - hidirtypages = (bufpages / 4) * 3; > - lodirtypages = bufpages / 2; > + hidirtypages = (b_dmamaxpages / 4) * 3; > + lodirtypages = b_dmamaxpages / 2; > > /* > * When we hit 95% of pages being clean, we bring them down to > @@ -291,16 +457,9 @@ bufadjust(int newbufpages) > * free them up to get back down. this may possibly consume > * all our clean pages... > */ > - while ((bp = TAILQ_FIRST(&bufqueues[BQ_CLEAN])) && > - (bcstats.numbufpages > bufpages)) { > - bremfree(bp); > - if (bp->b_vp) { > - RB_REMOVE(buf_rb_bufs, > - &bp->b_vp->v_bufs_tree, bp); > - brelvp(bp); > - } > - buf_put(bp); > - } > + bp = TAILQ_FIRST(&bufqueues[BQ_CLEAN]); > + while (bp && (bcstats.numbufpages > bufpages)) > + bp = buf_remove_from_freelist(bp); > > /* > * Wake up cleaner if we're getting low on pages. We might > @@ -336,23 +495,39 @@ bufbackoff(struct uvm_constraint_range * > * On success, it frees N pages from the buffer cache, and sets > * a flag so that the next N allocations from buf_get will recycle > * a buffer rather than allocate a new one. It then returns 0 to the > - * caller. > + * caller. > * > * on failure, it could free no pages from the buffer cache, does > - * nothing and returns -1 to the caller. > + * nothing and returns -1 to the caller. > + */ > + > + psize_t d, s; > + > + /* > + * back of by at least bufbackpages, or bufbackpages + what > + * the pagedaemon needs if it happens to know when it calls us > */ > - long d; > + s = (size > 0) ? 
bufbackpages + size : bufbackpages; > > - if (bufpages <= buflowpages) > + if (bufpages <= buflowpages) > return(-1); > > - if (bufpages - bufbackpages >= buflowpages) > - d = bufbackpages; > + if (bufpages - s >= buflowpages) > + d = s; > else > d = bufpages - buflowpages; > - backoffpages = bufbackpages; > - bufadjust(bufpages - d); > - backoffpages = bufbackpages; > + > + if (b_highpages_total > + && (range->ucr_high <= dma_constraint.ucr_high)) { > + if (bcstats.dmapages - s > b_dmamaxpages) > + s += (bcstats.dmapages - b_dmamaxpages); > + bufhigh(s); > + } > + else { > + backoffpages = bufbackpages; > + bufadjust(bufpages - d); > + backoffpages = bufbackpages; > + } > return(0); > } > > @@ -534,12 +709,18 @@ bread_cluster(struct vnode *vp, daddr64_ > for (i = 1; i < howmany; i++) { > bcstats.pendingreads++; > bcstats.numreads++; > - SET(xbpp[i]->b_flags, B_READ | B_ASYNC); > + /* > + * We set B_DMA here because bp above should be > + * and we are playing buffer slice-n-dice games > + * from the memory allocated in bp. > + */ > + SET(xbpp[i]->b_flags, B_DMA | B_READ | B_ASYNC); > xbpp[i]->b_blkno = sblkno + (i * inc); > xbpp[i]->b_bufsize = xbpp[i]->b_bcount = size; > xbpp[i]->b_data = NULL; > xbpp[i]->b_pobj = bp->b_pobj; > xbpp[i]->b_poffs = bp->b_poffs + (i * size); > + buf_daq_add(xbpp[i]); > } > > KASSERT(bp->b_lblkno == blkno + 1); > @@ -618,7 +799,7 @@ bwrite(struct buf *bp) > reassignbuf(bp); > } else > curproc->p_stats->p_ru.ru_oublock++; > - > + > > /* Initiate disk write. Make sure the appropriate party is charged. */ > bp->b_vp->v_numoutput++; > @@ -793,6 +974,8 @@ brelse(struct buf *bp) > CLR(bp->b_flags, B_WANTED); > wakeup(bp); > } > + if (ISSET(bp->b_flags, B_DMA) && needda) > + wakeup(&needda); > if (bp->b_vp != NULL) > RB_REMOVE(buf_rb_bufs, > &bp->b_vp->v_bufs_tree, bp); > @@ -833,19 +1016,6 @@ brelse(struct buf *bp) > bcstats.freebufs++; > CLR(bp->b_flags, (B_AGE | B_ASYNC | B_NOCACHE | B_DEFERRED)); > buf_release(bp); > - > - /* Wake up any processes waiting for any buffer to become free. */ > - if (needbuffer) { > - needbuffer--; > - wakeup(&needbuffer); > - } > - > - /* Wake up any processes waiting for _this_ buffer to become free. */ > - if (ISSET(bp->b_flags, B_WANTED)) { > - CLR(bp->b_flags, B_WANTED); > - wakeup(bp); > - } > - > splx(s); > } > > @@ -981,16 +1151,9 @@ buf_get(struct vnode *vp, daddr64_t blkn > * free down to the low water mark. 
> */ > if (bcstats.numcleanpages > hicleanpages) { > - while (bcstats.numcleanpages > locleanpages) { > - bp = TAILQ_FIRST(&bufqueues[BQ_CLEAN]); > - bremfree(bp); > - if (bp->b_vp) { > - RB_REMOVE(buf_rb_bufs, > - &bp->b_vp->v_bufs_tree, bp); > - brelvp(bp); > - } > - buf_put(bp); > - } > + bp = TAILQ_FIRST(&bufqueues[BQ_CLEAN]); > + while (bp && (bcstats.numcleanpages > locleanpages)) > + bp = buf_remove_from_freelist(bp); > } > > npages = atop(round_page(size)); > @@ -1002,15 +1165,9 @@ buf_get(struct vnode *vp, daddr64_t blkn > || backoffpages) { > int freemax = 5; > int i = freemax; > - while ((bp = TAILQ_FIRST(&bufqueues[BQ_CLEAN])) && i--) { > - bremfree(bp); > - if (bp->b_vp) { > - RB_REMOVE(buf_rb_bufs, > - &bp->b_vp->v_bufs_tree, bp); > - brelvp(bp); > - } > - buf_put(bp); > - } > + bp = TAILQ_FIRST(&bufqueues[BQ_CLEAN]); > + while (bp && i--) > + bp = buf_remove_from_freelist(bp); > if (freemax == i && > (bcstats.numbufpages + npages > bufpages)) { > needbuffer++; > @@ -1027,6 +1184,8 @@ buf_get(struct vnode *vp, daddr64_t blkn > splx(s); > return (NULL); > } > + /* Mark buffer as the cache's */ > + SET(bp->b_flags, B_BC); > > bp->b_freelist.tqe_next = NOLIST; > bp->b_synctime = time_uptime + 300; > @@ -1041,7 +1200,7 @@ buf_get(struct vnode *vp, daddr64_t blkn > * We insert the buffer into the hash with B_BUSY set > * while we allocate pages for it. This way any getblk > * that happens while we allocate pages will wait for > - * this buffer instead of starting its own guf_get. > + * this buffer instead of starting its own buf_get. > * > * But first, we check if someone beat us to it. > */ > @@ -1067,10 +1226,9 @@ buf_get(struct vnode *vp, daddr64_t blkn > if (size) { > buf_alloc_pages(bp, round_page(size)); > buf_map(bp); > + buf_daq_add(bp); > } > - > splx(s); > - > return (bp); > } > > @@ -1082,23 +1240,46 @@ buf_daemon(struct proc *p) > { > struct timeval starttime, timediff; > struct buf *bp; > - int s; > + int s, nb, error; > > cleanerproc = curproc; > > s = splbio(); > for (;;) { > + struct buf *nbp; > if (bcstats.numdirtypages < hidirtypages) > tsleep(&bd_req, PRIBIO - 7, "cleaner", 0); > > getmicrouptime(&starttime); > - > +start: > + nb = 0; > while ((bp = TAILQ_FIRST(&bufqueues[BQ_DIRTY]))) { > struct timeval tv; > + nbp = TAILQ_NEXT(bp, b_freelist); > > if (bcstats.numdirtypages < lodirtypages) > break; > > + /* > + * If we haven't found any other buffers to > + * process and this last one is busy, wait for > + * it and restart. otherwise, continue and > + * process the rest of them.. 
> + * > + */ > + if ((nb == 0) && (nbp == NULL) && > + ISSET(bp->b_flags, B_BUSY)) { > + SET(bp->b_flags, B_WANTED); > + error = tsleep(bp, PRIBIO + 1, "getblk", 0); > + splx(s); > + if (error) > + return; > + s = splbio(); > + goto start; > + } else { > + continue; > + } > + nb++; > bremfree(bp); > buf_acquire(bp); > splx(s); > @@ -1132,7 +1313,6 @@ buf_daemon(struct proc *p) > s = splbio(); > if (timediff.tv_sec) > break; > - > } > } > } > Index: vfs_biomem.c > =================================================================== > RCS file: /cvs/src/sys/kern/vfs_biomem.c,v > retrieving revision 1.17 > diff -u -p -r1.17 vfs_biomem.c > --- vfs_biomem.c 7 Apr 2011 19:07:42 -0000 1.17 > +++ vfs_biomem.c 7 Jul 2011 21:17:09 -0000 > @@ -33,6 +33,8 @@ TAILQ_HEAD(,buf) buf_valist; > int buf_nkvmsleep; > > extern struct bcachestats bcstats; > +extern int needbuffer; > +extern int needda; > > /* > * Pages are allocated from a uvm object (we only use it for page storage, > @@ -99,6 +101,11 @@ buf_acquire_unmapped(struct buf *bp) > > s = splbio(); > SET(bp->b_flags, B_BUSY|B_NOTMAPPED); > + /* XXX */ > + if (bp->b_data != NULL) { > + TAILQ_REMOVE(&buf_valist, bp, b_valist); > + bcstats.busymapped++; > + } > splx(s); > } > > @@ -170,6 +177,24 @@ buf_release(struct buf *bp) > } > } > CLR(bp->b_flags, B_BUSY|B_NOTMAPPED); > + if (ISSET(bp->b_flags, B_DMA) && needda) { > + wakeup(&needda); > + } > + /* Wake up any processes waiting for any buffer to become free. */ > + if (needbuffer) { > + needbuffer--; > + wakeup(&needbuffer); > + } > + > + /* > + * Wake up any processes waiting for _this_ buffer to become > + * free. > + */ > + > + if (ISSET(bp->b_flags, B_WANTED)) { > + CLR(bp->b_flags, B_WANTED); > + wakeup(bp); > + } > splx(s); > } > > @@ -286,6 +311,8 @@ buf_alloc_pages(struct buf *bp, vsize_t > > uvm_pagealloc_multi(buf_object, offs, size, UVM_PLA_WAITOK); > bcstats.numbufpages += atop(size); > + bcstats.dmapages += atop(size); > + SET(bp->b_flags, B_DMA); > bp->b_pobj = buf_object; > bp->b_poffs = offs; > bp->b_bufsize = size; > @@ -302,6 +329,7 @@ buf_free_pages(struct buf *bp) > > KASSERT(bp->b_data == NULL); > KASSERT(uobj != NULL); > + KASSERT(!ISSET(bp->b_flags, B_DAQ)); > > s = splbio(); > > @@ -316,11 +344,57 @@ buf_free_pages(struct buf *bp) > pg->wire_count = 0; > uvm_pagefree(pg); > bcstats.numbufpages--; > + if (ISSET(bp->b_flags, B_DMA)) > + bcstats.dmapages--; > } > + CLR(bp->b_flags, B_DMA); > splx(s); > } > > -/* > - * XXX - it might make sense to make a buf_realloc_pages to avoid > - * bouncing through the free list all the time. 
> - */ > +/* Reallocate a buf into a particular location specified by "where" */ > +void > +buf_realloc_pages(struct buf *bp, struct uvm_constraint_range *where) > +{ > + vaddr_t va; > + int dma; > + int s, i; > + > + s = splbio(); > + KASSERT(ISSET(bp->b_flags, B_BUSY)); > + dma = ISSET(bp->b_flags, B_DMA); > + > + /* if the original buf is mapped, unmap it */ > + if (bp->b_data != NULL) { > + va = (vaddr_t)bp->b_data; > + pmap_kremove(va, bp->b_bufsize); > + pmap_update(pmap_kernel()); > + } > + uvm_pagerealloc_multi(bp->b_pobj, bp->b_poffs, bp->b_bufsize, > + UVM_PLA_WAITOK, where); > + /* > + * do this now, and put it back later when we know where we are > + */ > + if (dma) > + bcstats.dmapages -= atop(bp->b_bufsize); > + > + dma = 1; > + /* if the original buf was mapped, re-map it */ > + for (i = 0; i < atop(bp->b_bufsize); i++) { > + struct vm_page *pg = uvm_pagelookup(bp->b_pobj, > + bp->b_poffs + ptoa(i)); > + KASSERT(pg != NULL); > + if (!PADDR_IS_DMA_REACHABLE(VM_PAGE_TO_PHYS(pg))) > + dma = 0; > + if (bp->b_data != NULL) { > + pmap_kenter_pa(va + ptoa(i), VM_PAGE_TO_PHYS(pg), > + VM_PROT_READ|VM_PROT_WRITE); > + pmap_update(pmap_kernel()); > + } > + } > + if (dma) { > + SET(bp->b_flags, B_DMA); > + bcstats.dmapages += atop(bp->b_bufsize); > + } else > + CLR(bp->b_flags, B_DMA); > + splx(s); > +} > Index: vfs_vops.c > =================================================================== > RCS file: /cvs/src/sys/kern/vfs_vops.c,v > retrieving revision 1.4 > diff -u -p -r1.4 vfs_vops.c > --- vfs_vops.c 2 Jul 2011 15:52:25 -0000 1.4 > +++ vfs_vops.c 6 Jul 2011 22:39:28 -0000 > @@ -614,6 +614,17 @@ VOP_STRATEGY(struct buf *bp) > if (bp->b_vp->v_op->vop_strategy == NULL) > return (EOPNOTSUPP); > > + /* > + * Flip buffer to dma reachable memory if > + * necessary. > + * > + * XXX if you're making your own buffers and not > + * having the buffer cache manage them then it's your > + * problem to ensure they can be dma'ed to and from. > + */ > + if (ISSET(bp->b_flags, B_BC)) > + buf_daq_add(bp); > + > return ((bp->b_vp->v_op->vop_strategy)(&a)); > }