This diff changes the buffer cache to allocate only out of dma'able
 memory, and adds a few pieces (flags) that I will need for the next step
 of allowing it to touch high memory.
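
 To make the numbers concrete (a hypothetical example, not measured
 output): on an amd64 box with 8GB of physical memory and a dma_constraint
 covering the low 4GB, bufpages used to default to 10% of all of physmem
 (roughly 800MB worth of pages); with this diff it defaults to 10% of the
 pages between dma_constraint.ucr_low and ucr_high (roughly 400MB), so the
 cache never holds a page a driver can't DMA to.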

 It appears to behave well for me under load, and builds survive with
 the buffer cache cranked up.
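
 Note this also reorders the page daemon loop so that it always asks the
 buffer cache to back off first, and only falls into uvmpd_scan() when
 bufbackoff() fails and the free/inactive targets are still unmet (see the
 uvm_pdaemon.c hunk below).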

 I would like to get this in as a first step, since it allows me to
 progress, and will let others fix some of the other problems with drivers
 without the buffer cache getting in the way with bigmem turned on.
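
 For those wondering how the new B_DMA flag and buf_realloc_pages() will
 be used: here is a rough sketch (purely illustrative, not part of this
 diff) of how I expect a future caller to bounce a buffer's pages back
 below the constraint before starting device I/O on a bigmem kernel:

        /*
         * Hypothetical future caller: make sure bp's pages are
         * DMA reachable before handing the buffer to a driver.
         * bp must be held B_BUSY, as buf_realloc_pages() asserts.
         */
        if (!ISSET(bp->b_flags, B_DMA))
                buf_realloc_pages(bp, &dma_constraint);

 buf_realloc_pages() copies the pages into the requested range via
 uvm_pagerealloc_multi() and then sets or clears B_DMA depending on where
 the pages actually landed.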

 -Bob

------8<----
Index: sys/kern/vfs_bio.c
===================================================================
RCS file: /cvs/src/sys/kern/vfs_bio.c,v
retrieving revision 1.127
diff -u -r1.127 vfs_bio.c
--- sys/kern/vfs_bio.c  13 Nov 2010 17:45:44 -0000      1.127
+++ sys/kern/vfs_bio.c  1 Apr 2011 08:56:34 -0000
@@ -191,19 +191,43 @@
 void
 bufinit(void)
 {
+       long low, high, dmapages, highpages;
        struct bqueues *dp;
 
        /* XXX - for now */
        bufhighpages = buflowpages = bufpages = bufcachepercent = bufkvm = 0;
 
        /*
+        * First off, figure out how much memory we can use.
+        *
+        * XXX for now we only use dma-able memory.
+        *
+        * XXX - this isn't completely accurate, because there may
+        * be holes in the physical memory. This needs to be replaced
+        * with a uvm_pmemrange function to tell us how many pages
+        * are within a constraint range - but this is accurate enough
+        * for now.
+        */
+
+       low = atop(dma_constraint.ucr_low);
+       high = atop(dma_constraint.ucr_high);
+       if (high >= physmem) {
+               high = physmem;
+               highpages = 0;
+       } else
+               highpages = physmem - high;
+
+       /* XXX highpages not used yet but will be very soon. */
+       dmapages = high - low;
+
+       /*
         * If MD code doesn't say otherwise, use 10% of kvm for mappings and
-        * 10% physmem for pages.
+        * 10% of dmaable pages for cache pages.
         */
        if (bufcachepercent == 0)
                bufcachepercent = 10;
        if (bufpages == 0)
-               bufpages = physmem * bufcachepercent / 100;
+               bufpages = dmapages * bufcachepercent / 100;
 
        bufhighpages = bufpages;
 
@@ -212,7 +236,7 @@
         * we will not allow uvm to steal back more than this number of
         * pages
         */
-       buflowpages = physmem * 10 / 100;
+       buflowpages = dmapages * 10 / 100;
 
        /*
         * set bufbackpages to 100 pages, or 10 percent of the low water mark
Index: sys/kern/vfs_biomem.c
===================================================================
RCS file: /cvs/src/sys/kern/vfs_biomem.c,v
retrieving revision 1.14
diff -u -r1.14 vfs_biomem.c
--- sys/kern/vfs_biomem.c       30 Apr 2010 21:56:39 -0000      1.14
+++ sys/kern/vfs_biomem.c       1 Apr 2011 05:32:21 -0000
@@ -29,10 +29,19 @@
 vaddr_t buf_kva_start, buf_kva_end;
 int buf_needva;
 TAILQ_HEAD(,buf) buf_valist;
+void   buf_realloc_pages(struct buf *, struct uvm_constraint_range *);
 
 int buf_nkvmsleep;
+#if 0
+extern int needda;
+#endif
+extern void                    uvm_pagealloc_multi(struct uvm_object *, voff_t,
+                           vsize_t, int);
+extern void                    uvm_pagerealloc_multi(struct uvm_object *, voff_t,
+                           vsize_t, int, struct uvm_constraint_range *);
 
 extern struct bcachestats bcstats;
+extern int needbuffer;
 
 /*
  * Pages are allocated from a uvm object (we only use it for page storage,
@@ -99,6 +108,10 @@
 
        s = splbio();
        SET(bp->b_flags, B_BUSY|B_NOTMAPPED);
+       if (bp->b_data != NULL) {
+               TAILQ_REMOVE(&buf_valist, bp, b_valist);
+               bcstats.busymapped++;
+       }
        splx(s);
 }
 
@@ -170,6 +183,26 @@
                }
        }
        CLR(bp->b_flags, B_BUSY|B_NOTMAPPED);
+#if 0
+       if (ISSET(bp->b_flags, B_DAQ) && needda) {
+               wakeup(&needda);
+       }
+#endif
+       /* Wake up any processes waiting for any buffer to become free. */
+       if (needbuffer) {
+               needbuffer--;
+               wakeup(&needbuffer);
+       }
+
+       /*
+        * Wake up any processes waiting for _this_ buffer to become
+        * free.
+        */
+
+       if (ISSET(bp->b_flags, B_WANTED)) {
+               CLR(bp->b_flags, B_WANTED);
+               wakeup(bp);
+       }
        splx(s);
 }
 
@@ -259,11 +292,11 @@
        return (va);
 }
 
+/* Always allocates in Device Accessible Memory */
 void
 buf_alloc_pages(struct buf *bp, vsize_t size)
 {
-       struct vm_page *pg;
-       voff_t offs, i;
+       voff_t offs;
        int s;
 
        KASSERT(size == round_page(size));
@@ -277,25 +310,51 @@
 
        KASSERT(buf_page_offset > 0);
 
-       for (i = 0; i < atop(size); i++) {
-#if defined(DEBUG) || 1
-               if ((pg = uvm_pagelookup(buf_object, offs + ptoa(i))))
-                       panic("buf_alloc_pages: overlap buf: %p page: %p",
-                           bp, pg);
-#endif
-
-               while ((pg = uvm_pagealloc(buf_object, offs + ptoa(i),
-                           NULL, 0)) == NULL) {
-                       uvm_wait("buf_alloc_pages");
-               }
-               pg->wire_count = 1;
-               atomic_clearbits_int(&pg->pg_flags, PG_BUSY);
-               bcstats.numbufpages++;
-       }
-
+       uvm_pagealloc_multi(buf_object, offs, size, UVM_PLA_WAITOK);
+       bcstats.numbufpages += atop(size);
+       SET(bp->b_flags, B_DMA);
        bp->b_pobj = buf_object;
        bp->b_poffs = offs;
        bp->b_bufsize = size;
+       splx(s);
+}
+
+/* reallocate into a particular location specified by "where" */
+void
+buf_realloc_pages(struct buf *bp, struct uvm_constraint_range *where)
+{
+       vaddr_t va;
+       int dma = 1;
+       int s, i;
+
+       s = splbio();
+       KASSERT(ISSET(bp->b_flags, B_BUSY));
+
+       /* if the original buf is mapped, unmap it */
+       if (bp->b_data != NULL) {
+               va = (vaddr_t)bp->b_data;
+               pmap_kremove(va, bp->b_bufsize);
+               pmap_update(pmap_kernel());
+       }
+       uvm_pagerealloc_multi(bp->b_pobj, bp->b_poffs, bp->b_bufsize,
+           UVM_PLA_WAITOK, where);
+       /* if the original buf was mapped, re-map it and note DMA reachability */
+       for (i = 0; i < atop(bp->b_bufsize); i++) {
+               struct vm_page *pg = uvm_pagelookup(bp->b_pobj,
+                   bp->b_poffs + ptoa(i));
+               KASSERT(pg != NULL);
+               if (!PADDR_IS_DMA_REACHABLE(VM_PAGE_TO_PHYS(pg)))
+                       dma = 0;
+               if (bp->b_data != NULL) {
+                       pmap_kenter_pa(va + ptoa(i), VM_PAGE_TO_PHYS(pg),
+                           VM_PROT_READ|VM_PROT_WRITE);
+                       pmap_update(pmap_kernel());
+               }
+       }
+       if (dma)
+               SET(bp->b_flags, B_DMA);
+       else
+               CLR(bp->b_flags, B_DMA);
        splx(s);
 }
 
Index: sys/sys/buf.h
===================================================================
RCS file: /cvs/src/sys/sys/buf.h,v
retrieving revision 1.74
diff -u -r1.74 buf.h
--- sys/sys/buf.h       22 Sep 2010 01:18:57 -0000      1.74
+++ sys/sys/buf.h       1 Apr 2011 05:32:21 -0000
@@ -144,6 +144,7 @@
        LIST_ENTRY(buf) b_list;         /* All allocated buffers. */
        LIST_ENTRY(buf) b_vnbufs;       /* Buffer's associated vnode. */
        TAILQ_ENTRY(buf) b_freelist;    /* Free list position if not active. */
+       TAILQ_ENTRY(buf) b_qda;         /* Device Accessible queue position */
        time_t  b_synctime;             /* Time this buffer should be flushed */
        struct  proc *b_proc;           /* Associated proc; NULL if kernel. */
        volatile long   b_flags;        /* B_* flags. */
@@ -189,6 +190,7 @@
 /*
  * These flags are kept in b_flags.
  */
+#define        B_WRITE         0x00000000      /* Write buffer (pseudo flag). */
 #define        B_AGE           0x00000001      /* Move to age queue when I/O done. */
 #define        B_NEEDCOMMIT    0x00000002      /* Needs committing to stable storage */
 #define        B_ASYNC         0x00000004      /* Start I/O, do not wait. */
@@ -197,29 +199,31 @@
 #define        B_CACHE         0x00000020      /* Bread found us in the cache. */
 #define        B_CALL          0x00000040      /* Call b_iodone from biodone. */
 #define        B_DELWRI        0x00000080      /* Delay I/O until buffer reused. */
+#define        B_PRIV          0x00000100      /* Privately allocated buffer */
 #define        B_DONE          0x00000200      /* I/O completed. */
 #define        B_EINTR         0x00000400      /* I/O was interrupted */
 #define        B_ERROR         0x00000800      /* I/O error occurred. */
-#define        B_INVAL         0x00002000      /* Does not contain valid info. */
-#define        B_NOCACHE       0x00008000      /* Do not cache block after use. */
-#define        B_PHYS          0x00040000      /* I/O to user memory. */
-#define        B_RAW           0x00080000      /* Set by physio for raw transfers. */
-#define        B_READ          0x00100000      /* Read buffer. */
-#define        B_WANTED        0x00800000      /* Process wants this buffer. */
-#define        B_WRITE         0x00000000      /* Write buffer (pseudo flag). */
-#define        B_WRITEINPROG   0x01000000      /* Write in progress. */
-#define        B_XXX           0x02000000      /* Debugging flag. */
-#define        B_DEFERRED      0x04000000      /* Skipped over for cleaning */
-#define        B_SCANNED       0x08000000      /* Block already pushed during 
sync */
-#define        B_PDAEMON       0x10000000      /* I/O started by pagedaemon */
-#define B_RELEASED     0x20000000      /* free this buffer after its kvm */
-#define B_NOTMAPPED    0x40000000      /* BUSY, but not necessarily mapped */
-
-#define        B_BITS  "\010\001AGE\002NEEDCOMMIT\003ASYNC\004BAD\005BUSY\006CACHE" \
-    "\007CALL\010DELWRI\012DONE\013EINTR\014ERROR" \
-    "\016INVAL\020NOCACHE\023PHYS\024RAW\025READ" \
-    "\030WANTED\031WRITEINPROG\032XXX\033DEFERRED" \
-    "\034SCANNED\035PDAEMON"
+#define        B_INVAL         0x00001000      /* Does not contain valid info. */
+#define        B_NOCACHE       0x00002000      /* Do not cache block after use. */
+#define        B_PHYS          0x00004000      /* I/O to user memory. */
+#define        B_RAW           0x00008000      /* Set by physio for raw transfers. */
+#define        B_READ          0x00010000      /* Read buffer. */
+#define        B_WANTED        0x00020000      /* Process wants this buffer. */
+#define        B_WRITEINPROG   0x00040000      /* Write in progress. */
+#define        B_XXX           0x00080000      /* Debugging flag. */
+#define        B_DEFERRED      0x00100000      /* Skipped over for cleaning */
+#define        B_SCANNED       0x00200000      /* Block already pushed during sync */
+#define        B_PDAEMON       0x00400000      /* I/O started by pagedaemon */
+#define B_RELEASED     0x00800000      /* free this buffer after its kvm */
+#define B_DMA          0x01000000      /* buffer is DMA reachable */
+#define B_DAQ          0x02000000      /* buffer is on the DA queue */
+#define B_NOTMAPPED    0x04000000      /* BUSY, but not necessarily mapped */
+
+#define        B_BITS  "\20\001AGE\002NEEDCOMMIT\003ASYNC\004BAD\005BUSY" \
+    "\006CACHE\007CALL\010DELWRI\011PRIV\012DONE\013EINTR\014ERROR" \
+    "\015INVAL\016NOCACHE\017PHYS\020RAW\021READ" \
+    "\022WANTED\023WRITEINPROG\024XXX(FORMAT)\025DEFERRED" \
+    "\026SCANNED\027DAEMON\030RELEASED\031DAQ\032DMA\033NOTMAPPED"
 
 /*
  * This structure describes a clustered I/O.  It is stored in the b_saveaddr
Index: sys/uvm/uvm_page.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_page.c,v
retrieving revision 1.102
diff -u -r1.102 uvm_page.c
--- sys/uvm/uvm_page.c  7 Aug 2010 03:50:02 -0000       1.102
+++ sys/uvm/uvm_page.c  1 Apr 2011 05:32:21 -0000
@@ -81,6 +81,11 @@
 
 #include <uvm/uvm.h>
 
+void                   uvm_pagealloc_multi(struct uvm_object *, voff_t,
+                           vsize_t, int);
+void                   uvm_pagerealloc_multi(struct uvm_object *, voff_t,
+                           vsize_t, int, struct uvm_constraint_range *);
+
 /*
  * for object trees
  */
@@ -801,6 +806,69 @@
        pg->owner_tag = NULL;
 #endif
        UVM_PAGE_OWN(pg, "new alloc");
+}
+
+/*
+ * interface used by the buffer cache to allocate a buffer at a time.
+ * The pages are allocated wired in DMA accessible memory
+ */
+void
+uvm_pagealloc_multi(struct uvm_object *obj, voff_t off, vsize_t size, int flags)
+{
+       struct pglist    plist;
+       struct vm_page  *pg;
+       int              i, error;
+
+
+       TAILQ_INIT(&plist);
+       error = uvm_pglistalloc(size, dma_constraint.ucr_low,
+           dma_constraint.ucr_high, 0, 0, &plist, atop(round_page(size)),
+           UVM_PLA_WAITOK);
+       if (error)
+               panic("wtf - uvm_pglistalloc returned %x", error);
+       i = 0;
+       while ((pg = TAILQ_FIRST(&plist)) != NULL) {
+               pg->wire_count = 1;
+               atomic_setbits_int(&pg->pg_flags, PG_CLEAN | PG_FAKE);
+               KASSERT((pg->pg_flags & PG_DEV) == 0);
+               TAILQ_REMOVE(&plist, pg, pageq);
+               uvm_pagealloc_pg(pg, obj, off + ptoa(i++), NULL);
+       }
+}
+
+/*
+ * interface used by the buffer cache to reallocate a buffer at a time.
+ * The pages are reallocated wired outside the DMA accessible region.
+ *
+ */
+void
+uvm_pagerealloc_multi(struct uvm_object *obj, voff_t off, vsize_t size, int flags, struct uvm_constraint_range *where)
+{
+       struct pglist    plist;
+       struct vm_page  *pg, *tpg;
+       int              i, error;
+       voff_t          offset;
+
+
+       TAILQ_INIT(&plist);
+       if (size == 0)
+               panic("size 0 uvm_pagerealloc");
+       error = uvm_pglistalloc(size, where->ucr_low, where->ucr_high, 0,
+           0, &plist, atop(round_page(size)), UVM_PLA_WAITOK);
+       if (error)
+               panic("wtf - uvm_pglistalloc returned %x", error);
+       i = 0;
+       while ((pg = TAILQ_FIRST(&plist)) != NULL) {
+               offset = off + ptoa(i++);
+               tpg = uvm_pagelookup(obj, offset);
+               pg->wire_count = 1;
+               atomic_setbits_int(&pg->pg_flags, PG_CLEAN | PG_FAKE);
+               KASSERT((pg->pg_flags & PG_DEV) == 0);
+               TAILQ_REMOVE(&plist, pg, pageq);
+               uvm_pagecopy(tpg, pg);
+               uvm_pagefree(tpg);
+               uvm_pagealloc_pg(pg, obj, offset, NULL);
+       }
 }
 
 /*
Index: sys/uvm/uvm_pdaemon.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_pdaemon.c,v
retrieving revision 1.56
diff -u -r1.56 uvm_pdaemon.c
--- sys/uvm/uvm_pdaemon.c       26 Sep 2010 12:53:27 -0000      1.56
+++ sys/uvm/uvm_pdaemon.c       1 Apr 2011 08:12:34 -0000
@@ -241,9 +241,9 @@
                /*
                 * get pages from the buffer cache, or scan if needed
                 */
-               if (((uvmexp.free - BUFPAGES_DEFICIT) < uvmexp.freetarg) ||
-                   ((uvmexp.inactive + BUFPAGES_INACT) < uvmexp.inactarg)) {
-                       if (bufbackoff() == -1)
+               if (bufbackoff() == -1 &&
+                   (((uvmexp.free - BUFPAGES_DEFICIT) < uvmexp.freetarg) ||
+                   ((uvmexp.inactive + BUFPAGES_INACT) < uvmexp.inactarg))) {
                                uvmpd_scan();
                }
 
Index: sys/uvm/uvm_swap.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_swap.c,v
retrieving revision 1.100
diff -u -r1.100 uvm_swap.c
--- sys/uvm/uvm_swap.c  21 Dec 2010 20:14:44 -0000      1.100
+++ sys/uvm/uvm_swap.c  1 Apr 2011 05:32:21 -0000
@@ -1952,7 +1952,8 @@
         * fill in the bp.   we currently route our i/o through
         * /dev/drum's vnode [swapdev_vp].
         */
-       bp->b_flags = B_BUSY | B_NOCACHE | B_RAW | (flags & (B_READ|B_ASYNC));
+       bp->b_flags = B_PRIV | B_BUSY | B_NOCACHE | B_RAW |
+           (flags & (B_READ|B_ASYNC));
        bp->b_proc = &proc0;    /* XXX */
        bp->b_vnbufs.le_next = NOLIST;
        if (bounce)
