On Fri, May 16, 2008 at 08:21:25AM -0700, Darrian Hale wrote:
> Can you please point me to where the diffs you refer to reside?
>
> I'd definitely like to try them out.
most of these are filed in sendbug (some for months) already...
here is a cumulative diff, also w/ a bonus himem high-quality software
(in case you managed to squeeze more than 4g of memory in your box ;).

cu
--
paranoic mickey       (my employers have changed but, the name has remained)

Index: arch/i386/conf/GENERIC
===================================================================
RCS file: /cvs/src/sys/arch/i386/conf/GENERIC,v
retrieving revision 1.603
diff -u -r1.603 GENERIC
--- arch/i386/conf/GENERIC	25 Feb 2008 23:16:47 -0000	1.603
+++ arch/i386/conf/GENERIC	7 May 2008 12:55:43 -0000
@@ -37,6 +37,8 @@
 config		bsd	swap generic

 mainbus0 at root
+himem0	at root		# himem.sys
+scsibus* at himem?

 cpu0	at mainbus?
 bios0	at mainbus0
Index: arch/i386/conf/files.i386
===================================================================
RCS file: /cvs/src/sys/arch/i386/conf/files.i386,v
retrieving revision 1.172
diff -u -r1.172 files.i386
--- arch/i386/conf/files.i386	4 Mar 2008 21:14:29 -0000	1.172
+++ arch/i386/conf/files.i386	7 May 2008 12:55:43 -0000
@@ -440,6 +440,10 @@
 attach	esm at mainbus
 file	arch/i386/i386/esm.c		esm needs-flag

+device	himem: scsi
+attach	himem at root
+file	arch/i386/i386/himem.c		himem needs-flag
+
 #
 # VESA
 #
Index: arch/i386/i386/autoconf.c
===================================================================
RCS file: /cvs/src/sys/arch/i386/i386/autoconf.c,v
retrieving revision 1.78
diff -u -r1.78 autoconf.c
--- arch/i386/i386/autoconf.c	27 Dec 2007 18:04:27 -0000	1.78
+++ arch/i386/i386/autoconf.c	7 May 2008 12:55:43 -0000
@@ -71,6 +71,7 @@
 #include <dev/cons.h>

 #include "ioapic.h"
+#include "himem.h"

 #if NIOAPIC > 0
 #include <machine/i82093var.h>
@@ -117,6 +118,10 @@

 	if (config_rootfound("mainbus", NULL) == NULL)
 		panic("cpu_configure: mainbus not configured");
+
+#if NHIMEM > 0
+	config_rootfound("himem", NULL);
+#endif

 #if NIOAPIC > 0
 	if (nioapics > 0)
Index: arch/i386/i386/himem.c
===================================================================
RCS file: arch/i386/i386/himem.c
diff -N arch/i386/i386/himem.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ arch/i386/i386/himem.c	9 May 2008 09:23:37 -0000
@@ -0,0 +1,476 @@
+/* $OpenBSD$ */
+
+/*
+ * Copyright (c) 2008 Michael Shalayeff
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/kthread.h> +#include <sys/queue.h> +#include <sys/mutex.h> +#include <sys/buf.h> +#include <sys/disk.h> +#include <sys/disklabel.h> + +#include <scsi/scsi_all.h> +#include <scsi/scsi_disk.h> +#include <scsi/scsiconf.h> +#include <scsi/sdvar.h> + +#include <uvm/uvm.h> + +/* arbitrary numbers */ +#define HIMEM_MAXCMDS 256 /* each one is a page */ + +/* derived from page table structure */ +#define HIMEM_OFFSET ((sizeof(struct hibuf) + 7) / 8) +#define HIMEM_MAXSEGS (512 - HIMEM_OFFSET - 2) +#define HIMEM_MAXPHYS (HIMEM_MAXSEGS * PAGE_SIZE) + +#define HIMEM_PDE (8) +#define HIMEM_VA (HIMEM_PDE << 21) +#define HIMEM_LOW (HIMEM_VA + (PAGE_SIZE * HIMEM_OFFSET)) +#define HIMEM_HIGH (HIMEM_VA + (PAGE_SIZE * 512)) +#define PDE_MASK ((512 * (PAGE_SIZE / DEV_BSIZE)) - 1) + +void himem_zefix(u_int64_t *, void *, void *, u_int); /* locore.s */ + +struct hibuf { + TAILQ_ENTRY(hibuf) hb_list; + paddr_t hb_pa; + struct scsi_xfer *hb_xs; + void *hb_src, *hb_dst; + u_int hb_bno, hb_len; + int hb_flags; +#define HIMEM_WAKE 0x0001 +}; + +struct himem_softc { + struct device sc_dev; + struct scsi_link sc_link; + + int sc_flags; +#define HIMEM_RDONLY 0x0001 +#define HIMEM_DISKLABEL 0x0002 + int sc_size; /* blocks */ + + struct proc *sc_kthread; + u_int64_t *sc_pdir; + paddr_t sc_paddr; + struct mutex sc_inmtx; + struct mutex sc_freemtx; + TAILQ_HEAD(hibuf_head, hibuf) sc_free, sc_in; +}; + +int himem_scsi_cmd(struct scsi_xfer *); +int himem_scsi_ioctl(struct scsi_link *, u_long, caddr_t, int, struct proc *); +void himeminphys(struct buf *bp); + +struct scsi_adapter himem_switch = { + himem_scsi_cmd, himeminphys, 0, 0, himem_scsi_ioctl +}; + +struct scsi_device himem_dev = { + NULL, NULL, NULL, NULL +}; + +int himem_match(struct device *, void *, void *); +void himem_attach(struct device *, struct device *, void *); + +struct cfattach himem_ca = { + sizeof(struct himem_softc), himem_match, himem_attach +}; + +struct cfdriver himem_cd = { + NULL, "himem", DV_DULL +}; + +void himem(void *); +void himem_create(void *); + +int +himem_match(struct device *parent, void *match, void *aux) +{ + extern u_int64_t avail_end, avail_end2; + struct cfdata *cf = match; + + if (cf->cf_unit) + return 0; + + /* if no PAE or too little memory then screw it */ + if (!(cpu_feature & CPUID_PAE) || avail_end2 == 0 || + (avail_end2 - avail_end) < 4 * HIMEM_MAXCMDS * PAGE_SIZE) + return 0; + + return 1; +} + +void +himem_attach(struct device *parent, struct device *self, void *aux) +{ + extern u_int64_t avail_end2; + struct himem_softc *sc = (struct himem_softc *)self; + struct disklabel *lp; + struct hibuf *bp; + paddr_t pa; + vaddr_t va; + int i, pdsize; + + TAILQ_INIT(&sc->sc_in); + TAILQ_INIT(&sc->sc_free); + mtx_init(&sc->sc_inmtx, IPL_SCHED); + mtx_init(&sc->sc_freemtx, IPL_BIO); + + pdsize = 4 * PAGE_SIZE; + sc->sc_pdir = (u_int64_t *)uvm_km_alloc1(kernel_map, pdsize, + PAGE_SIZE, 1); + if (!sc->sc_pdir) { + printf(": cannot allocate page index\n"); + return; + } + +#define vatopa(va) pmap_extract(pmap_kernel(), (vaddr_t)va, &pa) + /* we do know like for sure there ain't no like user space */ + vatopa((vaddr_t)sc->sc_pdir + 0 * PAGE_SIZE); sc->sc_paddr = pa; + sc->sc_pdir[0] = pa | PG_V; + vatopa((vaddr_t)sc->sc_pdir + 1 * PAGE_SIZE); + sc->sc_pdir[1] = pa | PG_V; + vatopa((vaddr_t)sc->sc_pdir + 2 * PAGE_SIZE); + sc->sc_pdir[2] = pa | PG_V; + vatopa((vaddr_t)sc->sc_pdir + 3 * PAGE_SIZE); + sc->sc_pdir[3] = pa | PG_V; + + /* 8M of kernel 
code is ment to be enough for everbody */ + sc->sc_pdir[(KERNBASE >> 21) + 0] = 0x000000 | + PG_KW | PG_M | PG_U | PG_V | PG_PS; + sc->sc_pdir[(KERNBASE >> 21) + 1] = 0x200000 | + PG_KW | PG_M | PG_U | PG_V | PG_PS; + sc->sc_pdir[(KERNBASE >> 21) + 2] = 0x400000 | + PG_KW | PG_M | PG_U | PG_V | PG_PS; + sc->sc_pdir[(KERNBASE >> 21) + 3] = 0x600000 | + PG_KW | PG_M | PG_U | PG_V | PG_PS; + + bp = (struct hibuf *)uvm_km_alloc1(kernel_map, + HIMEM_MAXCMDS * PAGE_SIZE, PAGE_SIZE, 1); + if (!bp) { + uvm_km_free(kmem_map, (vaddr_t)sc->sc_pdir, pdsize); + printf(": no memory for buffers\n"); + return; + } + + /* each buf is a page table and hibuf in one! */ + for (i = HIMEM_MAXCMDS, va = (vaddr_t)bp; i--; va += PAGE_SIZE) { + bp = (struct hibuf *)va; + TAILQ_INSERT_TAIL(&sc->sc_free, bp, hb_list); + pmap_extract(pmap_kernel(), va, &bp->hb_pa); + } + + sc->sc_size = (avail_end2 - 0x100000000ULL) / DEV_BSIZE; + printf(": size %uMB\n", sc->sc_size / 2048); + + /* set a fake diskalbel */ + lp = (void *)uvm_km_zalloc(kernel_map, round_page(sizeof(*lp))); + if (lp) { + lp->d_secsize = DEV_BSIZE; + lp->d_ntracks = 255; + lp->d_nsectors = 63; + lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; + lp->d_ncylinders = sc->sc_size / lp->d_secpercyl; + lp->d_type = DTYPE_SCSI; + strncpy(lp->d_typename, "SCSI disk", sizeof(lp->d_typename)); + strncpy(lp->d_packname, "HIMEM drive", sizeof(lp->d_packname)); + DL_SETDSIZE(lp, sc->sc_size); + lp->d_rpm = 32768; + lp->d_interleave = 1; + lp->d_version = 1; + lp->d_flags = 0; + lp->d_bbsize = 8192; + lp->d_sbsize = 65536; + lp->d_magic = DISKMAGIC; + lp->d_magic2 = DISKMAGIC; + + lp->d_npartitions = MAXPARTITIONS; + lp->d_partitions[0].p_fstype = FS_BSDFFS; + DL_SETPSIZE(&lp->d_partitions[0], sc->sc_size); + DL_SETPOFFSET(&lp->d_partitions[0], 0); + lp->d_partitions[0].p_fragblock = + DISKLABELV1_FFS_FRAGBLOCK(4096, 4); + DL_SETPSIZE(&lp->d_partitions[RAW_PART], sc->sc_size); + DL_SETPOFFSET(&lp->d_partitions[RAW_PART], 0); + + lp->d_checksum = dkcksum(lp); + + /* write out the label */ + vatopa(lp); + sc->sc_pdir[HIMEM_PDE] = PG_KW | PG_U | PG_M | PG_V | PG_PS | + (pa & ~(0x200000 - 1)); + sc->sc_pdir[HIMEM_PDE+1] = PG_KW | PG_U | PG_M | PG_V | PG_PS | + 0x100000000ULL; + + himem_zefix(sc->sc_pdir, + (char *)HIMEM_VA + (pa & (0x200000 - 1)), + (char *)HIMEM_HIGH + LABELSECTOR * DEV_BSIZE, DEV_BSIZE); + + sc->sc_pdir[HIMEM_PDE] = 0; + sc->sc_pdir[HIMEM_PDE+1] = 0; + uvm_km_free(kernel_map, (vaddr_t)lp, round_page(sizeof(*lp))); + } +#undef vatopa + + kthread_create_deferred(himem_create, sc); +} + +void +himem_create(void *v) +{ + struct himem_softc *sc = v; + + kthread_create(himem, sc, &sc->sc_kthread, "himem.sys"); +} + +int +himem_scsi_cmd(struct scsi_xfer *xs) +{ + struct scsi_link *link = xs->sc_link; + struct himem_softc *sc = link->adapter_softc; + struct scsi_read_cap_data rcd; + struct scsi_inquiry_data inq; + struct hibuf *bp; + u_int64_t *ptes; + paddr_t pa; + vaddr_t va, eva; + u_int bno, bcnt; + int s, res; + + s = splbio(); + if (link->target > 0 || !sc->sc_size || link->lun != 0) { + bzero(&xs->sense, sizeof(xs->sense)); + xs->sense.error_code = SSD_ERRCODE_VALID | 0x70; + xs->sense.flags = SKEY_ILLEGAL_REQUEST; + xs->sense.add_sense_code = 0x20; /* illcmd, 0x24 illfield */ + xs->error = XS_SENSE; + scsi_done(xs); + splx(s); + return (COMPLETE); + } + + xs->error = XS_NOERROR; + switch (xs->cmd->opcode) { + default: + xs->sense.error_code = SSD_ERRCODE_VALID | 0x70; + xs->sense.flags = SKEY_ILLEGAL_REQUEST; + xs->sense.add_sense_code = 0x20; /* 
illcmd, 0x24 illfield */ + xs->error = XS_SENSE; + scsi_done(xs); + splx(s); + return (COMPLETE); + + case TEST_UNIT_READY: + case START_STOP: + case PREVENT_ALLOW: + splx(s); + return COMPLETE; + + case INQUIRY: + bzero(&inq, sizeof(inq)); + inq.device = T_DIRECT; + inq.dev_qual2 = 0; + inq.version = 2; + inq.response_format = 2; + inq.additional_length = 32; + strlcpy(inq.vendor, "McIkye ", sizeof(inq.vendor)); + strlcpy(inq.product, "HIMEM drive ", sizeof(inq.product)); + strlcpy(inq.revision, "1.1", sizeof(inq.revision)); + bcopy(&inq, xs->data, MIN(sizeof(inq), xs->datalen)); + scsi_done(xs); + splx(s); + return COMPLETE; + + case READ_CAPACITY: + bzero(&rcd, sizeof(rcd)); + _lto4b(sc->sc_size - 1, rcd.addr); + _lto4b(DEV_BSIZE, rcd.length); + bcopy(&rcd, xs->data, MIN(sizeof(rcd), xs->datalen)); + scsi_done(xs); + splx(s); + return COMPLETE; + + case SYNCHRONIZE_CACHE: + mtx_enter(&sc->sc_inmtx); + if (!TAILQ_EMPTY(&sc->sc_in)) { + bp = TAILQ_LAST(&sc->sc_in, hibuf_head); + bp->hb_flags |= HIMEM_WAKE; + msleep(bp, &sc->sc_inmtx, PRIBIO, "himem.sync", 0); + } + mtx_leave(&sc->sc_inmtx); + scsi_done(xs); + splx(s); + return COMPLETE; + + case WRITE_COMMAND: + case WRITE_BIG: + if (sc->sc_flags & HIMEM_RDONLY) { + xs->sense.error_code = SSD_ERRCODE_VALID | 0x70; + xs->sense.flags = SKEY_WRITE_PROTECT; + xs->sense.add_sense_code = 0; + xs->error = XS_SENSE; + scsi_done(xs); + splx(s); + return COMPLETE; + } + case READ_COMMAND: + case READ_BIG: + if (xs->cmdlen == 6) { + struct scsi_rw *rw = (struct scsi_rw *)xs->cmd; + bno = _3btol(rw->addr) & (SRW_TOPADDR << 16 | 0xffff); + bcnt = rw->length ? rw->length : 0x100; + } else { + struct scsi_rw_big *rwb = (struct scsi_rw_big *)xs->cmd; + bno = _4btol(rwb->addr); + bcnt = _2btol(rwb->length); + } + + if (bno >= sc->sc_size || bno + bcnt > sc->sc_size) { + xs->error = XS_DRIVER_STUFFUP; + scsi_done(xs); + splx(s); + return COMPLETE; + } + + mtx_enter(&sc->sc_freemtx); + bp = TAILQ_FIRST(&sc->sc_free); + TAILQ_REMOVE(&sc->sc_free, bp, hb_list); + mtx_leave(&sc->sc_freemtx); + splx(s); + + bp->hb_xs = xs; + res = (vaddr_t)xs->data & PAGE_MASK; + if (xs->cmd->opcode == READ_COMMAND || + xs->cmd->opcode == READ_BIG) { + bp->hb_dst = (char *)HIMEM_LOW + res; + bp->hb_src = (char *)HIMEM_HIGH + + ((bno & PDE_MASK) * DEV_BSIZE); + } else { + bp->hb_src = (char *)HIMEM_LOW + res; + bp->hb_dst = (char *)HIMEM_HIGH + + ((bno & PDE_MASK) * DEV_BSIZE); + } + + bp->hb_len = xs->datalen; + bp->hb_bno = bno; + bp->hb_flags = 0; + + ptes = (u_int64_t *)bp + HIMEM_OFFSET; + for (va = (vaddr_t)xs->data - res, + eva = (vaddr_t)xs->data + xs->datalen + PAGE_SIZE - 1; + va < eva; va += PAGE_SIZE) { + pmap_extract(pmap_kernel(), va, &pa); + *ptes++ = pa | PG_KW | PG_U | PG_M | PG_V; + } + + if (xs->flags & SCSI_POLL) + bp->hb_flags |= HIMEM_WAKE; + + mtx_enter(&sc->sc_inmtx); + TAILQ_INSERT_TAIL(&sc->sc_in, bp, hb_list); + mtx_leave(&sc->sc_inmtx); + wakeup(&sc->sc_in); + if (xs->flags & SCSI_POLL) { + tsleep(bp, PRIBIO, "himem.poll", 0); + return COMPLETE; + } else + return SUCCESSFULLY_QUEUED; + } +} + +int +himem_scsi_ioctl(struct scsi_link *link, u_long cmd, caddr_t addr, int flag, + struct proc *p) +{ + /* struct himem_softc *sc = link->adapter_softc; */ + + /* TODO implement set-rdonly */ + return ENOTTY; +} + +void +himeminphys(struct buf *bp) +{ + if (bp->b_bcount > HIMEM_MAXPHYS) + bp->b_bcount = HIMEM_MAXPHYS; + minphys(bp); +} + +void +himem(void *v) +{ + struct scsibus_attach_args saa; + struct himem_softc *sc = v; + struct scsi_xfer *xs; + struct 
hibuf *bp; + int s, wake; + + sc->sc_link.device = &himem_dev; + sc->sc_link.device_softc = sc; + sc->sc_link.adapter = &himem_switch; + sc->sc_link.adapter_softc = sc; + sc->sc_link.adapter_target = 1; + sc->sc_link.adapter_buswidth = 1; + sc->sc_link.openings = HIMEM_MAXCMDS; + bzero(&saa, sizeof(saa)); + saa.saa_sc_link = &sc->sc_link; + config_found(&sc->sc_dev, &saa, scsiprint); + + KERNEL_PROC_UNLOCK(curproc); + + for (;;) { + mtx_enter(&sc->sc_inmtx); + while (TAILQ_EMPTY(&sc->sc_in)) + msleep(&sc->sc_in, &sc->sc_inmtx, PRIBIO, "himem.sys", 0); + + bp = TAILQ_FIRST(&sc->sc_in); + TAILQ_REMOVE(&sc->sc_in, bp, hb_list); + mtx_leave(&sc->sc_inmtx); + + sc->sc_pdir[HIMEM_PDE] = bp->hb_pa | PG_KW | PG_U | PG_M | PG_V; + sc->sc_pdir[HIMEM_PDE+1] = PG_KW | PG_U | PG_M | PG_V | PG_PS | + (0x100000000ULL + (bp->hb_bno & ~PDE_MASK) * DEV_BSIZE); + sc->sc_pdir[HIMEM_PDE+2] = sc->sc_pdir[HIMEM_PDE+1] + 0x200000; + + himem_zefix(sc->sc_pdir, bp->hb_src, bp->hb_dst, bp->hb_len); + + sc->sc_pdir[HIMEM_PDE] = 0; + sc->sc_pdir[HIMEM_PDE+1] = 0; + + xs = bp->hb_xs; + xs->resid -= bp->hb_len; + xs->flags |= ITSDONE; + wake = bp->hb_flags & HIMEM_WAKE; + mtx_enter(&sc->sc_freemtx); + TAILQ_INSERT_HEAD(&sc->sc_free, bp, hb_list); + mtx_leave(&sc->sc_freemtx); + + KERNEL_PROC_LOCK(curproc); + s = splbio(); + scsi_done(xs); + splx(s); + if (wake) + wakeup(bp); + KERNEL_PROC_UNLOCK(curproc); + } +} Index: arch/i386/i386/locore.s =================================================================== RCS file: /cvs/src/sys/arch/i386/i386/locore.s,v retrieving revision 1.121 diff -u -r1.121 locore.s --- arch/i386/i386/locore.s 28 Nov 2007 17:05:09 -0000 1.121 +++ arch/i386/i386/locore.s 9 May 2008 09:27:25 -0000 @@ -44,6 +44,7 @@ #include "pctr.h" #include "ksyms.h" #include "acpi.h" +#include "himem.h" #include <sys/errno.h> #include <sys/syscall.h> @@ -1846,6 +1847,70 @@ popl %ebx popl %edi + ret +#endif + +#if NHIMEM > 0 +/* + * void + * himem_zefix(vaddr_t pdir, void *src, void *dst, int len) + * + * switch to PAE mode and copy some pages. come back before xmas. + * hafta be some serious splipi() or bad shitz would happenz. 
+ * + */ +ENTRY(himem_zefix) +#ifdef DDB + pushl %ebp + movl %esp,%ebp +#endif + pushf + cli + pushl %esi + pushl %edi + movl 8(%ebp), %edi + movl %cr3, %esi + addl $KERNBASE, %esi + addl $4*8, %edi /* save current PD content */ + movl $8, %ecx + cld + rep + movsl + movl 8(%ebp), %esi /* install our PDI */ + movl %cr3, %edi + addl $KERNBASE, %edi + movl $8, %ecx + cld + rep + movsl + movl 12(%ebp), %esi + movl 16(%ebp), %edi + movl 20(%ebp), %ecx + shrl $2, %ecx + movl %cr4, %eax + orl $(CR4_PAE|CR4_PSE), %eax + movl %eax, %cr4 /* also flushes the hell out of TLBs */ + cld + rep + movsl + movl %cr4, %eax + andl $~(CR4_PAE|CR4_PSE), %eax + movl %eax, %cr4 /* again flushes the hell out of TLBs */ + xorl %eax, %eax + movl 8(%ebp), %esi /* restore original PD contents */ + movl %cr3, %edi + addl $KERNBASE, %edi + addl $4*8, %esi + movl $8, %ecx + cld + rep + movsl + popl %edi + popl %esi + popf +#ifdef DDB + leave +#endif ret #endif Index: arch/i386/i386/machdep.c =================================================================== RCS file: /cvs/src/sys/arch/i386/i386/machdep.c,v retrieving revision 1.418 diff -u -r1.418 machdep.c --- arch/i386/i386/machdep.c 18 Feb 2008 16:31:55 -0000 1.418 +++ arch/i386/i386/machdep.c 7 May 2008 12:55:43 -0000 @@ -234,7 +234,7 @@ int i386_has_xcrypt; bootarg_t *bootargp; -paddr_t avail_end; +u_int64_t avail_end, avail_end2; struct vm_map *exec_map = NULL; struct vm_map *phys_map = NULL; @@ -2970,7 +2970,7 @@ * account all the memory passed in the map from /boot * calculate avail_end and count the physmem. */ - avail_end = 0; + avail_end = avail_end2 = 0; physmem = 0; #ifdef DEBUG printf("memmap:"); @@ -2986,6 +2986,8 @@ #ifdef DEBUG printf("-H"); #endif + avail_end2 = MAX(avail_end2, + trunc_page(im->addr + im->size)); continue; } Index: arch/i386/i386/pmap.c =================================================================== RCS file: /cvs/src/sys/arch/i386/i386/pmap.c,v retrieving revision 1.122 diff -u -r1.122 pmap.c --- arch/i386/i386/pmap.c 13 Jan 2008 20:47:00 -0000 1.122 +++ arch/i386/i386/pmap.c 7 May 2008 11:53:12 -0000 @@ -189,10 +189,6 @@ * in the alternate PTE space (since that is determined by the * entry in the PDP). * - * - pvalloc_lock - * this lock protects the data structures which are used to manage - * the free list of pv_entry structures. - * * - pmaps_lock * this lock protects the list of active pmaps (headed by "pmaps"). * we lock it when adding or removing pmaps from this list. 
@@ -203,7 +199,6 @@ * locking data structures */ -struct simplelock pvalloc_lock; struct simplelock pmaps_lock; #define PMAP_MAP_TO_HEAD_LOCK() /* null */ @@ -265,21 +260,6 @@ static vaddr_t virtual_end; /* VA of last free KVA */ /* - * pv_page management structures: locked by pvalloc_lock - */ - -TAILQ_HEAD(pv_pagelist, pv_page); -static struct pv_pagelist pv_freepages; /* list of pv_pages with free entries */ -static struct pv_pagelist pv_unusedpgs; /* list of unused pv_pages */ -static int pv_nfpvents; /* # of free pv entries */ -static struct pv_page *pv_initpage; /* bootstrap page from kernel_map */ -static vaddr_t pv_cachedva; /* cached VA for later use */ - -#define PVE_LOWAT (PVE_PER_PVPAGE / 2) /* free pv_entry low water mark */ -#define PVE_HIWAT (PVE_LOWAT + (PVE_PER_PVPAGE * 2)) - /* high water mark */ - -/* * linked list of all non-kernel pmaps */ @@ -290,6 +270,7 @@ */ struct pool pmap_pmap_pool; +struct pool pmap_pvpl; /* * MULTIPROCESSOR: special VA's/ PTE's are actually allocated inside a @@ -321,19 +302,10 @@ * local prototypes */ -struct pv_entry *pmap_add_pvpage(struct pv_page *, boolean_t); struct vm_page *pmap_alloc_ptp(struct pmap *, int, boolean_t, pt_entry_t); -struct pv_entry *pmap_alloc_pv(struct pmap *, int); /* see codes below */ -#define ALLOCPV_NEED 0 /* need PV now */ -#define ALLOCPV_TRY 1 /* just try to allocate */ -#define ALLOCPV_NONEED 2 /* don't need PV, just growing cache */ -struct pv_entry *pmap_alloc_pvpage(struct pmap *, int); void pmap_enter_pv(struct vm_page *, struct pv_entry *, struct pmap *, vaddr_t, struct vm_page *); -void pmap_free_pv(struct pmap *, struct pv_entry *); void pmap_free_pvs(struct pmap *, struct pv_entry *); -void pmap_free_pv_doit(struct pv_entry *); -void pmap_free_pvpage(void); struct vm_page *pmap_get_ptp(struct pmap *, int, boolean_t); boolean_t pmap_is_curpmap(struct pmap *); boolean_t pmap_is_active(struct pmap *, int); @@ -349,10 +321,6 @@ #define PMAP_REMOVE_ALL 0 #define PMAP_REMOVE_SKIPWIRED 1 -vaddr_t pmap_tmpmap_pa(paddr_t); -pt_entry_t *pmap_tmpmap_pvepte(struct pv_entry *); -void pmap_tmpunmap_pa(void); -void pmap_tmpunmap_pvepte(struct pv_entry *); void pmap_apte_flush(struct pmap *); void pmap_unmap_ptes(struct pmap *); void pmap_exec_account(struct pmap *, vaddr_t, pt_entry_t, @@ -418,87 +386,6 @@ } } -/* - * pmap_tmpmap_pa: map a page in for tmp usage - */ - -vaddr_t -pmap_tmpmap_pa(paddr_t pa) -{ -#ifdef MULTIPROCESSOR - int id = cpu_number(); -#endif - pt_entry_t *ptpte = PTESLEW(ptp_pte, id); - caddr_t ptpva = VASLEW(ptpp, id); -#if defined(DIAGNOSTIC) - if (*ptpte) - panic("pmap_tmpmap_pa: ptp_pte in use?"); -#endif - *ptpte = PG_V | PG_RW | pa; /* always a new mapping */ - return((vaddr_t)ptpva); -} - -/* - * pmap_tmpunmap_pa: unmap a tmp use page (undoes pmap_tmpmap_pa) - */ - -void -pmap_tmpunmap_pa() -{ -#ifdef MULTIPROCESSOR - int id = cpu_number(); -#endif - pt_entry_t *ptpte = PTESLEW(ptp_pte, id); - caddr_t ptpva = VASLEW(ptpp, id); -#if defined(DIAGNOSTIC) - if (!pmap_valid_entry(*ptpte)) - panic("pmap_tmpunmap_pa: our pte invalid?"); -#endif - *ptpte = 0; /* zap! */ - pmap_update_pg((vaddr_t)ptpva); -#ifdef MULTIPROCESSOR - /* - * No need for tlb shootdown here, since ptp_pte is per-CPU. - */ -#endif -} - -/* - * pmap_tmpmap_pvepte: get a quick mapping of a PTE for a pv_entry - * - * => do NOT use this on kernel mappings [why? 
because pv_ptp may be NULL] - */ - -pt_entry_t * -pmap_tmpmap_pvepte(struct pv_entry *pve) -{ -#ifdef DIAGNOSTIC - if (pve->pv_pmap == pmap_kernel()) - panic("pmap_tmpmap_pvepte: attempt to map kernel"); -#endif - - /* is it current pmap? use direct mapping... */ - if (pmap_is_curpmap(pve->pv_pmap)) - return(vtopte(pve->pv_va)); - - return(((pt_entry_t *)pmap_tmpmap_pa(VM_PAGE_TO_PHYS(pve->pv_ptp))) - + ptei((unsigned)pve->pv_va)); -} - -/* - * pmap_tmpunmap_pvepte: release a mapping obtained with pmap_tmpmap_pvepte - */ - -void -pmap_tmpunmap_pvepte(struct pv_entry *pve) -{ - /* was it current pmap? if so, return */ - if (pmap_is_curpmap(pve->pv_pmap)) - return; - - pmap_tmpunmap_pa(); -} - void pmap_apte_flush(struct pmap *pmap) { @@ -900,11 +787,8 @@ * init the static-global locks and global lists. */ - simple_lock_init(&pvalloc_lock); simple_lock_init(&pmaps_lock); LIST_INIT(&pmaps); - TAILQ_INIT(&pv_freepages); - TAILQ_INIT(&pv_unusedpgs); /* * initialize the pmap pool. @@ -913,6 +797,9 @@ pool_init(&pmap_pmap_pool, sizeof(struct pmap), 0, 0, 0, "pmappl", &pool_allocator_nointr); + pool_init(&pmap_pvpl, sizeof(struct pv_entry), 0, 0, 0, "pvpl", + &pool_allocator_nointr); + /* * ensure the TLB is sync'd with reality by flushing it... */ @@ -930,342 +817,21 @@ pmap_init(void) { /* - * now we need to free enough pv_entry structures to allow us to get - * the kmem_map allocated and inited (done after this function is - * finished). to do this we allocate one bootstrap page out of - * kernel_map and use it to provide an initial pool of pv_entry - * structures. we never free this page. - */ - - pv_initpage = (struct pv_page *) uvm_km_alloc(kernel_map, PAGE_SIZE); - if (pv_initpage == NULL) - panic("pmap_init: pv_initpage"); - pv_cachedva = 0; /* a VA we have allocated but not used yet */ - pv_nfpvents = 0; - (void) pmap_add_pvpage(pv_initpage, FALSE); - - /* * done: pmap module is up (and ready for business) */ pmap_initialized = TRUE; } -/* - * p v _ e n t r y f u n c t i o n s - */ - -/* - * pv_entry allocation functions: - * the main pv_entry allocation functions are: - * pmap_alloc_pv: allocate a pv_entry structure - * pmap_free_pv: free one pv_entry - * pmap_free_pvs: free a list of pv_entrys - * - * the rest are helper functions - */ - -/* - * pmap_alloc_pv: inline function to allocate a pv_entry structure - * => we lock pvalloc_lock - * => if we fail, we call out to pmap_alloc_pvpage - * => 3 modes: - * ALLOCPV_NEED = we really need a pv_entry - * ALLOCPV_TRY = we want a pv_entry - * ALLOCPV_NONEED = we are trying to grow our free list, don't really need - * one now - * - * "try" is for optional functions like pmap_copy(). - */ - -struct pv_entry * -pmap_alloc_pv(struct pmap *pmap, int mode) -{ - struct pv_page *pvpage; - struct pv_entry *pv; - - simple_lock(&pvalloc_lock); - - if (!TAILQ_EMPTY(&pv_freepages)) { - pvpage = TAILQ_FIRST(&pv_freepages); - pvpage->pvinfo.pvpi_nfree--; - if (pvpage->pvinfo.pvpi_nfree == 0) { - /* nothing left in this one? */ - TAILQ_REMOVE(&pv_freepages, pvpage, pvinfo.pvpi_list); - } - pv = pvpage->pvinfo.pvpi_pvfree; -#ifdef DIAGNOSTIC - if (pv == NULL) - panic("pmap_alloc_pv: pvpi_nfree off"); -#endif - pvpage->pvinfo.pvpi_pvfree = pv->pv_next; - pv_nfpvents--; /* took one from pool */ - } else { - pv = NULL; /* need more of them */ - } - - /* - * if below low water mark or we didn't get a pv_entry we try and - * create more pv_entrys ... 
- */ - - if (pv_nfpvents < PVE_LOWAT || pv == NULL) { - if (pv == NULL) - pv = pmap_alloc_pvpage(pmap, (mode == ALLOCPV_TRY) ? - mode : ALLOCPV_NEED); - else - (void) pmap_alloc_pvpage(pmap, ALLOCPV_NONEED); - } - - simple_unlock(&pvalloc_lock); - return(pv); -} - -/* - * pmap_alloc_pvpage: maybe allocate a new pvpage - * - * if need_entry is false: try and allocate a new pv_page - * if need_entry is true: try and allocate a new pv_page and return a - * new pv_entry from it. - * - * => we assume that the caller holds pvalloc_lock - */ - -struct pv_entry * -pmap_alloc_pvpage(struct pmap *pmap, int mode) -{ - struct vm_page *pg; - struct pv_page *pvpage; - struct pv_entry *pv; - int s; - - /* - * if we need_entry and we've got unused pv_pages, allocate from there - */ - - if (mode != ALLOCPV_NONEED && !TAILQ_EMPTY(&pv_unusedpgs)) { - - /* move it to pv_freepages list */ - pvpage = TAILQ_FIRST(&pv_unusedpgs); - TAILQ_REMOVE(&pv_unusedpgs, pvpage, pvinfo.pvpi_list); - TAILQ_INSERT_HEAD(&pv_freepages, pvpage, pvinfo.pvpi_list); - - /* allocate a pv_entry */ - pvpage->pvinfo.pvpi_nfree--; /* can't go to zero */ - pv = pvpage->pvinfo.pvpi_pvfree; -#ifdef DIAGNOSTIC - if (pv == NULL) - panic("pmap_alloc_pvpage: pvpi_nfree off"); -#endif - pvpage->pvinfo.pvpi_pvfree = pv->pv_next; - - pv_nfpvents--; /* took one from pool */ - return(pv); - } - - /* - * see if we've got a cached unmapped VA that we can map a page in. - * if not, try to allocate one. - */ - - s = splvm(); /* must protect kmem_map with splvm! */ - if (pv_cachedva == 0) { - pv_cachedva = uvm_km_kmemalloc(kmem_map, NULL, - NBPG, UVM_KMF_TRYLOCK|UVM_KMF_VALLOC); - } - splx(s); - if (pv_cachedva == 0) - return (NULL); - - pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_USERESERVE); - if (pg == NULL) - return (NULL); - - atomic_clearbits_int(&pg->pg_flags, PG_BUSY); - - /* - * add a mapping for our new pv_page and free its entries (save one!) - * - * NOTE: If we are allocating a PV page for the kernel pmap, the - * pmap is already locked! (...but entering the mapping is safe...) - */ - - pmap_kenter_pa(pv_cachedva, VM_PAGE_TO_PHYS(pg), - VM_PROT_READ|VM_PROT_WRITE); - pvpage = (struct pv_page *) pv_cachedva; - pv_cachedva = 0; - return (pmap_add_pvpage(pvpage, mode != ALLOCPV_NONEED)); -} - -/* - * pmap_add_pvpage: add a pv_page's pv_entrys to the free list - * - * => caller must hold pvalloc_lock - * => if need_entry is true, we allocate and return one pv_entry - */ - -struct pv_entry * -pmap_add_pvpage(struct pv_page *pvp, boolean_t need_entry) -{ - int tofree, lcv; - - /* do we need to return one? */ - tofree = (need_entry) ? PVE_PER_PVPAGE - 1 : PVE_PER_PVPAGE; - - pvp->pvinfo.pvpi_pvfree = NULL; - pvp->pvinfo.pvpi_nfree = tofree; - for (lcv = 0 ; lcv < tofree ; lcv++) { - pvp->pvents[lcv].pv_next = pvp->pvinfo.pvpi_pvfree; - pvp->pvinfo.pvpi_pvfree = &pvp->pvents[lcv]; - } - if (need_entry) - TAILQ_INSERT_TAIL(&pv_freepages, pvp, pvinfo.pvpi_list); - else - TAILQ_INSERT_TAIL(&pv_unusedpgs, pvp, pvinfo.pvpi_list); - pv_nfpvents += tofree; - return((need_entry) ? &pvp->pvents[lcv] : NULL); -} - -/* - * pmap_free_pv_doit: actually free a pv_entry - * - * => do not call this directly! instead use either - * 1. pmap_free_pv ==> free a single pv_entry - * 2. 
pmap_free_pvs => free a list of pv_entrys - * => we must be holding pvalloc_lock - */ - -void -pmap_free_pv_doit(struct pv_entry *pv) -{ - struct pv_page *pvp; - - pvp = (struct pv_page*)trunc_page((vaddr_t)pv); - pv_nfpvents++; - pvp->pvinfo.pvpi_nfree++; - - /* nfree == 1 => fully allocated page just became partly allocated */ - if (pvp->pvinfo.pvpi_nfree == 1) { - TAILQ_INSERT_HEAD(&pv_freepages, pvp, pvinfo.pvpi_list); - } - - /* free it */ - pv->pv_next = pvp->pvinfo.pvpi_pvfree; - pvp->pvinfo.pvpi_pvfree = pv; - - /* - * are all pv_page's pv_entry's free? move it to unused queue. - */ - - if (pvp->pvinfo.pvpi_nfree == PVE_PER_PVPAGE) { - TAILQ_REMOVE(&pv_freepages, pvp, pvinfo.pvpi_list); - TAILQ_INSERT_HEAD(&pv_unusedpgs, pvp, pvinfo.pvpi_list); - } -} - -/* - * pmap_free_pv: free a single pv_entry - * - * => we gain the pvalloc_lock - */ - -void -pmap_free_pv(struct pmap *pmap, struct pv_entry *pv) -{ - simple_lock(&pvalloc_lock); - pmap_free_pv_doit(pv); - - /* - * Can't free the PV page if the PV entries were associated with - * the kernel pmap; the pmap is already locked. - */ - if (pv_nfpvents > PVE_HIWAT && TAILQ_FIRST(&pv_unusedpgs) != NULL && - pmap != pmap_kernel()) - pmap_free_pvpage(); - - simple_unlock(&pvalloc_lock); -} - -/* - * pmap_free_pvs: free a list of pv_entrys - * - * => we gain the pvalloc_lock - */ - void pmap_free_pvs(struct pmap *pmap, struct pv_entry *pvs) { struct pv_entry *nextpv; - simple_lock(&pvalloc_lock); - for ( /* null */ ; pvs != NULL ; pvs = nextpv) { nextpv = pvs->pv_next; - pmap_free_pv_doit(pvs); - } - - /* - * Can't free the PV page if the PV entries were associated with - * the kernel pmap; the pmap is already locked. - */ - if (pv_nfpvents > PVE_HIWAT && TAILQ_FIRST(&pv_unusedpgs) != NULL && - pmap != pmap_kernel()) - pmap_free_pvpage(); - - simple_unlock(&pvalloc_lock); -} - - -/* - * pmap_free_pvpage: try and free an unused pv_page structure - * - * => assume caller is holding the pvalloc_lock and that - * there is a page on the pv_unusedpgs list - * => if we can't get a lock on the kmem_map we try again later - */ - -void -pmap_free_pvpage(void) -{ - int s; - struct vm_map *map; - struct vm_map_entry *dead_entries; - struct pv_page *pvp; - - s = splvm(); /* protect kmem_map */ - pvp = TAILQ_FIRST(&pv_unusedpgs); - - /* - * note: watch out for pv_initpage which is allocated out of - * kernel_map rather than kmem_map. - */ - - if (pvp == pv_initpage) - map = kernel_map; - else - map = kmem_map; - if (vm_map_lock_try(map)) { - - /* remove pvp from pv_unusedpgs */ - TAILQ_REMOVE(&pv_unusedpgs, pvp, pvinfo.pvpi_list); - - /* unmap the page */ - dead_entries = NULL; - uvm_unmap_remove(map, (vaddr_t)pvp, ((vaddr_t)pvp) + PAGE_SIZE, - &dead_entries, NULL); - vm_map_unlock(map); - - if (dead_entries != NULL) - uvm_unmap_detach(dead_entries, 0); - - pv_nfpvents -= PVE_PER_PVPAGE; /* update free count */ + pool_put(&pmap_pvpl, pvs); } - - if (pvp == pv_initpage) - /* no more initpage, we've freed it */ - pv_initpage = NULL; - - splx(s); } /* @@ -1991,7 +1557,7 @@ pmap_sync_flags_pte(pg, opte); pve = pmap_remove_pv(pg, pmap, va); if (pve) - pmap_free_pv(pmap, pve); + pool_put(&pmap_pvpl, pve); return(TRUE); } @@ -2704,7 +2270,7 @@ if (pg != NULL) { if (pve == NULL) { - pve = pmap_alloc_pv(pmap, ALLOCPV_NEED); + pve = pool_get(&pmap_pvpl, PR_WAITOK); if (pve == NULL) { if (flags & PMAP_CANFAIL) { /* @@ -2722,7 +2288,7 @@ /* new mapping is not PG_PVLIST. 
free pve if we've got one */ if (pve) - pmap_free_pv(pmap, pve); + pool_put(&pmap_pvpl, pve); } enter_now: Index: arch/i386/isa/isa_machdep.c =================================================================== RCS file: /cvs/src/sys/arch/i386/isa/isa_machdep.c,v retrieving revision 1.61 diff -u -r1.61 isa_machdep.c --- arch/i386/isa/isa_machdep.c 7 Sep 2007 15:00:19 -0000 1.61 +++ arch/i386/isa/isa_machdep.c 7 May 2008 12:55:43 -0000 @@ -105,7 +105,7 @@ #include "isadma.h" -extern paddr_t avail_end; +extern u_int64_t avail_end; #define IDTVEC(name) __CONCAT(X,name) /* default interrupt vector table entries */ Index: arch/i386/pci/pci_addr_fixup.c =================================================================== RCS file: /cvs/src/sys/arch/i386/pci/pci_addr_fixup.c,v retrieving revision 1.21 diff -u -r1.21 pci_addr_fixup.c --- arch/i386/pci/pci_addr_fixup.c 20 Feb 2007 21:15:01 -0000 1.21 +++ arch/i386/pci/pci_addr_fixup.c 7 May 2008 12:55:43 -0000 @@ -80,7 +80,7 @@ void pci_addr_fixup(struct pcibios_softc *sc, pci_chipset_tag_t pc, int maxbus) { - extern paddr_t avail_end; + extern u_int64_t avail_end; const char *verbose_header = "[%s]-----------------------\n" " device vendor product\n" @@ -136,8 +136,9 @@ start = PCIADDR_ISAMEM_RESERVE; sc->mem_alloc_start = (start + 0x100000 + 1) & ~(0x100000 - 1); sc->port_alloc_start = PCIADDR_ISAPORT_RESERVE; - PCIBIOS_PRINTV((" Physical memory end: 0x%08x\n PCI memory mapped I/O " - "space start: 0x%08x\n", avail_end, sc->mem_alloc_start)); + PCIBIOS_PRINTV((" Physical memory end: 0x%08llx\n" + " PCI memory mapped I/O space start: 0x%08x\n", + avail_end, sc->mem_alloc_start)); /* * 4. do fixup Index: kern/kern_exit.c =================================================================== RCS file: /cvs/src/sys/kern/kern_exit.c,v retrieving revision 1.72 diff -u -r1.72 kern_exit.c --- kern/kern_exit.c 10 Oct 2007 15:53:53 -0000 1.72 +++ kern/kern_exit.c 7 May 2008 11:55:28 -0000 @@ -233,6 +233,11 @@ systrace_exit(p); #endif /* + * unmap all user space now as we will not need it anymore + */ + uvmspace_unmap(p->p_vmspace); + + /* * NOTE: WE ARE NO LONGER ALLOWED TO SLEEP! */ p->p_stat = SDEAD; Index: kern/kern_malloc.c =================================================================== RCS file: /cvs/src/sys/kern/kern_malloc.c,v retrieving revision 1.74 diff -u -r1.74 kern_malloc.c --- kern/kern_malloc.c 21 Feb 2008 10:40:48 -0000 1.74 +++ kern/kern_malloc.c 7 May 2008 11:51:50 -0000 @@ -1,7 +1,7 @@ /* $OpenBSD: kern_malloc.c,v 1.74 2008/02/21 10:40:48 kettenis Exp $ */ -/* $NetBSD: kern_malloc.c,v 1.15.4.2 1996/06/13 17:10:56 cgd Exp $ */ /* + * Copyright (c) 2008 Michael Shalayeff * Copyright (c) 1987, 1991, 1993 * The Regents of the University of California. All rights reserved. 
* @@ -39,6 +39,7 @@ #include <sys/systm.h> #include <sys/sysctl.h> #include <sys/time.h> +#include <sys/pool.h> #include <sys/rwlock.h> #include <uvm/uvm_extern.h> @@ -74,6 +75,8 @@ #endif u_int nkmempages_max = 0; +struct pool mallocpl[MINBUCKET + 16]; +char mallocplnames[MINBUCKET + 16][8]; /* wchan for pool */ struct kmembuckets bucket[MINBUCKET + 16]; struct kmemstats kmemstats[M_LAST]; struct kmemusage *kmemusage; @@ -133,6 +136,77 @@ struct timeval malloc_lasterr; #endif +void *malloc_page_alloc(struct pool *, int); +void malloc_page_free(struct pool *, void *); +struct pool_allocator pool_allocator_malloc = { + malloc_page_alloc, malloc_page_free, 0, +}; + +void * +malloc_page_alloc(struct pool *pp, int flags) +{ +#if 0 + struct kmembuckets *kbp; + struct kmemusage *kup; + int indx; + + void *v = (void *)uvm_km_kmemalloc(kmem_map, uvmexp.kmem_object, + PAGE_SIZE, ((flags & M_NOWAIT) ? UVM_KMF_NOWAIT : 0) | + ((flags & M_CANFAIL) ? UVM_KMF_CANFAIL : 0)); + + kup = btokup(v); + indx = BUCKETINDX(pp->pr_size); + kup->ku_indx = indx; +#ifdef KMEMSTATS + kbp = &bucket[indx]; + kbp->kb_totalfree += kbp->kb_elmpercl; + kbp->kb_total += kbp->kb_elmpercl; +#endif +#else + void *v = uvm_km_getpage(flags & M_NOWAIT? 0 : 1); + struct vm_page *pg; + paddr_t pa; + + if (!pmap_extract(pmap_kernel(), (vaddr_t)v, &pa)) + panic("malloc_page_alloc: pmap_extract failed"); + + pg = PHYS_TO_VM_PAGE(pa); + if (pg == NULL) + panic("malloc_page_alloc: no page"); + pg->wire_count = BUCKETINDX(pp->pr_size); +#endif + return v; +} + +void +malloc_page_free(struct pool *pp, void *v) +{ +#if 0 + struct kmembuckets *kbp; + struct kmemusage *kup = btokup(v); + + kbp = &bucket[kup->ku_indx]; + uvm_km_free(kmem_map, (vaddr_t)v, PAGE_SIZE); + kup->ku_indx = 0; +#ifdef KMEMSTATS + kbp->kb_totalfree -= kbp->kb_elmpercl; + kbp->kb_total -= kbp->kb_elmpercl; +#endif +#else + struct vm_page *pg; + paddr_t pa; + + if (!pmap_extract(pmap_kernel(), (vaddr_t)v, &pa)) + panic("malloc_page_free: pmap_extract failed"); + + pg = PHYS_TO_VM_PAGE(pa); + if (pg == NULL) + panic("malloc_page_free: no page"); + pg->wire_count = 1; + uvm_km_putpage(v); +#endif +} + /* * Allocate a block of memory */ @@ -141,15 +215,9 @@ { struct kmembuckets *kbp; struct kmemusage *kup; - struct freelist *freep; long indx, npg, allocsize; int s; - caddr_t va, cp, savedlist; -#ifdef DIAGNOSTIC - int32_t *end, *lp; - int copysize; - char *savedtype; -#endif + caddr_t va; #ifdef KMEMSTATS struct kmemstats *ksp = &kmemstats[type]; @@ -161,7 +229,7 @@ if (debug_malloc(size, type, flags, (void **)&va)) { if ((flags & M_ZERO) && va != NULL) memset(va, 0, size); - return (va); + return ((void *) va); } #endif @@ -180,29 +248,22 @@ indx = BUCKETINDX(size); kbp = &bucket[indx]; s = splvm(); + if (size > MAXALLOCSAVE) { #ifdef KMEMSTATS - while (ksp->ks_memuse >= ksp->ks_limit) { - if (flags & M_NOWAIT) { - splx(s); - return (NULL); + while (ksp->ks_memuse >= ksp->ks_limit) { + if (flags & M_NOWAIT) { + splx(s); + return ((void *) NULL); + } + if (ksp->ks_limblocks < 65535) + ksp->ks_limblocks++; + tsleep(ksp, PSWP+2, memname[type], 0); } - if (ksp->ks_limblocks < 65535) - ksp->ks_limblocks++; - tsleep(ksp, PSWP+2, memname[type], 0); - } - ksp->ks_size |= 1 << indx; -#endif -#ifdef DIAGNOSTIC - copysize = 1 << indx < MAX_COPY ? 
1 << indx : MAX_COPY; #endif - if (kbp->kb_next == NULL) { - if (size > MAXALLOCSAVE) - allocsize = round_page(size); - else - allocsize = 1 << indx; - npg = atop(round_page(allocsize)); + allocsize = round_page(size); + npg = atop(allocsize); va = (caddr_t) uvm_km_kmemalloc(kmem_map, NULL, - (vsize_t)ptoa(npg), + (vsize_t)ptoa(npg), ((flags & M_NOWAIT) ? UVM_KMF_NOWAIT : 0) | ((flags & M_CANFAIL) ? UVM_KMF_CANFAIL : 0)); if (va == NULL) { @@ -220,121 +281,41 @@ return (NULL); } #ifdef KMEMSTATS - kbp->kb_total += kbp->kb_elmpercl; + kbp->kb_total++; #endif kup = btokup(va); kup->ku_indx = indx; - if (allocsize > MAXALLOCSAVE) { - kup->ku_pagecnt = npg; -#ifdef KMEMSTATS - ksp->ks_memuse += allocsize; -#endif - goto out; - } + kup->ku_pagecnt = npg; #ifdef KMEMSTATS - kup->ku_freecnt = kbp->kb_elmpercl; - kbp->kb_totalfree += kbp->kb_elmpercl; + kbp->kb_calls++; + ksp->ks_memuse += allocsize; + ksp->ks_size |= 1 << indx; + ksp->ks_inuse++; + ksp->ks_calls++; + if (ksp->ks_memuse > ksp->ks_maxused) + ksp->ks_maxused = ksp->ks_memuse; #endif - /* - * Just in case we blocked while allocating memory, - * and someone else also allocated memory for this - * bucket, don't assume the list is still empty. - */ - savedlist = kbp->kb_next; - kbp->kb_next = cp = va + (npg * PAGE_SIZE) - allocsize; - for (;;) { - freep = (struct freelist *)cp; -#ifdef DIAGNOSTIC - /* - * Copy in known text to detect modification - * after freeing. - */ - end = (int32_t *)&cp[copysize]; - for (lp = (int32_t *)cp; lp < end; lp++) - *lp = WEIRD_ADDR; - freep->type = M_FREE; -#endif /* DIAGNOSTIC */ - if (cp <= va) - break; - cp -= allocsize; - freep->next = cp; - } - freep->next = savedlist; - if (savedlist == NULL) - kbp->kb_last = (caddr_t)freep; - } - va = kbp->kb_next; - kbp->kb_next = ((struct freelist *)va)->next; -#ifdef DIAGNOSTIC - freep = (struct freelist *)va; - savedtype = (unsigned)freep->type < M_LAST ? - memname[freep->type] : "???"; - if (kbp->kb_next) { - int rv; - vaddr_t addr = (vaddr_t)kbp->kb_next; - - vm_map_lock(kmem_map); - rv = uvm_map_checkprot(kmem_map, addr, - addr + sizeof(struct freelist), VM_PROT_WRITE); - vm_map_unlock(kmem_map); - - if (!rv) { - printf("%s %d of object %p size 0x%lx %s %s (invalid addr %p)\n", - "Data modified on freelist: word", - (int32_t *)&kbp->kb_next - (int32_t *)kbp, va, size, - "previous type", savedtype, kbp->kb_next); - kbp->kb_next = NULL; - } + splx(s); + if ((flags & M_ZERO) && va != NULL) + memset(va, 0, size); + return ((void *) va); } - /* Fill the fields that we've used with WEIRD_ADDR */ -#if BYTE_ORDER == BIG_ENDIAN - freep->type = WEIRD_ADDR >> 16; -#endif -#if BYTE_ORDER == LITTLE_ENDIAN - freep->type = (short)WEIRD_ADDR; -#endif - end = (int32_t *)&freep->next + - (sizeof(freep->next) / sizeof(int32_t)); - for (lp = (int32_t *)&freep->next; lp < end; lp++) - *lp = WEIRD_ADDR; - - /* and check that the data hasn't been modified. */ - end = (int32_t *)&va[copysize]; - for (lp = (int32_t *)va; lp < end; lp++) { - if (*lp == WEIRD_ADDR) - continue; - printf("%s %d of object %p size 0x%lx %s %s (0x%x != 0x%x)\n", - "Data modified on freelist: word", lp - (int32_t *)va, - va, size, "previous type", savedtype, *lp, WEIRD_ADDR); - break; + va = pool_get(&mallocpl[indx], PR_LIMITFAIL | + (flags & M_NOWAIT? 
0 : PR_WAITOK)); + if (!va && (flags & (M_NOWAIT|M_CANFAIL)) == 0) + panic("malloc: out of space in kmem pool"); +#ifdef KMEMSTATS + if (va) { + ksp->ks_size |= 1 << indx; + ksp->ks_inuse++; + ksp->ks_calls++; } - - freep->spare0 = 0; -#endif /* DIAGNOSTIC */ -#ifdef KMEMSTATS - kup = btokup(va); - if (kup->ku_indx != indx) - panic("malloc: wrong bucket"); - if (kup->ku_freecnt == 0) - panic("malloc: lost data"); - kup->ku_freecnt--; - kbp->kb_totalfree--; - ksp->ks_memuse += 1 << indx; -out: - kbp->kb_calls++; - ksp->ks_inuse++; - ksp->ks_calls++; - if (ksp->ks_memuse > ksp->ks_maxused) - ksp->ks_maxused = ksp->ks_memuse; -#else -out: #endif splx(s); - if ((flags & M_ZERO) && va != NULL) memset(va, 0, size); - return (va); + return ((void *) va); } /* @@ -345,13 +326,12 @@ { struct kmembuckets *kbp; struct kmemusage *kup; - struct freelist *freep; + struct vm_page *pg; + paddr_t pa; long size; int s; #ifdef DIAGNOSTIC - caddr_t cp; - int32_t *end, *lp; - long alloc, copysize; + long alloc; #endif #ifdef KMEMSTATS struct kmemstats *ksp = &kmemstats[type]; @@ -362,30 +342,24 @@ return; #endif -#ifdef DIAGNOSTIC - if (addr < (void *)kmembase || addr >= (void *)kmemlimit) - panic("free: non-malloced addr %p type %s", addr, - memname[type]); -#endif - - kup = btokup(addr); - size = 1 << kup->ku_indx; - kbp = &bucket[kup->ku_indx]; s = splvm(); + if (addr >= (void *)kmembase && addr < (void *)kmemlimit) { + kup = btokup(addr); + size = 1 << kup->ku_indx; + kbp = &bucket[kup->ku_indx]; #ifdef DIAGNOSTIC - /* - * Check for returns of data that do not point to the - * beginning of the allocation. - */ - if (size > PAGE_SIZE) - alloc = addrmask[BUCKETINDX(PAGE_SIZE)]; - else - alloc = addrmask[kup->ku_indx]; - if (((u_long)addr & alloc) != 0) - panic("free: unaligned addr %p, size %ld, type %s, mask %ld", - addr, size, memname[type], alloc); + /* + * Check for returns of data that do not point to the + * beginning of the allocation. + */ + if (size > PAGE_SIZE) + alloc = addrmask[BUCKETINDX(PAGE_SIZE)]; + else + alloc = addrmask[kup->ku_indx]; + if (((u_long)addr & alloc) != 0) + panic("free: unaligned addr %p, size %ld, type %s, mask %ld", + addr, size, memname[type], alloc); #endif /* DIAGNOSTIC */ - if (size > MAXALLOCSAVE) { uvm_km_free(kmem_map, (vaddr_t)addr, ptoa(kup->ku_pagecnt)); #ifdef KMEMSTATS size = kup->ku_pagecnt << PGSHIFT; @@ -396,59 +370,26 @@ ksp->ks_memuse < ksp->ks_limit) wakeup(ksp); ksp->ks_inuse--; - kbp->kb_total -= 1; + kbp->kb_total--; #endif splx(s); return; } - freep = (struct freelist *)addr; + if (!pmap_extract(pmap_kernel(), (vaddr_t)addr, &pa)) + panic("free: pmap_extract failed"); + pg = PHYS_TO_VM_PAGE(pa); + if (pg == NULL) + panic("free: no page"); #ifdef DIAGNOSTIC - /* - * Check for multiple frees. Use a quick check to see if - * it looks free before laboriously searching the freelist. - */ - if (freep->spare0 == WEIRD_ADDR) { - for (cp = kbp->kb_next; cp; - cp = ((struct freelist *)cp)->next) { - if (addr != cp) - continue; - printf("multiply freed item %p\n", addr); - panic("free: duplicated free"); - } - } - /* - * Copy in known text to detect modification after freeing - * and to make it look free. Also, save the type being freed - * so we can list likely culprit if modification is detected - * when the object is reallocated. - */ - copysize = size < MAX_COPY ? 
size : MAX_COPY; - end = (int32_t *)&((caddr_t)addr)[copysize]; - for (lp = (int32_t *)addr; lp < end; lp++) - *lp = WEIRD_ADDR; - freep->type = type; -#endif /* DIAGNOSTIC */ + if (pg->pg_flags & PQ_FREE) + panic("free: page %p is free", pg); + if (pg->wire_count < MINBUCKET || (1 << pg->wire_count) > MAXALLOCSAVE) + panic("free: invalid page bucket %d", pg->wire_count); +#endif + pool_put(&mallocpl[pg->wire_count], addr); #ifdef KMEMSTATS - kup->ku_freecnt++; - if (kup->ku_freecnt >= kbp->kb_elmpercl) { - if (kup->ku_freecnt > kbp->kb_elmpercl) - panic("free: multiple frees"); - else if (kbp->kb_totalfree > kbp->kb_highwat) - kbp->kb_couldfree++; - } - kbp->kb_totalfree++; - ksp->ks_memuse -= size; - if (ksp->ks_memuse + size >= ksp->ks_limit && - ksp->ks_memuse < ksp->ks_limit) - wakeup(ksp); ksp->ks_inuse--; #endif - if (kbp->kb_next == NULL) - kbp->kb_next = addr; - else - ((struct freelist *)kbp->kb_last)->next = addr; - freep->next = NULL; - kbp->kb_last = addr; splx(s); } @@ -507,9 +448,7 @@ kmeminit(void) { vaddr_t base, limit; -#ifdef KMEMSTATS - long indx; -#endif + int i; #ifdef DIAGNOSTIC if (sizeof(struct freelist) > (1 << MINBUCKET)) @@ -529,16 +468,26 @@ kmemlimit = (char *)limit; kmemusage = (struct kmemusage *) uvm_km_zalloc(kernel_map, (vsize_t)(nkmempages * sizeof(struct kmemusage))); + /* + * init all the sub-page pools + */ + for (i = MINBUCKET; (1 << i) <= MAXALLOCSAVE; i++) { + snprintf(mallocplnames[i], sizeof(mallocplnames[i]), + "kmem%d", i); + pool_init(&mallocpl[i], 1 << i, 1 << i, 0, PR_LIMITFAIL, + mallocplnames[i], &pool_allocator_malloc); + } + #ifdef KMEMSTATS - for (indx = 0; indx < MINBUCKET + 16; indx++) { - if (1 << indx >= PAGE_SIZE) - bucket[indx].kb_elmpercl = 1; + for (i = 0; i < MINBUCKET + 16; i++) { + if (1 << i >= PAGE_SIZE) + bucket[i].kb_elmpercl = 1; else - bucket[indx].kb_elmpercl = PAGE_SIZE / (1 << indx); - bucket[indx].kb_highwat = 5 * bucket[indx].kb_elmpercl; + bucket[i].kb_elmpercl = PAGE_SIZE / (1 << i); + bucket[i].kb_highwat = 5 * bucket[i].kb_elmpercl; } - for (indx = 0; indx < M_LAST; indx++) - kmemstats[indx].ks_limit = nkmempages * PAGE_SIZE * 6 / 10; + for (i = 0; i < M_LAST; i++) + kmemstats[i].ks_limit = nkmempages * PAGE_SIZE * 6 / 10; #endif #ifdef MALLOC_DEBUG debug_malloc_init(); @@ -579,7 +528,6 @@ case KERN_MALLOC_BUCKET: bcopy(&bucket[BUCKETINDX(name[1])], &kb, sizeof(kb)); - kb.kb_next = kb.kb_last = 0; return (sysctl_rdstruct(oldp, oldlenp, newp, &kb, sizeof(kb))); case KERN_MALLOC_KMEMSTATS: #ifdef KMEMSTATS @@ -607,8 +555,9 @@ } memall = malloc(totlen + M_LAST, M_SYSCTL, M_WAITOK|M_ZERO); + bzero(memall, totlen + M_LAST); for (siz = 0, i = 0; i < M_LAST; i++) { - snprintf(memall + siz, + snprintf(memall + siz, totlen + M_LAST - siz, "%s,", memname[i] ? 
memname[i] : ""); siz += strlen(memall + siz); @@ -666,7 +615,7 @@ (*pr)("%15s %5ld %6ldK %7ldK %6ldK %9ld %8d %8d\n", memname[i], km->ks_inuse, km->ks_memuse / 1024, - km->ks_maxused / 1024, km->ks_limit / 1024, + km->ks_maxused / 1024, km->ks_limit / 1024, km->ks_calls, km->ks_limblocks, km->ks_mapblocks); } #else Index: sys/malloc.h =================================================================== RCS file: /cvs/src/sys/sys/malloc.h,v retrieving revision 1.90 diff -u -r1.90 malloc.h --- sys/malloc.h 28 Nov 2007 23:37:34 -0000 1.90 +++ sys/malloc.h 7 May 2008 11:51:50 -0000 @@ -348,8 +348,7 @@ * Set of buckets for each size of memory block that is retained */ struct kmembuckets { - caddr_t kb_next; /* list of free blocks */ - caddr_t kb_last; /* last free block */ + caddr_t kb_dummy[2]; u_int64_t kb_calls; /* total calls to allocate this size */ u_int64_t kb_total; /* total number of blocks allocated */ u_int64_t kb_totalfree; /* # of free elements in this bucket */ Index: sys/param.h =================================================================== RCS file: /cvs/src/sys/sys/param.h,v retrieving revision 1.74 diff -u -r1.74 param.h --- sys/param.h 10 Mar 2008 17:15:33 -0000 1.74 +++ sys/param.h 7 May 2008 11:51:50 -0000 @@ -212,8 +212,8 @@ * Constraints: PAGE_SIZE <= MAXALLOCSAVE <= 2 ** (MINBUCKET + 14), and * MAXALLOCSIZE must be a power of two. */ -#define MINBUCKET 4 /* 4 => min allocation of 16 bytes */ -#define MAXALLOCSAVE (2 * PAGE_SIZE) +#define MINBUCKET (sizeof(long) == 4? 4 : 5) +#define MAXALLOCSAVE (PAGE_SIZE) /* * Scale factor for scaled integers used to count %cpu time and load avgs. Index: uvm/uvm_amap.c =================================================================== RCS file: /cvs/src/sys/uvm/uvm_amap.c,v retrieving revision 1.40 diff -u -r1.40 uvm_amap.c --- uvm/uvm_amap.c 7 Sep 2007 15:00:20 -0000 1.40 +++ uvm/uvm_amap.c 7 May 2008 11:59:30 -0000 @@ -369,6 +369,9 @@ * XXXCDC: could we take advantage of a kernel realloc()? */ + if (slotneed >= UVM_AMAP_LARGE) + return E2BIG; + slotalloc = malloc_roundup(slotneed * sizeof(int)) / sizeof(int); #ifdef UVM_AMAP_PPREF newppref = NULL; Index: uvm/uvm_extern.h =================================================================== RCS file: /cvs/src/sys/uvm/uvm_extern.h,v retrieving revision 1.68 diff -u -r1.68 uvm_extern.h --- uvm/uvm_extern.h 29 Nov 2007 00:26:41 -0000 1.68 +++ uvm/uvm_extern.h 7 May 2008 11:55:28 -0000 @@ -525,6 +525,7 @@ vaddr_t, vaddr_t, boolean_t, boolean_t); void uvmspace_exec(struct proc *, vaddr_t, vaddr_t); struct vmspace *uvmspace_fork(struct vmspace *); +void uvmspace_unmap(struct vmspace *); void uvmspace_free(struct vmspace *); void uvmspace_share(struct proc *, struct proc *); void uvmspace_unshare(struct proc *); Index: uvm/uvm_init.c =================================================================== RCS file: /cvs/src/sys/uvm/uvm_init.c,v retrieving revision 1.16 diff -u -r1.16 uvm_init.c --- uvm/uvm_init.c 18 Jun 2007 21:51:15 -0000 1.16 +++ uvm/uvm_init.c 8 May 2008 19:59:40 -0000 @@ -48,6 +48,7 @@ #include <sys/proc.h> #include <sys/malloc.h> #include <sys/vnode.h> +#include <sys/kthread.h> #include <uvm/uvm.h> @@ -125,7 +126,11 @@ * kernel memory allocator (malloc) can be used. */ + uvm_km_page_init(); kmeminit(); +#ifndef PMAP_DIRECT + kthread_create_deferred(uvm_km_createthread, NULL); +#endif /* * step 7: init all pagers and the pager_map. 
@@ -148,8 +153,6 @@ uvm_page_rehash(); uao_create(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS, UAO_FLAG_KERNSWAP); - - uvm_km_page_init(); /* * reserve some unmapped space for malloc/pool use after free usage Index: uvm/uvm_km.c =================================================================== RCS file: /cvs/src/sys/uvm/uvm_km.c,v retrieving revision 1.66 diff -u -r1.66 uvm_km.c --- uvm/uvm_km.c 15 Dec 2007 03:42:57 -0000 1.66 +++ uvm/uvm_km.c 8 May 2008 19:59:22 -0000 @@ -791,9 +791,6 @@ struct km_page *next; } *uvm_km_pages_head; -void uvm_km_createthread(void *); -void uvm_km_thread(void *); - /* * Allocate the initial reserve, and create the thread which will * keep the reserve full. For bootstrapping, we allocate more than @@ -817,7 +814,7 @@ } for (i = 0; i < uvm_km_pages_lowat * 4; i++) { - page = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE); + page = (void *)uvm_km_kmemalloc(kernel_map, NULL, PAGE_SIZE, 0); page->next = uvm_km_pages_head; uvm_km_pages_head = page; } @@ -826,8 +823,6 @@ /* tone down if really high */ if (uvm_km_pages_lowat > 512) uvm_km_pages_lowat = 512; - - kthread_create_deferred(uvm_km_createthread, NULL); } void @@ -852,7 +847,8 @@ if (i < want || uvm_km_pages_free >= uvm_km_pages_lowat) tsleep(&uvm_km_pages_head, PVM, "kmalloc", 0); for (i = 0; i < want; i++) { - page = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE); + page = (void *)uvm_km_kmemalloc(kernel_map, NULL, + PAGE_SIZE, 0); if (i == 0) head = tail = page; if (page == NULL) Index: uvm/uvm_km.h =================================================================== RCS file: /cvs/src/sys/uvm/uvm_km.h,v retrieving revision 1.9 diff -u -r1.9 uvm_km.h --- uvm/uvm_km.h 11 Apr 2007 12:10:42 -0000 1.9 +++ uvm/uvm_km.h 7 May 2008 11:53:12 -0000 @@ -50,6 +50,8 @@ void uvm_km_init(vaddr_t, vaddr_t); void uvm_km_page_init(void); +void uvm_km_createthread(void *); +void uvm_km_thread(void *); void uvm_km_pgremove(struct uvm_object *, vaddr_t, vaddr_t); void uvm_km_pgremove_intrsafe(vaddr_t, vaddr_t); Index: uvm/uvm_map.c =================================================================== RCS file: /cvs/src/sys/uvm/uvm_map.c,v retrieving revision 1.99 diff -u -r1.99 uvm_map.c --- uvm/uvm_map.c 15 Sep 2007 10:10:37 -0000 1.99 +++ uvm/uvm_map.c 8 May 2008 21:26:48 -0000 @@ -834,10 +834,8 @@ if (prev_entry->aref.ar_amap) { error = amap_extend(prev_entry, size); - if (error) { - vm_map_unlock(map); - return (error); - } + if (error) + goto step3; } UVMCNT_INCR(map_backmerge); @@ -3263,6 +3261,37 @@ uvmspace_free(ovm); } +} + +/* + * uvmspace_unmap: unmap all mappings in vmspace + * - XXX: no locking on vmspace + */ + +void +uvmspace_unmap(struct vmspace *vm) +{ + vm_map_entry_t dead_entries; + UVMHIST_FUNC("uvmspace_unmap"); UVMHIST_CALLED(maphist); + + UVMHIST_LOG(maphist,"(vm=0x%x) ref=%d", vm, vm->vm_refcnt,0,0); + if (vm->vm_refcnt == 1) { + /* + * lock the map, to wait out all other references to it. delete + * all of the mappings and pages they hold, then call the pmap + * module to reclaim anything left. + */ + vm_map_lock(&vm->vm_map); + if (vm->vm_map.nentries) { + uvm_unmap_remove(&vm->vm_map, + vm->vm_map.min_offset, vm->vm_map.max_offset, + &dead_entries, NULL); + if (dead_entries != NULL) + uvm_unmap_detach(dead_entries, 0); + } + vm_map_unlock(&vm->vm_map); + } + UVMHIST_LOG(maphist,"<- done", 0,0,0,0); } /*
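
fwiw, the address math the himem kthread does before calling himem_zefix()
boils down to the following standalone userland sketch.  the constants are
copied from himem.c above; the block number is made up, and the printout is
only for illustration:

#include <stdio.h>
#include <stdint.h>

#define DEV_BSIZE	512
#define PAGE_SIZE	4096
/* blocks covered by one 2MB PAE superpage, as in himem.c */
#define PDE_MASK	((512 * (PAGE_SIZE / DEV_BSIZE)) - 1)

int
main(void)
{
	uint32_t bno = 123456;		/* made-up block number on the himem disk */

	/* physical byte address of the block; the disk starts at 4GB */
	uint64_t pa = 0x100000000ULL + (uint64_t)bno * DEV_BSIZE;

	/* 2MB superpage the kthread installs at HIMEM_HIGH before the copy */
	uint64_t window = 0x100000000ULL + (uint64_t)(bno & ~PDE_MASK) * DEV_BSIZE;

	/* offset of the block inside that window */
	uint32_t off = (bno & PDE_MASK) * DEV_BSIZE;

	printf("block %u -> pa 0x%llx (2MB window 0x%llx + offset 0x%x)\n",
	    bno, (unsigned long long)pa, (unsigned long long)window, off);
	return 0;
}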
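
and the new malloc path simply rounds every sub-page request up to a
power-of-two pool ("kmem4".."kmem12" on i386 w/ 4k pages); anything bigger
than MAXALLOCSAVE still goes straight to uvm_km_kmemalloc().  a rough
userland sketch of the size-to-bucket mapping -- bucketindx() here just
stands in for the BUCKETINDX() macro from <sys/malloc.h>, and the constants
assume the sys/param.h hunk above on a 32-bit kernel:

#include <stdio.h>
#include <stddef.h>

#define MINBUCKET	4		/* smallest pool item: 16 bytes */
#define PAGE_SIZE	4096
#define MAXALLOCSAVE	PAGE_SIZE	/* larger requests bypass the pools */

/* smallest power-of-two bucket that fits the request */
static int
bucketindx(size_t size)
{
	int indx = MINBUCKET;

	while (((size_t)1 << indx) < size)
		indx++;
	return indx;
}

int
main(void)
{
	size_t sizes[] = { 12, 16, 100, 512, 3000, 4096, 9000 };
	size_t i;

	for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
		size_t sz = sizes[i];

		if (sz > MAXALLOCSAVE)
			printf("%5zu bytes -> uvm_km_kmemalloc, whole pages\n", sz);
		else
			printf("%5zu bytes -> pool \"kmem%d\" (%lu-byte items)\n",
			    sz, bucketindx(sz), 1UL << bucketindx(sz));
	}
	return 0;
}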