For fans of inline patches, here it is: a new version of the patch.
No major improvement, it's just a cleaning of the previous patch: - Removed the rtl8139.c, which I forgot to remove in the previous patch; - Removed some debugging printf that I forgot, in pure StoMach style; Happy Testing, Gianluca diff -ru gnumach-vanilla/i386/i386at/model_dep.c gnumach/i386/i386at/model_dep.c --- gnumach-vanilla/i386/i386at/model_dep.c 2004-11-28 18:29:35.000000000 +0100 +++ gnumach/i386/i386at/model_dep.c 2006-01-22 01:08:09.000000000 +0100 @@ -86,16 +86,7 @@ /* Configuration parameter: if zero, only use physical memory in the low 16MB of addresses. Only SCSI still has DMA problems. */ -#ifdef LINUX_DEV -#define use_all_mem 1 -#else -#include "nscsi.h" -#if NSCSI > 0 -#define use_all_mem 0 -#else #define use_all_mem 1 -#endif -#endif extern char version[]; @@ -454,7 +445,6 @@ vm_offset_t addr; extern char start[], end[]; int i; - static int wrapped = 0; /* Memory regions to skip. */ vm_offset_t cmdline_start_pa = boot_info.flags & MULTIBOOT_CMDLINE @@ -474,25 +464,8 @@ /* Page-align the start address. */ avail_next = round_page(avail_next); - /* Start with memory above 16MB, reserving the low memory for later. */ - if (use_all_mem && !wrapped && phys_last_addr > 16 * 1024*1024) - { - if (avail_next < 16 * 1024*1024) - avail_next = 16 * 1024*1024; - else if (avail_next == phys_last_addr) - { - /* We have used all the memory above 16MB, so now start on - the low memory. This will wind up at the end of the list - of free pages, so it should not have been allocated to any - other use in early initialization before the Linux driver - glue initialization needs to allocate low memory. */ - avail_next = 0x1000; - wrapped = 1; - } - } - /* Check if we have reached the end of memory. */ - if (avail_next == (wrapped ? 16 * 1024*1024 : phys_last_addr)) + if (avail_next == phys_last_addr) return FALSE; /* Tentatively assign the current location to the caller. */ @@ -585,107 +558,3 @@ !(((boot_info.mem_lower * 1024) <= x) && (x < 1024*1024))); } -#ifndef NBBY -#define NBBY 8 -#endif -#ifndef NBPW -#define NBPW (NBBY * sizeof(int)) -#endif -#define DMA_MAX (16*1024*1024) - -/* - * Allocate contiguous pages below 16 MB - * starting at specified boundary for DMA. - */ -vm_offset_t -alloc_dma_mem(size, align) - vm_size_t size; - vm_offset_t align; -{ - int *bits, i, j, k, n; - int npages, count, bit, mask; - int first_page, last_page; - vm_offset_t addr; - vm_page_t p, prevp; - - npages = round_page(size) / PAGE_SIZE; - mask = align ? (align - 1) / PAGE_SIZE : 0; - - /* - * Allocate bit array. - */ - n = ((DMA_MAX / PAGE_SIZE) + NBPW - 1) / NBPW; - i = n * NBPW; - bits = (unsigned *)kalloc(i); - if (bits == 0) { - printf("alloc_dma_mem: unable alloc bit array\n"); - return (0); - } - bzero((char *)bits, i); - - /* - * Walk the page free list and set a bit for - * every usable page in bit array. - */ - simple_lock(&vm_page_queue_free_lock); - for (p = vm_page_queue_free; p; p = (vm_page_t)p->pageq.next) { - if (p->phys_addr < DMA_MAX) { - i = p->phys_addr / PAGE_SIZE; - bits[i / NBPW] |= 1 << (i % NBPW); - } - } - - /* - * Search for contiguous pages by scanning bit array. 
- */ - for (i = 0, first_page = -1; i < n; i++) { - for (bit = 1, j = 0; j < NBPW; j++, bit <<= 1) { - if (bits[i] & bit) { - if (first_page < 0) { - k = i * NBPW + j; - if (!mask - || (((k & mask) + npages) - <= mask + 1)) { - first_page = k; - if (npages == 1) - goto found; - count = 1; - } - } else if (++count == npages) - goto found; - } else - first_page = -1; - } - } - addr = 0; - goto out; - - found: - /* - * Remove pages from the free list. - */ - addr = first_page * PAGE_SIZE; - last_page = first_page + npages; - vm_page_free_count -= npages; - p = vm_page_queue_free; - prevp = 0; - while (1) { - i = p->phys_addr / PAGE_SIZE; - if (i >= first_page && i < last_page) { - if (prevp) - prevp->pageq.next = p->pageq.next; - else - vm_page_queue_free = (vm_page_t)p->pageq.next; - p->free = FALSE; - if (--npages == 0) - break; - } else - prevp = p; - p = (vm_page_t)p->pageq.next; - } - - out: - simple_unlock(&vm_page_queue_free_lock); - kfree((vm_offset_t)bits, n * NBPW); - return (addr); -} diff -ru gnumach-vanilla/i386/intel/pmap.c gnumach/i386/intel/pmap.c --- gnumach-vanilla/i386/intel/pmap.c 2001-04-05 08:39:21.000000000 +0200 +++ gnumach/i386/intel/pmap.c 2006-01-22 01:08:09.000000000 +0100 @@ -584,6 +584,11 @@ return(virt); } +unsigned long vm_page_normal_first = 16*1024*1024; +unsigned long vm_page_normal_last = 0; +unsigned long vm_page_dma_first = 0; +unsigned long vm_page_dma_last = 16*1024*1024 - 1; + /* * Bootstrap the system enough to run with virtual memory. * Allocate the kernel page directory and page tables, @@ -698,6 +703,25 @@ va += INTEL_PGBYTES; } } + + if (phys_last_addr <= 16*1024*1024) { + /* Set so to never get TRUE from isnormal(). */ + vm_page_normal_first = phys_last_addr + 1; + vm_page_normal_last = 0; + + /* Only DMA memory. */ + vm_page_dma_first = 0; + vm_page_dma_last = phys_last_addr; + } else { + vm_page_normal_first = 16*1024*1024; + vm_page_normal_last = phys_last_addr; + + vm_page_dma_first = 0; + vm_page_dma_last = 16*1024*1024 - 1; + } + + + } #if i860 @@ -2341,6 +2365,27 @@ return (phys_attribute_test(phys, PHYS_REFERENCED)); } +/* + * pmap_is_dma + * + * Return TRUE if PHYS is in the DMA zone range. + */ +boolean_t pmap_is_dma (vm_offset_t phys) +{ + return (phys < 16*1024*1024); +} + +/* + * pmap_is_normal: + * + * Return TRUE if PHYS is in the normal zone range. + */ +boolean_t pmap_is_normal (vm_offset_t phys) +{ + return (phys >= 16*1024*1024); +} + + #if NCPUS > 1 /* * TLB Coherence Code (TLB "shootdown" code) diff -ru gnumach-vanilla/kern/startup.c gnumach/kern/startup.c --- gnumach-vanilla/kern/startup.c 2001-04-05 08:39:20.000000000 +0200 +++ gnumach/kern/startup.c 2006-01-22 01:08:09.000000000 +0100 @@ -80,9 +80,6 @@ extern void action_thread(); #endif /* NCPUS > 1 */ -/* XX */ -extern vm_offset_t phys_first_addr, phys_last_addr; - /* * Running in virtual memory, on the interrupt stack. * Does not return. Dispatches initial thread. 
@@ -122,7 +119,7 @@ machine_init(); machine_info.max_cpus = NCPUS; - machine_info.memory_size = phys_last_addr - phys_first_addr; /* XXX mem_size */ + machine_info.memory_size = phys_last_addr - phys_first_addr; machine_info.avail_cpus = 0; machine_info.major_version = KERNEL_MAJOR_VERSION; machine_info.minor_version = KERNEL_MINOR_VERSION; diff -ru gnumach-vanilla/linux/dev/glue/kmem.c gnumach/linux/dev/glue/kmem.c --- gnumach-vanilla/linux/dev/glue/kmem.c 1999-04-26 07:45:35.000000000 +0200 +++ gnumach/linux/dev/glue/kmem.c 2006-01-22 02:36:14.000000000 +0100 @@ -25,6 +25,7 @@ #include <sys/types.h> #include <mach/mach_types.h> +#include <mach/error.h> #include <mach/vm_param.h> #include <kern/assert.h> @@ -40,22 +41,11 @@ #include <asm/system.h> -extern void *alloc_contig_mem (unsigned, unsigned, unsigned, vm_page_t *); extern int printf (const char *, ...); -/* Amount of memory to reserve for Linux memory allocator. - We reserve 64K chunks to stay within DMA limits. - Increase MEM_CHUNKS if the kernel is running out of memory. */ -#define MEM_CHUNK_SIZE (64 * 1024) -#define MEM_CHUNKS 7 - /* Mininum amount that linux_kmalloc will allocate. */ #define MIN_ALLOC 12 -#ifndef NBPW -#define NBPW 32 -#endif - /* Memory block header. */ struct blkhdr { @@ -70,62 +60,17 @@ struct pagehdr *next; /* next header in list */ }; -/* This structure describes a memory chunk. */ -struct chunkhdr -{ - unsigned long start; /* start address */ - unsigned long end; /* end address */ - unsigned long bitmap; /* busy/free bitmap of pages */ -}; - -/* Chunks from which pages are allocated. */ -static struct chunkhdr pages_free[MEM_CHUNKS]; - /* Memory list maintained by linux_kmalloc. */ -static struct pagehdr *memlist; +static struct pagehdr *memlist_dma = NULL; +static struct pagehdr *memlist_nml = NULL; /* Some statistics. */ int num_block_coalesce = 0; -int num_page_collect = 0; -int linux_mem_avail; /* Initialize the Linux memory allocator. */ void linux_kmem_init () { - int i, j; - vm_page_t p, pages; - - for (i = 0; i < MEM_CHUNKS; i++) - { - /* Allocate memory. */ - pages_free[i].start = (unsigned long) alloc_contig_mem (MEM_CHUNK_SIZE, - 16 * 1024 * 1024, - 0xffff, &pages); - - assert (pages_free[i].start); - assert ((pages_free[i].start & 0xffff) == 0); - - /* Sanity check: ensure pages are contiguous and within DMA limits. */ - for (p = pages, j = 0; j < MEM_CHUNK_SIZE - PAGE_SIZE; j += PAGE_SIZE) - { - assert (p->phys_addr < 16 * 1024 * 1024); - assert (p->phys_addr + PAGE_SIZE - == ((vm_page_t) p->pageq.next)->phys_addr); - - p = (vm_page_t) p->pageq.next; - } - - pages_free[i].end = pages_free[i].start + MEM_CHUNK_SIZE; - - /* Initialize free page bitmap. */ - pages_free[i].bitmap = 0; - j = MEM_CHUNK_SIZE >> PAGE_SHIFT; - while (--j >= 0) - pages_free[i].bitmap |= 1 << j; - } - - linux_mem_avail = (MEM_CHUNKS * MEM_CHUNK_SIZE) >> PAGE_SHIFT; } /* Return the number by which the page size should be @@ -178,7 +123,40 @@ num_block_coalesce++; - for (ph = memlist; ph; ph = ph->next) + /* Coalesce DMA memory. */ + for (ph = memlist_dma; ph; ph = ph->next) + { + bh = (struct blkhdr *) (ph + 1); + ebh = (struct blkhdr *) ((void *) ph + ph->size); + while (1) + { + /* Skip busy blocks. */ + while (bh < ebh && !bh->free) + bh = (struct blkhdr *) ((void *) (bh + 1) + bh->size); + if (bh == ebh) + break; + + /* Merge adjacent free blocks. 
*/ + while (1) + { + bhp = (struct blkhdr *) ((void *) (bh + 1) + bh->size); + if (bhp == ebh) + { + bh = bhp; + break; + } + if (!bhp->free) + { + bh = (struct blkhdr *) ((void *) (bhp + 1) + bhp->size); + break; + } + bh->size += bhp->size + sizeof (struct blkhdr); + } + } + } + + /* Coalesce non-DMA memory. */ + for (ph = memlist_nml; ph; ph = ph->next) { bh = (struct blkhdr *) (ph + 1); ebh = (struct blkhdr *) ((void *) ph + ph->size); @@ -216,20 +194,26 @@ void * linux_kmalloc (unsigned int size, int priority) { - int order, coalesced = 0; + int order, coalesced = 0, dma = 0; unsigned flags; - struct pagehdr *ph; + struct pagehdr *ph, **memlistp; struct blkhdr *bh, *new_bh; + if (priority & GFP_DMA) + { + memlistp = &memlist_dma; + dma = 1; + } + else + { + memlistp = &memlist_nml; + dma = 0; + } if (size < MIN_ALLOC) size = MIN_ALLOC; else size = (size + sizeof (int) - 1) & ~(sizeof (int) - 1); - assert (size <= (MEM_CHUNK_SIZE - - sizeof (struct pagehdr) - - sizeof (struct blkhdr))); - save_flags (flags); cli (); @@ -238,7 +222,7 @@ /* Walk the page list and find the first free block with size greater than or equal to the one required. */ - for (ph = memlist; ph; ph = ph->next) + for (ph = *memlistp; ph; ph = ph->next) { bh = (struct blkhdr *) (ph + 1); while (bh < (struct blkhdr *) ((void *) ph + ph->size)) @@ -278,16 +262,26 @@ order = get_page_order (size + sizeof (struct pagehdr) + sizeof (struct blkhdr)); - ph = (struct pagehdr *) __get_free_pages (GFP_KERNEL, order, ~0UL); + ph = (struct pagehdr *) __get_free_pages (GFP_KERNEL, order, dma); if (!ph) { restore_flags (flags); return NULL; } + /* __get_free_pages may return DMA memory if non-DMA memory is not + free so we check back here for mem type. */ + if (pmap_is_dma ((unsigned long)ph)) + { + memlistp = &memlist_dma; + } + else + { + memlistp = &memlist_nml; + } ph->size = PAGE_SIZE << order; - ph->next = memlist; - memlist = ph; + ph->next = *memlistp; + *memlistp = ph; bh = (struct blkhdr *) (ph + 1); bh->free = 0; bh->size = ph->size - sizeof (struct pagehdr) - sizeof (struct blkhdr); @@ -310,17 +304,28 @@ linux_kfree (void *p) { unsigned flags; + vm_offset_t addr; struct blkhdr *bh; - struct pagehdr *ph; + struct pagehdr *ph, **memlistp; assert (((int) p & (sizeof (int) - 1)) == 0); + addr = (vm_offset_t) p; + + if (pmap_is_dma (addr)) + { + memlistp = &memlist_dma; + } + else + { + memlistp = &memlist_nml; + } save_flags (flags); cli (); check_page_list (__LINE__); - for (ph = memlist; ph; ph = ph->next) + for (ph = *memlistp; ph; ph = ph->next) if (p >= (void *) ph && p < (void *) ph + ph->size) break; @@ -339,10 +344,10 @@ restore_flags (flags); } -/* Free any pages that are not in use. +/* Free any DMA page that are not in use. Called by __get_free_pages when pages are running low. */ static void -collect_kmalloc_pages () +collect_kmalloc_pages_dma () { struct blkhdr *bh; struct pagehdr *ph, **prev_ph; @@ -353,8 +358,8 @@ check_page_list (__LINE__); - ph = memlist; - prev_ph = &memlist; + ph = memlist_dma; + prev_ph = &memlist_dma; while (ph) { bh = (struct blkhdr *) (ph + 1); @@ -373,68 +378,91 @@ check_page_list (__LINE__); } - -/* Allocate ORDER + 1 number of physically contiguous pages. - PRIORITY and DMA are not used in Mach. - - XXX: This needs to be dynamic. To do that we need to make - the Mach page manipulation routines interrupt safe and they - must provide machine dependant hooks. 
*/ -unsigned long -__get_free_pages (int priority, unsigned long order, int dma) +/* Free any non-DMA page that are not in use. + Called by __get_free_pages when pages are running low. */ +static void +collect_kmalloc_pages_nml () { - int i, pages_collected = 0; - unsigned flags, bits, off, j, len; + struct blkhdr *bh; + struct pagehdr *ph, **prev_ph; - assert ((PAGE_SIZE << order) <= MEM_CHUNK_SIZE); + check_page_list (__LINE__); - /* Construct bitmap of contiguous pages. */ - bits = 0; - j = 0; - len = 0; - while (len < (PAGE_SIZE << order)) - { - bits |= 1 << j++; - len += PAGE_SIZE; - } + coalesce_blocks (); - save_flags (flags); - cli (); -again: + check_page_list (__LINE__); - /* Search each chunk for the required number of contiguous pages. */ - for (i = 0; i < MEM_CHUNKS; i++) + ph = memlist_nml; + prev_ph = &memlist_nml; + while (ph) { - off = 0; - j = bits; - while (MEM_CHUNK_SIZE - off >= (PAGE_SIZE << order)) + bh = (struct blkhdr *) (ph + 1); + if (bh->free && (void *) (bh + 1) + bh->size == (void *) ph + ph->size) { - if ((pages_free[i].bitmap & j) == j) - { - pages_free[i].bitmap &= ~j; - linux_mem_avail -= order + 1; - restore_flags (flags); - return pages_free[i].start + off; - } - j <<= 1; - off += PAGE_SIZE; + *prev_ph = ph->next; + free_pages ((unsigned long) ph, get_page_order (ph->size)); + ph = *prev_ph; + } + else + { + prev_ph = &ph->next; + ph = ph->next; } } - /* Allocation failed; collect kmalloc and buffer pages - and try again. */ - if (!pages_collected) - { - num_page_collect++; - collect_kmalloc_pages (); - pages_collected = 1; - goto again; - } + check_page_list (__LINE__); +} - printf ("%s:%d: __get_free_pages: ran out of pages\n", __FILE__, __LINE__); +/* Allocate ORDER + 1 number of physically contiguous pages. + PRIORITY and DMA are not used in Mach. */ +unsigned long +__get_free_pages (int priority, unsigned long order, int dma) +{ + unsigned long pagenum; + unsigned vm_page_flags = 0; + unsigned long p; + + if (dma) + vm_page_flags |= VM_PAGE_DMA; + + pagenum = (1 << order); + + p = 0; + + if (pagenum > 1) + { + /* + * Contiguous grabbing is slow and may fail. + * We reserve it for special occasions. + */ + mach_error_t err; + vm_offset_t vmo; + + err = vm_page_grab_contiguous_pages_flags (pagenum, &vmo, FALSE, + vm_page_flags, 0); + p = (unsigned long) vmo; + + if (err) + return 0; + } + else + { + vm_page_t m; + + m = vm_page_grab_flags (FALSE, vm_page_flags); + if (!m) + return 0; + + p = m->phys_addr; + + if (m->tabled) + { + printf ("Error while getting page of order %ld\n", order); + return 0; + } + }; - restore_flags (flags); - return 0; + return p; } /* Free ORDER + 1 number of physically @@ -442,36 +470,20 @@ void free_pages (unsigned long addr, unsigned long order) { - int i; - unsigned flags, bits, len, j; + unsigned long i, pagenum; - assert ((addr & PAGE_MASK) == 0); - - for (i = 0; i < MEM_CHUNKS; i++) - if (addr >= pages_free[i].start && addr < pages_free[i].end) - break; + pagenum = 1 << order; - assert (i < MEM_CHUNKS); - - /* Contruct bitmap of contiguous pages. 
*/ - len = 0; - j = 0; - bits = 0; - while (len < (PAGE_SIZE << order)) + for (i = 0; i < pagenum; i++) { - bits |= 1 << j++; - len += PAGE_SIZE; - } - bits <<= (addr - pages_free[i].start) >> PAGE_SHIFT; + vm_page_t m; - save_flags (flags); - cli (); + m = vm_page_physaddr_lookup (addr + (i * PAGE_SIZE)); + if (m == VM_PAGE_NULL) + panic ("couldn't lookup page for address %lx", addr + (i * PAGE_SIZE)); - assert ((pages_free[i].bitmap & bits) == 0); - - pages_free[i].bitmap |= bits; - linux_mem_avail += order + 1; - restore_flags (flags); + vm_page_free (m); + } } diff -ru gnumach-vanilla/linux/dev/init/main.c gnumach/linux/dev/init/main.c --- gnumach-vanilla/linux/dev/init/main.c 1999-04-26 07:49:06.000000000 +0200 +++ gnumach/linux/dev/init/main.c 2006-01-22 02:35:54.000000000 +0100 @@ -82,9 +82,7 @@ static void calibrate_delay (void); extern int hz; -extern vm_offset_t phys_last_addr; -extern void *alloc_contig_mem (unsigned, unsigned, unsigned, vm_page_t *); extern void free_contig_mem (vm_page_t); extern void init_IRQ (void); extern void restore_IRQ (void); @@ -105,10 +103,8 @@ extern void linux_sched_init (void); -/* - * Amount of contiguous memory to allocate for initialization. - */ -#define CONTIG_ALLOC (512 * 1024) +/* Amount of contiguous memory to allocate for initialization. */ +#define CONTIG_ALLOC_ORDER (7) /* 512kb. */ /* * Initialize Linux drivers. @@ -117,7 +113,7 @@ linux_init (void) { int addr; - unsigned memory_start, memory_end; + unsigned long memory_start, memory_end; vm_page_t pages; /* @@ -142,40 +138,34 @@ memcpy ((char *) &drive_info + 16, (void *) ((addr & 0xffff) + ((addr >> 12) & 0xffff0)), 16); - /* - * Initialize Linux memory allocator. - */ + /* Initialize Linux memory allocator. */ linux_kmem_init (); - /* - * Allocate contiguous memory below 16 MB. - */ - memory_start = (unsigned long) alloc_contig_mem (CONTIG_ALLOC, - 16 * 1024 * 1024, - 0, &pages); - if (memory_start == 0) - panic ("linux_init: alloc_contig_mem failed"); - memory_end = memory_start + CONTIG_ALLOC; + /* Allocate contiguous memory below 16 MB. */ + memory_start = __get_free_pages (GFP_ATOMIC, CONTIG_ALLOC_ORDER, 1); + if (!memory_start) + panic ("linux_init: alloc PCI memory failed"); + memory_end = memory_start + ((1 << CONTIG_ALLOC_ORDER) * PAGE_SIZE); - /* - * Initialize PCI bus. - */ + /* Initialize PCI bus. */ memory_start = pci_init (memory_start, memory_end); if (memory_start > memory_end) panic ("linux_init: ran out memory"); - /* - * Free unused memory. - */ - while (pages && pages->phys_addr < round_page (memory_start)) - pages = (vm_page_t) pages->pageq.next; - if (pages) - free_contig_mem (pages); + /* Free unused memory. */ + { + unsigned long memaddr; + + for (memaddr = round_page (memory_start); + memaddr < memory_end; + memaddr += PAGE_SIZE) + { + free_pages (memaddr, 0); + } + } - /* - * Initialize devices. - */ + /* Initialize devices. */ #ifdef CONFIG_INET linux_net_emulation_init (); #endif @@ -186,148 +176,6 @@ linux_auto_config = 0; } -#ifndef NBPW -#define NBPW 32 -#endif - -/* - * Allocate contiguous memory with the given constraints. - * This routine is horribly inefficient but it is presently - * only used during initialization so it's not that bad. 
- */ -void * -alloc_contig_mem (unsigned size, unsigned limit, - unsigned mask, vm_page_t * pages) -{ - int i, j, bits_len; - unsigned *bits, len; - void *m; - vm_page_t p, page_list, tail, prev; - vm_offset_t addr, max_addr; - - if (size == 0) - return (NULL); - size = round_page (size); - if ((size >> PAGE_SHIFT) > vm_page_free_count) - return (NULL); - - /* Allocate bit array. */ - max_addr = phys_last_addr; - if (max_addr > limit) - max_addr = limit; - bits_len = ((((max_addr >> PAGE_SHIFT) + NBPW - 1) / NBPW) - * sizeof (unsigned)); - bits = (unsigned *) kalloc (bits_len); - if (!bits) - return (NULL); - memset (bits, 0, bits_len); - - /* - * Walk the page free list and set a bit for every usable page. - */ - simple_lock (&vm_page_queue_free_lock); - p = vm_page_queue_free; - while (p) - { - if (p->phys_addr < limit) - (bits[(p->phys_addr >> PAGE_SHIFT) / NBPW] - |= 1 << ((p->phys_addr >> PAGE_SHIFT) % NBPW)); - p = (vm_page_t) p->pageq.next; - } - - /* - * Scan bit array for contiguous pages. - */ - len = 0; - m = NULL; - for (i = 0; len < size && i < bits_len / sizeof (unsigned); i++) - for (j = 0; len < size && j < NBPW; j++) - if (!(bits[i] & (1 << j))) - { - len = 0; - m = NULL; - } - else - { - if (len == 0) - { - addr = ((vm_offset_t) (i * NBPW + j) - << PAGE_SHIFT); - if ((addr & mask) == 0) - { - len += PAGE_SIZE; - m = (void *) addr; - } - } - else - len += PAGE_SIZE; - } - - if (len != size) - { - simple_unlock (&vm_page_queue_free_lock); - kfree ((vm_offset_t) bits, bits_len); - return (NULL); - } - - /* - * Remove pages from free list - * and construct list to return to caller. - */ - page_list = NULL; - for (len = 0; len < size; len += PAGE_SIZE, addr += PAGE_SIZE) - { - prev = NULL; - for (p = vm_page_queue_free; p; p = (vm_page_t) p->pageq.next) - { - if (p->phys_addr == addr) - break; - prev = p; - } - if (!p) - panic ("alloc_contig_mem: page not on free list"); - if (prev) - prev->pageq.next = p->pageq.next; - else - vm_page_queue_free = (vm_page_t) p->pageq.next; - p->free = FALSE; - p->pageq.next = NULL; - if (!page_list) - page_list = tail = p; - else - { - tail->pageq.next = (queue_entry_t) p; - tail = p; - } - vm_page_free_count--; - } - - simple_unlock (&vm_page_queue_free_lock); - kfree ((vm_offset_t) bits, bits_len); - if (pages) - *pages = page_list; - return (m); -} - -/* - * Free memory allocated by alloc_contig_mem. - */ -void -free_contig_mem (vm_page_t pages) -{ - int i; - vm_page_t p; - - for (p = pages, i = 0; p->pageq.next; p = (vm_page_t) p->pageq.next, i++) - p->free = TRUE; - p->free = TRUE; - simple_lock (&vm_page_queue_free_lock); - vm_page_free_count += i + 1; - p->pageq.next = (queue_entry_t) vm_page_queue_free; - vm_page_queue_free = pages; - simple_unlock (&vm_page_queue_free_lock); -} - /* This is the number of bits of precision for the loops_per_second. Each * bit takes on average 1.5/HZ seconds. This (like the original) is a little * better than 1% diff -ru gnumach-vanilla/vm/pmap.h gnumach/vm/pmap.h --- gnumach-vanilla/vm/pmap.h 2001-04-05 08:39:21.000000000 +0200 +++ gnumach/vm/pmap.h 2006-01-22 01:08:09.000000000 +0100 @@ -174,6 +174,15 @@ /* Return modify bit */ boolean_t pmap_is_modified(vm_offset_t pa); +/* + * Page Zones routines + */ + +/* Physical address is in DMA capable zone. */ +boolean_t pmap_is_dma (vm_offset_t pa); + +/* Physical address is in non-DMA capable zone. 
*/ +boolean_t pmap_is_normal (vm_offset_t pa); /* * Statistics routines diff -ru gnumach-vanilla/vm/vm_page.h gnumach/vm/vm_page.h --- gnumach-vanilla/vm/vm_page.h 1999-06-28 02:41:02.000000000 +0200 +++ gnumach/vm/vm_page.h 2006-01-22 01:08:09.000000000 +0100 @@ -152,22 +152,26 @@ * ordered, in LRU-like fashion. */ +#define VM_PAGE_DMA 0x1 + +extern +queue_head_t vm_page_queue_free_normal; /* normal memory free queue */ extern -vm_page_t vm_page_queue_free; /* memory free queue */ +queue_head_t vm_page_queue_free_dma; /* DMA-capable memory free queue */ extern -vm_page_t vm_page_queue_fictitious; /* fictitious free queue */ +vm_page_t vm_page_queue_fictitious; /* fictitious free queue */ extern -queue_head_t vm_page_queue_active; /* active memory queue */ +queue_head_t vm_page_queue_active; /* active memory queue */ extern queue_head_t vm_page_queue_inactive; /* inactive memory queue */ extern -vm_offset_t first_phys_addr; /* physical address for first_page */ +vm_offset_t phys_first_addr;/* physical address for first_page */ extern -vm_offset_t last_phys_addr; /* physical address for last_page */ +vm_offset_t phys_last_addr; /* physical address for last_page */ extern -int vm_page_free_count; /* How many pages are free? */ +int vm_page_free_count; /* How many pages are free? */ extern int vm_page_fictitious_count;/* How many fictitious pages are free? */ extern @@ -220,11 +224,20 @@ extern vm_page_t vm_page_lookup( vm_object_t object, vm_offset_t offset); +extern vm_page_t vm_page_physaddr_lookup (vm_offset_t); extern vm_page_t vm_page_grab_fictitious(void); extern void vm_page_release_fictitious(vm_page_t); extern boolean_t vm_page_convert(vm_page_t, boolean_t); extern void vm_page_more_fictitious(void); extern vm_page_t vm_page_grab(boolean_t); +extern vm_page_t vm_page_grab_flags(boolean_t, unsigned); +extern kern_return_t vm_page_grab_contiguous_pages_flags( + int npages, + vm_offset_t *phys_address, + boolean_t external, + unsigned flags, + unsigned long align); + extern void vm_page_release(vm_page_t, boolean_t); extern void vm_page_wait(void (*)(void)); extern vm_page_t vm_page_alloc( diff -ru gnumach-vanilla/vm/vm_resident.c gnumach/vm/vm_resident.c --- gnumach-vanilla/vm/vm_resident.c 1999-09-04 15:03:32.000000000 +0200 +++ gnumach/vm/vm_resident.c 2006-01-22 02:37:16.000000000 +0100 @@ -56,6 +56,9 @@ #include <vm/vm_user.h> #endif +extern unsigned long vm_page_normal_first, vm_page_normal_last; +extern unsigned long vm_page_dma_first, vm_page_dma_last; + /* in zalloc.c XXX */ extern vm_offset_t zdata; extern vm_size_t zdata_size; @@ -105,14 +108,19 @@ * Resident pages that represent real memory * are allocated from a free list. */ -vm_page_t vm_page_queue_free; +queue_head_t vm_page_queue_free_normal; +queue_head_t vm_page_queue_free_dma; vm_page_t vm_page_queue_fictitious; decl_simple_lock_data(,vm_page_queue_free_lock) + unsigned int vm_page_free_wanted; int vm_page_free_count; int vm_page_fictitious_count; int vm_page_external_count; +natural_t *vm_page_free_bitmap; +unsigned long vm_page_free_bitmap_bitsz; + unsigned int vm_page_free_count_minimum; /* debugging */ /* @@ -174,6 +182,102 @@ boolean_t vm_page_deactivate_hint = TRUE; /* + * vm_page_free_bitmap_set and vm_page_free_bitmap_unset: + * FIXME: Free pages bitmap is SLOW! Make a decent multizone O(1) + * page allocator. + * + * Used to mark a page as free. 
+ */ + +#ifndef NBBY +#define NBBY 8 /* size in bits of sizeof()`s unity */ +#endif +#define NBPEL (sizeof(natural_t)*NBBY) + +void vm_page_free_bitmap_set(natural_t pageno) +{ + register int word_index, bit_index; + + word_index = pageno / NBPEL; + bit_index = pageno - (word_index * NBPEL); + + vm_page_free_bitmap[word_index] |= 1 << bit_index; +} +void vm_page_free_bitmap_unset(natural_t pageno) +{ + register int word_index, bit_index; + + word_index = pageno / NBPEL; + bit_index = pageno - (word_index * NBPEL); + + vm_page_free_bitmap[word_index] &= ~(1 << bit_index); +} + +/* + * vm_page_free_bitmap_alloc: + * + * Alloc space for bitmap at initializiation time. + * FIXME: Free pages bitmap is SLOW! Make a decent multizone O(1) + * page allocator. + */ + +void vm_page_free_bitmap_alloc(void) +{ + register unsigned long size, bitsz; + unsigned long vm_page_big_pagenum; + int i; + + vm_page_big_pagenum = atop (phys_last_addr); + + bitsz = (vm_page_big_pagenum + NBPEL - 1) + & ~(NBPEL - 1); /* in bits */ + + size = bitsz / NBBY; /* in bytes */ + + vm_page_free_bitmap = (natural_t *) pmap_steal_memory (size); + bzero(vm_page_free_bitmap, size); + + vm_page_free_bitmap_bitsz = bitsz; +} + + +/* Fast phys_addr to vm_page_t lookup. */ + +static vm_page_t *vm_page_array; +static unsigned vm_page_array_size; + +static void +vm_page_array_init () +{ + int i; + + vm_page_array_size = (phys_last_addr - phys_first_addr) >> PAGE_SHIFT; + vm_page_array = (vm_page_t *) pmap_steal_memory (sizeof(vm_page_t) + * (vm_page_array_size)); + + for (i = 0; i < vm_page_array_size; i++) + vm_page_array[i] = VM_PAGE_NULL; + +} + +static void +vm_page_array_add (vm_offset_t phys_addr, vm_page_t pg) +{ + assert (phys_addr < phys_last_addr && phys_addr >= phys_first_addr); + + vm_page_array [(phys_addr - phys_first_addr) >> PAGE_SHIFT] = pg; +} + +vm_page_t +vm_page_physaddr_lookup (vm_offset_t phys_addr) +{ + assert (phys_addr < phys_last_addr && phys_addr >= phys_first_addr); + + return vm_page_array [(phys_addr - phys_first_addr) >> PAGE_SHIFT]; +} + + +/* * vm_page_bootstrap: * * Initializes the resident memory module. @@ -229,7 +333,8 @@ simple_lock_init(&vm_page_queue_free_lock); simple_lock_init(&vm_page_queue_lock); - vm_page_queue_free = VM_PAGE_NULL; + queue_init (&vm_page_queue_free_normal); + queue_init (&vm_page_queue_free_dma); vm_page_queue_fictitious = VM_PAGE_NULL; queue_init(&vm_page_queue_active); queue_init(&vm_page_queue_inactive); @@ -279,6 +384,8 @@ simple_lock_init(&bucket->lock); } + vm_page_free_bitmap_alloc(); + /* * Machine-dependent code allocates the resident page table. * It uses vm_page_init to initialize the page frames. @@ -294,7 +401,6 @@ *startp = virtual_space_start; *endp = virtual_space_end; - /* printf("vm_page_bootstrap: %d free pages\n", vm_page_free_count);*/ vm_page_free_count_minimum = vm_page_free_count; } @@ -380,6 +486,8 @@ pages = (vm_page_t) pmap_steal_memory(npages * sizeof *pages); + vm_page_array_init (); + /* * Initialize the page frames. */ @@ -389,21 +497,12 @@ break; vm_page_init(&pages[i], paddr); + vm_page_array_add (paddr, &pages[i]); + vm_page_release(&pages[i], FALSE); pages_initialized++; } /* - * Release pages in reverse order so that physical pages - * initially get allocated in ascending addresses. This keeps - * the devices (which must address physical memory) happy if - * they require several consecutive pages. 
- */ - - for (i = pages_initialized; i > 0; i--) { - vm_page_release(&pages[i - 1], FALSE); - } - - /* * We have to re-align virtual_space_start, * because pmap_steal_memory has been using it. */ @@ -421,7 +520,7 @@ * Second initialization pass, to be done after * the basic VM system is ready. */ -void vm_page_module_init(void) +void vm_page_module_init(void) { vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page), VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS, @@ -453,6 +552,7 @@ panic("vm_page_create"); vm_page_init(m, paddr); + vm_page_array_add (paddr, m); vm_page_release(m, FALSE); } } @@ -840,16 +940,16 @@ } /* - * vm_page_grab: + * vm_page_grab_flags: * - * Remove a page from the free list. + * Remove a page specifying the memory zone to get the page from. * Returns VM_PAGE_NULL if the free list is too small. */ - -vm_page_t vm_page_grab( - boolean_t external) +vm_page_t vm_page_grab_flags( + boolean_t external, + unsigned flags) { - register vm_page_t mem; + register vm_page_t mem = VM_PAGE_NULL; simple_lock(&vm_page_queue_free_lock); @@ -867,17 +967,70 @@ return VM_PAGE_NULL; } - if (vm_page_queue_free == VM_PAGE_NULL) + /* + * If we put no flag, we request any page, so we search in + * the normal zone before. + */ + + if (!(flags & VM_PAGE_DMA) + && !(queue_empty(&vm_page_queue_free_normal))) + { + + if (--vm_page_free_count < vm_page_free_count_minimum) + vm_page_free_count_minimum = vm_page_free_count; + + if (external) + vm_page_external_count++; + + queue_remove_first (&vm_page_queue_free_normal, + mem, vm_page_t, pageq); + + mem->free = FALSE; + mem->extcounted = mem->external = external; + mem->pageq.next = 0; + mem->pageq.prev = 0; + + vm_page_free_bitmap_unset (atop (mem->phys_addr)); + } + + if (!mem) + { + + /* + * It is not necessarily a bug if we ask for a DMA page and we can't + * obtain it, despite of vm_page_free_count, since free pages can be + * in the normal zone. + */ + if (queue_empty(&vm_page_queue_free_dma)) + { + if (!(flags & VM_PAGE_DMA)) panic("vm_page_grab"); + else + { + printf ("vm_page_grab: no dma anymore"); + simple_unlock(&vm_page_queue_free_lock); + return VM_PAGE_NULL; + } + } + + + if (--vm_page_free_count < vm_page_free_count_minimum) + vm_page_free_count_minimum = vm_page_free_count; + + if (external) + vm_page_external_count++; + + queue_remove_first (&vm_page_queue_free_dma, + mem, vm_page_t, pageq); + + mem->free = FALSE; + mem->extcounted = mem->external = external; + mem->pageq.next = 0; + mem->pageq.prev = 0; + + vm_page_free_bitmap_unset (atop (mem->phys_addr)); + } - if (--vm_page_free_count < vm_page_free_count_minimum) - vm_page_free_count_minimum = vm_page_free_count; - if (external) - vm_page_external_count++; - mem = vm_page_queue_free; - vm_page_queue_free = (vm_page_t) mem->pageq.next; - mem->free = FALSE; - mem->extcounted = mem->external = external; simple_unlock(&vm_page_queue_free_lock); /* @@ -897,6 +1050,26 @@ thread_wakeup((event_t) &vm_page_free_wanted); return mem; + +} + + +/* + * vm_page_grab: + * + * Remove a page from the free list. + * Returns VM_PAGE_NULL if the free list is too small. + */ + +vm_page_t vm_page_grab( + boolean_t external) +{ + register vm_page_t mem; + + /* Get any free page, no matter what zone. 
*/ + mem = vm_page_grab_flags (external, 0); + + return mem; } vm_offset_t vm_page_grab_phys_addr() @@ -909,13 +1082,12 @@ } /* - * vm_page_grab_contiguous_pages: + * vm_page_grab_contiguous_pages_queue: * - * Take N pages off the free list, the pages should - * cover a contiguous range of physical addresses. - * [Used by device drivers to cope with DMA limitations] + * Take N pages off the free list FREEQUEUE, the pages + * should cover a contiguous range of physical addresses. * - * Returns the page descriptors in ascending order, or + * Returns the first page descriptor, or * Returns KERN_RESOURCE_SHORTAGE if it could not. */ @@ -924,44 +1096,32 @@ vm_size_t vm_page_big_pagenum = 0; /* Set this before call! */ kern_return_t -vm_page_grab_contiguous_pages( - int npages, - vm_page_t pages[], - natural_t *bits, - boolean_t external) +vm_page_grab_contiguous_pages_queue( + int npages, + vm_offset_t *phys_addr, + boolean_t external, + queue_t freequeue, + unsigned long minbitidx, + unsigned long maxbitidx, + unsigned long align) + { register int first_set; int size, alloc_size; kern_return_t ret; vm_page_t mem, prevmem; -#ifndef NBBY -#define NBBY 8 /* size in bits of sizeof()`s unity */ -#endif + if (!align) + align = 1; -#define NBPEL (sizeof(natural_t)*NBBY) + if (minbitidx >= vm_page_free_bitmap_bitsz) + panic ("minbitidx too high."); - size = (vm_page_big_pagenum + NBPEL - 1) - & ~(NBPEL - 1); /* in bits */ - - size = size / NBBY; /* in bytes */ - - /* - * If we are called before the VM system is fully functional - * the invoker must provide us with the work space. [one bit - * per page starting at phys 0 and up to vm_page_big_pagenum] - */ - if (bits == 0) { - alloc_size = round_page(size); - if (kmem_alloc_wired(kernel_map, - (vm_offset_t *)&bits, - alloc_size) - != KERN_SUCCESS) - return KERN_RESOURCE_SHORTAGE; - } else - alloc_size = 0; - - bzero(bits, size); + if (maxbitidx > vm_page_free_bitmap_bitsz) { + printf ("%s: maxbitidx exceeds bitmap size (%x > %x).\n", + __FUNCTION__, maxbitidx, vm_page_free_bitmap_bitsz); + maxbitidx = vm_page_free_bitmap_bitsz; + } /* * A very large granularity call, its rare so that is ok @@ -972,32 +1132,16 @@ * Do not dip into the reserved pool. */ - if ((vm_page_free_count < vm_page_free_reserved) - || (vm_page_external_count >= vm_page_external_limit)) { + if (((vm_page_free_count < vm_page_free_reserved) + || (external + && (vm_page_external_count > vm_page_external_limit))) + && !current_thread()->vm_privilege) { simple_unlock(&vm_page_queue_free_lock); return KERN_RESOURCE_SHORTAGE; } /* - * First pass through, build a big bit-array of - * the pages that are free. It is not going to - * be too large anyways, in 4k we can fit info - * for 32k pages. - */ - mem = vm_page_queue_free; - while (mem) { - register int word_index, bit_index; - - bit_index = (mem->phys_addr >> PAGE_SHIFT); - word_index = bit_index / NBPEL; - bit_index = bit_index - (word_index * NBPEL); - bits[word_index] |= 1 << bit_index; - - mem = (vm_page_t) mem->pageq.next; - } - - /* - * Second loop. Scan the bit array for NPAGES + * First loop. Scan the bit array for NPAGES * contiguous bits. That gives us, if any, * the range of pages we will be grabbing off * the free list. 
@@ -1007,9 +1151,13 @@ first_set = 0; - for (i = 0; i < size; i += sizeof(natural_t)) { + for (i = (minbitidx/NBBY); + i < (maxbitidx/NBBY); + i += sizeof(natural_t)) + { - register natural_t v = bits[i / sizeof(natural_t)]; + register natural_t v = + vm_page_free_bitmap[i / sizeof(natural_t)]; register int bitpos; /* @@ -1042,14 +1190,20 @@ */ bits_so_far = 0; count_zeroes: - while ((bitpos < NBPEL) && ((v & 1) == 0)) { + while ((bitpos < NBPEL) && + (((v & 1) == 0) + || ((bitpos + i*NBBY) % align))) + { bitpos++; v >>= 1; } - if (v & 1) { + + if ((v & 1) + && (!((bitpos + i*NBBY) % align))) + { first_set = (i * NBBY) + bitpos; goto count_ones; - } + } } /* * No luck @@ -1063,7 +1217,6 @@ */ not_found_em: simple_unlock(&vm_page_queue_free_lock); - ret = KERN_RESOURCE_SHORTAGE; goto out; @@ -1079,43 +1232,33 @@ vm_page_free_count_minimum = vm_page_free_count; if (external) vm_page_external_count += npages; + { - register vm_offset_t first_phys, last_phys; - - /* cache values for compare */ - first_phys = first_set << PAGE_SHIFT; - last_phys = first_phys + (npages << PAGE_SHIFT);/* not included */ - - /* running pointers */ - mem = vm_page_queue_free; - prevmem = VM_PAGE_NULL; - - while (mem) { - - register vm_offset_t addr; - - addr = mem->phys_addr; - - if ((addr >= first_phys) && - (addr < last_phys)) { - if (prevmem) - prevmem->pageq.next = mem->pageq.next; - pages[(addr - first_phys) >> PAGE_SHIFT] = mem; - mem->free = FALSE; - mem->extcounted = mem->external = external; - /* - * Got them all ? - */ - if (--npages == 0) break; - } else - prevmem = mem; - - mem = (vm_page_t) mem->pageq.next; + vm_offset_t first_phys; + vm_page_t pg; + int i; + + first_phys = first_set << PAGE_SHIFT; + + if (phys_addr) + *phys_addr = first_phys; + + for (i = 0; i < npages; i++) + { + pg = vm_page_physaddr_lookup (first_phys + (i << PAGE_SHIFT)); + + assert (pg != VM_PAGE_NULL); + + queue_remove (freequeue, pg, vm_page_t, pageq); + + pg->free = FALSE; + pg->extcounted = pg->external = external; + vm_page_free_bitmap_unset (atop (pg->phys_addr)); } } - + simple_unlock(&vm_page_queue_free_lock); - + /* * Decide if we should poke the pageout daemon. * We do this if the free count is less than the low @@ -1134,8 +1277,74 @@ ret = KERN_SUCCESS; out: - if (alloc_size) - kmem_free(kernel_map, (vm_offset_t) bits, alloc_size); + + return ret; +} + +/* + * vm_page_grab_contiguous_pages_flags: + * + * Take N pages from specified zone, the pages should + * cover a contiguous range of physical addresses. + * [Used by device drivers to cope with DMA limitations] + * + * Returns the page descriptors in ascending order, or + * Returns KERN_RESOURCE_SHORTAGE if it could not. + */ + +kern_return_t +vm_page_grab_contiguous_pages_flags( + int npages, + vm_offset_t *phys_addr, + boolean_t external, + unsigned flags, + unsigned long align) +{ + kern_return_t ret; + + if (!(flags & VM_PAGE_DMA)) + { + ret = vm_page_grab_contiguous_pages_queue ( + npages, phys_addr, external, + &vm_page_queue_free_normal, + atop(vm_page_normal_first), + atop(vm_page_normal_last), + align); + + if (ret == KERN_SUCCESS) + return ret; + }; + + ret = vm_page_grab_contiguous_pages_queue ( + npages, phys_addr, external, + &vm_page_queue_free_dma, + atop(vm_page_dma_first), + atop(vm_page_dma_last), + align); + + return ret; +} + +/* + * vm_page_grab_contiguous_pages: + * + * Take N pages off the free list, the pages should + * cover a contiguous range of physical addresses. 
+ * + * Returns the page descriptors in ascending order, or + * Returns KERN_RESOURCE_SHORTAGE if it could not. + * [Used by device drivers to cope with DMA limitations] + */ +kern_return_t +vm_page_grab_contiguous_pages( + int npages, + queue_t pages, + vm_offset_t *phys_addr, + boolean_t e) +{ + kern_return_t ret; + + ret = vm_page_grab_contiguous_pages_flags (npages, phys_addr, e, 0, 0); return ret; } @@ -1150,16 +1359,36 @@ register vm_page_t mem, boolean_t external) { + queue_t freequeue; + + if (pmap_is_dma (mem->phys_addr)) + freequeue = &vm_page_queue_free_dma; + else if (pmap_is_normal (mem->phys_addr)) + freequeue = &vm_page_queue_free_normal; + else { + /* XXX - Don't put a panic here. it's just for now. */ + panic ("vm_page_release (unknown page zone)"); + } + + /* UGLY: We skip the page 0, since it may cause problems + when returned to drivers. */ + if (mem->phys_addr == 0) + return; + simple_lock(&vm_page_queue_free_lock); + if (mem->free) panic("vm_page_release"); mem->free = TRUE; - mem->pageq.next = (queue_entry_t) vm_page_queue_free; - vm_page_queue_free = mem; + + queue_enter (freequeue, mem, vm_page_t, pageq); + vm_page_free_count++; if (external) vm_page_external_count--; + vm_page_free_bitmap_set (atop (mem->phys_addr)); + /* * Check if we should wake up someone waiting for page. * But don't bother waking them unless they can allocate. -- It was a type of people I did not know, I found them very strange and they did not inspire confidence at all. Later I learned that I had been introduced to electronic engineers. E. W. Dijkstra _______________________________________________ Bug-hurd mailing list Bug-hurd@gnu.org http://lists.gnu.org/mailman/listinfo/bug-hurd