Previously, we used contiguous page directories four pages in length when using PAE. Physical memory fragmentation can make it impossible to find that many contiguous physical pages, so to prevent such allocation failures we need to use virtual memory for objects spanning multiple pages. Virtual kernel memory, however, is a scarce commodity.
* i386/intel/pmap.h (lin2pdenum): Never include the page directory pointer table index. (lin2pdenum_cont): New macro which does include said index. (struct pmap): Remove the directory base pointer when using PAE. * i386/intel/pmap.c (pmap_pde): Fix lookup. (pmap_pte): Fix check for uninitialized pmap. (pmap_bootstrap): Do not store the page directory base if PAE. (pmap_init): Reduce size of page directories to one page, use direct-mapped memory. (pmap_create): Allocate four page directories per pmap. (pmap_destroy): Adapt to the discontinuous directories. (pmap_collect): Likewise. * i386/i386/xen.h (hyp_mmu_update_la): Adapt code manipulating the kernels page directory. * i386/i386at/model_dep.c (i386at_init): Likewise. --- i386/i386/xen.h | 2 +- i386/i386at/model_dep.c | 16 +++--- i386/intel/pmap.c | 129 +++++++++++++++++++++++++++++++++++++----------- i386/intel/pmap.h | 14 ++++-- 4 files changed, 118 insertions(+), 43 deletions(-) diff --git a/i386/i386/xen.h b/i386/i386/xen.h index b434dda..debf278 100644 --- a/i386/i386/xen.h +++ b/i386/i386/xen.h @@ -169,7 +169,7 @@ MACH_INLINE int hyp_mmu_update_pte(pt_entry_t pte, pt_entry_t val) #define HYP_BATCH_MMU_UPDATES 256 #define hyp_mmu_update_la(la, val) hyp_mmu_update_pte( \ - (kernel_pmap->dirbase[lin2pdenum((vm_offset_t)(la))] & INTEL_PTE_PFN) \ + (kernel_page_dir[lin2pdenum_cont((vm_offset_t)(la))] & INTEL_PTE_PFN) \ + ptenum((vm_offset_t)(la)) * sizeof(pt_entry_t), val) #endif diff --git a/i386/i386at/model_dep.c b/i386/i386at/model_dep.c index 239f63f..aa6c2c1 100644 --- a/i386/i386at/model_dep.c +++ b/i386/i386at/model_dep.c @@ -430,14 +430,14 @@ i386at_init(void) delta = (vm_offset_t)(-delta); nb_direct = delta >> PDESHIFT; for (i = 0; i < nb_direct; i++) - kernel_page_dir[lin2pdenum(INIT_VM_MIN_KERNEL_ADDRESS) + i] = - kernel_page_dir[lin2pdenum(LINEAR_MIN_KERNEL_ADDRESS) + i]; + kernel_page_dir[lin2pdenum_cont(INIT_VM_MIN_KERNEL_ADDRESS) + i] = + kernel_page_dir[lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS) 
+ i]; #endif /* We need BIOS memory mapped at 0xc0000 & co for Linux drivers */ #ifdef LINUX_DEV #if VM_MIN_KERNEL_ADDRESS != 0 - kernel_page_dir[lin2pdenum(LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS)] = - kernel_page_dir[lin2pdenum(LINEAR_MIN_KERNEL_ADDRESS)]; + kernel_page_dir[lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS)] = + kernel_page_dir[lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS)]; #endif #endif @@ -489,21 +489,21 @@ i386at_init(void) for (i = 0 ; i < nb_direct; i++) { #ifdef MACH_XEN #ifdef MACH_PSEUDO_PHYS - if (!hyp_mmu_update_pte(kv_to_ma(&kernel_page_dir[lin2pdenum(VM_MIN_KERNEL_ADDRESS) + i]), 0)) + if (!hyp_mmu_update_pte(kv_to_ma(&kernel_page_dir[lin2pdenum_cont(VM_MIN_KERNEL_ADDRESS) + i]), 0)) #else /* MACH_PSEUDO_PHYS */ if (hyp_do_update_va_mapping(VM_MIN_KERNEL_ADDRESS + i * INTEL_PGBYTES, 0, UVMF_INVLPG | UVMF_ALL)) #endif /* MACH_PSEUDO_PHYS */ printf("couldn't unmap frame %d\n", i); #else /* MACH_XEN */ - kernel_page_dir[lin2pdenum(INIT_VM_MIN_KERNEL_ADDRESS) + i] = 0; + kernel_page_dir[lin2pdenum_cont(INIT_VM_MIN_KERNEL_ADDRESS) + i] = 0; #endif /* MACH_XEN */ } #endif /* Keep BIOS memory mapped */ #ifdef LINUX_DEV #if VM_MIN_KERNEL_ADDRESS != 0 - kernel_page_dir[lin2pdenum(LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS)] = - kernel_page_dir[lin2pdenum(LINEAR_MIN_KERNEL_ADDRESS)]; + kernel_page_dir[lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS)] = + kernel_page_dir[lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS)]; #endif #endif diff --git a/i386/intel/pmap.c b/i386/intel/pmap.c index b143dd7..ab983ed 100644 --- a/i386/intel/pmap.c +++ b/i386/intel/pmap.c @@ -421,9 +421,15 @@ static pmap_mapwindow_t mapwindows[PMAP_NMAPWINDOWS]; static inline pt_entry_t * pmap_pde(const pmap_t pmap, vm_offset_t addr) { + pt_entry_t *page_dir; if (pmap == kernel_pmap) addr = kvtolin(addr); - return &pmap->dirbase[lin2pdenum(addr)]; +#if PAE + page_dir = (pt_entry_t *) ptetokv(pmap->pdpbase[lin2pdpnum(addr)]); 
+#else + page_dir = pmap->dirbase; +#endif + return &page_dir[lin2pdenum(addr)]; } /* @@ -439,8 +445,13 @@ pmap_pte(const pmap_t pmap, vm_offset_t addr) pt_entry_t *ptp; pt_entry_t pte; +#if PAE + if (pmap->pdpbase == 0) + return(PT_ENTRY_NULL); +#else if (pmap->dirbase == 0) return(PT_ENTRY_NULL); +#endif pte = *pmap_pde(pmap, addr); if ((pte & INTEL_PTE_VALID) == 0) return(PT_ENTRY_NULL); @@ -623,13 +634,16 @@ void pmap_bootstrap(void) { vm_offset_t addr; init_alloc_aligned(PDPNUM * INTEL_PGBYTES, &addr); - kernel_pmap->dirbase = kernel_page_dir = (pt_entry_t*)phystokv(addr); + kernel_page_dir = (pt_entry_t*)phystokv(addr); } kernel_pmap->pdpbase = (pt_entry_t*)phystokv(pmap_grab_page()); { int i; for (i = 0; i < PDPNUM; i++) - WRITE_PTE(&kernel_pmap->pdpbase[i], pa_to_pte(_kvtophys((void *) kernel_pmap->dirbase + i * INTEL_PGBYTES)) | INTEL_PTE_VALID); + WRITE_PTE(&kernel_pmap->pdpbase[i], + pa_to_pte(_kvtophys((void *) kernel_page_dir + + i * INTEL_PGBYTES)) + | INTEL_PTE_VALID); } #else /* PAE */ kernel_pmap->dirbase = kernel_page_dir = (pt_entry_t*)phystokv(pmap_grab_page()); @@ -637,7 +651,7 @@ void pmap_bootstrap(void) { unsigned i; for (i = 0; i < NPDES; i++) - kernel_pmap->dirbase[i] = 0; + kernel_page_dir[i] = 0; } #ifdef MACH_PV_PAGETABLES @@ -710,7 +724,7 @@ void pmap_bootstrap(void) */ for (va = phystokv(0); va >= phystokv(0) && va < kernel_virtual_end; ) { - pt_entry_t *pde = kernel_page_dir + lin2pdenum(kvtolin(va)); + pt_entry_t *pde = kernel_page_dir + lin2pdenum_cont(kvtolin(va)); pt_entry_t *ptable = (pt_entry_t*)phystokv(pmap_grab_page()); pt_entry_t *pte; @@ -983,11 +997,13 @@ void pmap_init(void) s = (vm_size_t) sizeof(struct pmap); kmem_cache_init(&pmap_cache, "pmap", s, 0, NULL, 0); kmem_cache_init(&pd_cache, "pd", - PDPNUM * INTEL_PGBYTES, INTEL_PGBYTES, NULL, 0); + INTEL_PGBYTES, INTEL_PGBYTES, NULL, + KMEM_CACHE_PHYSMEM); #if PAE kmem_cache_init(&pdpt_cache, "pdpt", PDPNUM * sizeof(pt_entry_t), - PDPNUM * sizeof(pt_entry_t), NULL, 0); + 
PDPNUM * sizeof(pt_entry_t), NULL, + KMEM_CACHE_PHYSMEM); #endif s = (vm_size_t) sizeof(struct pv_entry); kmem_cache_init(&pv_list_cache, "pv_entry", s, 0, NULL, 0); @@ -1147,6 +1163,8 @@ pmap_page_table_page_dealloc(vm_offset_t pa) */ pmap_t pmap_create(vm_size_t size) { + pt_entry_t *page_dir[PDPNUM]; + int i; pmap_t p; pmap_statistics_t stats; @@ -1167,43 +1185,63 @@ pmap_t pmap_create(vm_size_t size) if (p == PMAP_NULL) return PMAP_NULL; - p->dirbase = (pt_entry_t *) kmem_cache_alloc(&pd_cache); - if (p->dirbase == NULL) { - kmem_cache_free(&pmap_cache, (vm_address_t) p); - return PMAP_NULL; + for (i = 0; i < PDPNUM; i++) { + page_dir[i] = (pt_entry_t *) kmem_cache_alloc(&pd_cache); + if (page_dir[i] == NULL) { + i -= 1; + while (i >= 0) { + kmem_cache_free(&pd_cache, + (vm_address_t) page_dir[i]); + i -= 1; + } + kmem_cache_free(&pmap_cache, (vm_address_t) p); + return PMAP_NULL; + } + memcpy(page_dir[i], + (void *) kernel_page_dir + i * INTEL_PGBYTES, + INTEL_PGBYTES); } - memcpy(p->dirbase, kernel_page_dir, PDPNUM * INTEL_PGBYTES); #ifdef LINUX_DEV #if VM_MIN_KERNEL_ADDRESS != 0 /* Do not map BIOS in user tasks */ - p->dirbase[lin2pdenum(LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS)] = 0; + page_dir +#if PAE + [lin2pdpnum(LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS)] +#else + [0] +#endif + [lin2pdenum(LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS)] + = 0; #endif #endif #ifdef MACH_PV_PAGETABLES { - int i; for (i = 0; i < PDPNUM; i++) - pmap_set_page_readonly((void*) p->dirbase + i * INTEL_PGBYTES); + pmap_set_page_readonly((void *) page_dir[i]); } #endif /* MACH_PV_PAGETABLES */ #if PAE p->pdpbase = (pt_entry_t *) kmem_cache_alloc(&pdpt_cache); if (p->pdpbase == NULL) { - kmem_cache_free(&pd_cache, (vm_address_t) p->dirbase); + for (i = 0; i < PDPNUM; i++) + kmem_cache_free(&pd_cache, (vm_address_t) page_dir[i]); kmem_cache_free(&pmap_cache, (vm_address_t) p); return PMAP_NULL; } { - int i; for (i = 0; i < PDPNUM; i++) - 
WRITE_PTE(&p->pdpbase[i], pa_to_pte(kvtophys((vm_offset_t) p->dirbase + i * INTEL_PGBYTES)) | INTEL_PTE_VALID); + WRITE_PTE(&p->pdpbase[i], + pa_to_pte(kvtophys((vm_offset_t) page_dir[i])) + | INTEL_PTE_VALID); } #ifdef MACH_PV_PAGETABLES pmap_set_page_readonly(p->pdpbase); #endif /* MACH_PV_PAGETABLES */ +#else /* PAE */ + p->dirbase = page_dir[0]; #endif /* PAE */ p->ref_count = 1; @@ -1230,6 +1268,11 @@ pmap_t pmap_create(vm_size_t size) void pmap_destroy(pmap_t p) { +#if PAE + int i; +#endif + boolean_t free_all; + pt_entry_t *page_dir; pt_entry_t *pdep; phys_addr_t pa; int c, s; @@ -1248,12 +1291,23 @@ void pmap_destroy(pmap_t p) return; /* still in use */ } +#if PAE + for (i = 0; i <= lin2pdpnum(LINEAR_MIN_KERNEL_ADDRESS); i++) { + free_all = i < lin2pdpnum(LINEAR_MIN_KERNEL_ADDRESS); + page_dir = (pt_entry_t *) ptetokv(p->pdpbase[i]); +#else + free_all = FALSE; + page_dir = p->dirbase; +#endif + /* * Free the memory maps, then the * pmap structure. */ - for (pdep = p->dirbase; - pdep < &p->dirbase[lin2pdenum(LINEAR_MIN_KERNEL_ADDRESS)]; + for (pdep = page_dir; + (free_all + || pdep < &page_dir[lin2pdenum(LINEAR_MIN_KERNEL_ADDRESS)]) + && pdep < &page_dir[NPTES]; pdep += ptes_per_vm_page) { if (*pdep & INTEL_PTE_VALID) { pa = pte_to_pa(*pdep); @@ -1274,14 +1328,12 @@ void pmap_destroy(pmap_t p) } } #ifdef MACH_PV_PAGETABLES - { - int i; - for (i = 0; i < PDPNUM; i++) - pmap_set_page_readwrite((void*) p->dirbase + i * INTEL_PGBYTES); - } + pmap_set_page_readwrite((void*) page_dir); #endif /* MACH_PV_PAGETABLES */ - kmem_cache_free(&pd_cache, (vm_offset_t) p->dirbase); + kmem_cache_free(&pd_cache, (vm_offset_t) page_dir); #if PAE + } + #ifdef MACH_PV_PAGETABLES pmap_set_page_readwrite(p->pdpbase); #endif /* MACH_PV_PAGETABLES */ @@ -2202,6 +2254,9 @@ void pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr) */ void pmap_collect(pmap_t p) { + int i; + boolean_t free_all; + pt_entry_t *page_dir; pt_entry_t *pdp, *ptp; pt_entry_t *eptp; phys_addr_t pa; @@ 
-2213,12 +2268,24 @@ void pmap_collect(pmap_t p) if (p == kernel_pmap) return; +#if PAE + for (i = 0; i <= lin2pdpnum(LINEAR_MIN_KERNEL_ADDRESS); i++) { + free_all = i < lin2pdpnum(LINEAR_MIN_KERNEL_ADDRESS); + page_dir = (pt_entry_t *) ptetokv(p->pdpbase[i]); +#else + i = 0; + free_all = FALSE; + page_dir = p->dirbase; +#endif + /* * Garbage collect map. */ PMAP_READ_LOCK(p, spl); - for (pdp = p->dirbase; - pdp < &p->dirbase[lin2pdenum(LINEAR_MIN_KERNEL_ADDRESS)]; + for (pdp = page_dir; + (free_all + || pdp < &page_dir[lin2pdenum(LINEAR_MIN_KERNEL_ADDRESS)]) + && pdp < &page_dir[NPTES]; pdp += ptes_per_vm_page) { if (*pdp & INTEL_PTE_VALID) { @@ -2246,7 +2313,8 @@ void pmap_collect(pmap_t p) * Remove the virtual addresses mapped by this pte page. */ { /*XXX big hack*/ - vm_offset_t va = pdenum2lin(pdp - p->dirbase); + vm_offset_t va = pdenum2lin(pdp - page_dir + + i * NPTES); if (p == kernel_pmap) va = lintokv(va); pmap_remove_range(p, @@ -2299,6 +2367,9 @@ void pmap_collect(pmap_t p) } } } +#if PAE + } +#endif PMAP_UPDATE_TLBS(p, VM_MIN_ADDRESS, VM_MAX_ADDRESS); PMAP_READ_UNLOCK(p, spl); diff --git a/i386/intel/pmap.h b/i386/intel/pmap.h index e6a3ede..ee600cd 100644 --- a/i386/intel/pmap.h +++ b/i386/intel/pmap.h @@ -89,11 +89,14 @@ typedef phys_addr_t pt_entry_t; /* * Convert linear offset to page descriptor index */ +#define lin2pdenum(a) (((a) >> PDESHIFT) & PDEMASK) + #if PAE -/* Making it include the page directory pointer table index too */ -#define lin2pdenum(a) (((a) >> PDESHIFT) & 0x7ff) +/* Special version assuming contiguous page directories. Making it + include the page directory pointer table index too. */ +#define lin2pdenum_cont(a) (((a) >> PDESHIFT) & 0x7ff) #else -#define lin2pdenum(a) (((a) >> PDESHIFT) & PDEMASK) +#define lin2pdenum_cont(a) lin2pdenum(a) #endif /* @@ -159,10 +162,11 @@ typedef volatile long cpu_set; /* set of CPUs - must be <= 32 */ /* changed by other processors */ struct pmap { +#if ! 
PAE pt_entry_t *dirbase; /* page directory table */ -#if PAE +#else pt_entry_t *pdpbase; /* page directory pointer table */ -#endif /* PAE */ +#endif /* ! PAE */ int ref_count; /* reference count */ decl_simple_lock_data(,lock) /* lock on map */ -- 2.9.3