Hello,

When a translator needs access to a portion of a memory object (e.g. to answer a read()/write() request), it usually has to map part of that object into its own address space and then copy the data to the destination buffer (vm_read/vm_write/vm_copy support neither memory objects nor unaligned addresses). This behaviour is suboptimal: every request pays for a map/unmap cycle on top of the copy itself.
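To illustrate, the usual mapping-based path looks roughly like this (just a sketch, not code from any existing translator; the helper name and the alignment/error handling are made up):

/* Sketch of the current, mapping-based way to serve a read() from a
 * memory object: map the relevant pages, memcpy, unmap.  Illustrative
 * only; read_via_mapping() is a made-up name and error handling is
 * minimal. */
#include <mach.h>
#include <string.h>

static kern_return_t
read_via_mapping (memory_object_t memobj, vm_offset_t offset,
                  char *buffer, vm_size_t size)
{
  /* vm_map() only accepts page-aligned object offsets, so map a
     page-aligned window covering [offset, offset + size).  */
  vm_offset_t pa_offset = offset & ~(vm_page_size - 1);
  vm_size_t map_size = ((offset + size + vm_page_size - 1)
                        & ~(vm_page_size - 1)) - pa_offset;
  vm_address_t window = 0;
  kern_return_t kr;

  kr = vm_map (mach_task_self (), &window, map_size, 0, TRUE /* anywhere */,
               memobj, pa_offset, FALSE /* copy */,
               VM_PROT_READ, VM_PROT_READ, VM_INHERIT_NONE);
  if (kr != KERN_SUCCESS)
    return kr;

  /* Copy the (possibly unaligned) region and drop the mapping again.  */
  memcpy (buffer, (char *) window + (offset - pa_offset), size);
  return vm_deallocate (mach_task_self (), window, map_size);
}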
I'm trying to implement an easy and fast way to read/write from a memory
object without the need of mapping its contents. This change consists of:

- kern_return_t vm_fault_copy_tmp() [vm_fault_copy_tmp.c]

  This is a copy of vm_fault_copy() with support for unaligned addresses.
  It can copy pages from a memory object to a userspace buffer. Right now,
  this buffer must contain something in each page to make sure that its
  pages are already allocated (this will be easily solved by manually
  faulting the page in when vm_page_lookup() returns VM_PAGE_NULL).

- kern_return_t vm_read_fast() [vm_read_fast.c]

  This is the RPC exported to the user. It looks up the proper entry in
  the target task's map, checks its size, checks the object and calls
  vm_fault_copy_tmp().

A "pager_memcpy" that makes use of these functions may look like this:

error_t
pager_memcpy_direct (struct pager *pager, memory_object_t memobj,
                     vm_offset_t offset, task_t target_task,
                     vm_address_t address, size_t *size, vm_prot_t prot)
{
  error_t err = 0;
  size_t nbytes = *size;  /* XXX nbytes is not properly updated right now */

  err = vm_write_fast (target_task, address, nbytes, memobj, offset);

  return err;
}

Please note that the arguments have changed so that it can receive
target_task. This means that the RPCs of the translator and of libc must
be changed to support this. I made my tests by exporting io-read-direct()
(in libdiskfs) and read_direct() (in libc), which are copies of
io-read()/read() with minor changes to support this behaviour.

Test results (reading a file, data already cached)
--------------------------------------------------

(A rough sketch of the test loop is included below, after the sign-off.)

Reading 1000 chunks of 8K each (milliseconds):
  Old behaviour:  260-270
  vm_read_fast:   110-120

Reading 1000 chunks of 4K each (milliseconds):
  Old behaviour:  180-190
  vm_read_fast:    90-100

Reading 1000 chunks of 1K each (milliseconds):
  Old behaviour:  100-110
  vm_read_fast:    70-80

NOTE: I've disabled vm_copy() in pager_memcpy because it's (much) slower
than simply memcpy'ing when reading.

This advantage will probably increase if we convert vm_read_fast (which
right now is a complex RPC, because it receives a memory_object) into a
syscall or a simple RPC.

TODO
----

- Make sure that one application can't write to another by guessing its
  task_t value. If it can, create an authentication mechanism (probably
  based on mach_task_self()).
- Add support for buffers spanning more than one map entry (vm_read_fast).
- When vm_page_lookup() returns VM_PAGE_NULL, call vm_fault_page() and
  insert result_page into the application's pmap (vm_fault_copy_tmp).
- Make sure that every translator can support this behaviour. If some
  can't, look for a way to make both implementations compatible.
- Try to convert vm_read_fast into a simple RPC or a syscall.
- Implement vm_write_fast().

This feature is _very_ experimental, and it needs much more work to be
finished. But before going any further, I would really appreciate hearing
the opinions of long-time Hurd/Mach hackers (Roland?).

Thanks.
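For reference, the numbers above were measured with a loop along these lines (a rough reconstruction, NOT the actual test program: the file name, the chunk-size handling and the gettimeofday() timing are assumptions; with the patched libc the read() below becomes read_direct()):

/* Hypothetical reconstruction of the benchmark: read 1000 chunks of a
 * given size from an already-cached file and report the elapsed time. */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <unistd.h>

#define CHUNKS 1000

int
main (int argc, char **argv)
{
  size_t chunk = (argc > 1) ? (size_t) atoi (argv[1]) : 8192;
  char *buf = malloc (chunk);
  int fd = open ("testfile", O_RDONLY);  /* assumed already in the cache */
  struct timeval t0, t1;
  long ms;
  int i;

  if (fd < 0 || buf == NULL)
    return 1;

  gettimeofday (&t0, NULL);
  for (i = 0; i < CHUNKS; i++)
    if (read (fd, buf, chunk) < 0)       /* read_direct() with the patch */
      return 1;
  gettimeofday (&t1, NULL);

  ms = (t1.tv_sec - t0.tv_sec) * 1000 + (t1.tv_usec - t0.tv_usec) / 1000;
  printf ("%d chunks of %zu bytes: %ld ms\n", CHUNKS, chunk, ms);
  return 0;
}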
/*
 *	Routine:	vm_read_fast
 *	Purpose:
 *		Copy `size' bytes starting at `offset' in `memory_object'
 *		directly into the (possibly unaligned) buffer at
 *		`dest_address' in `target_map', without mapping the object.
 */
kern_return_t vm_read_fast(target_map, dest_address, size,
			   memory_object, offset)
	vm_map_t	target_map;
	vm_address_t	dest_address;
	vm_size_t	size;
	ipc_port_t	memory_object;
	vm_offset_t	offset;
{
	vm_map_entry_t	dest_entry;
	vm_map_version_t version;
	vm_object_t	object;
	vm_offset_t	dest_pa_address = trunc_page(dest_address);
	kern_return_t	kr;

	vm_map_lock(target_map);

	/* Find the map entry backing the destination buffer.  */
	if (!vm_map_lookup_entry(target_map, dest_address, &dest_entry)) {
		vm_map_unlock(target_map);
		// panic("invalid address\n");
		return(KERN_INVALID_ADDRESS);
	}

	vm_map_clip_start(target_map, dest_entry, dest_pa_address);

	/* The whole buffer must fit within this single entry (see TODO).  */
	if ((dest_entry->vme_end
	     - (dest_entry->vme_start + (dest_address - dest_pa_address)))
	    < size) {
		vm_map_unlock(target_map);
		// panic("map too small\n");
		return(KERN_INVALID_ADDRESS);
	}

	/* Look up the kernel object behind the supplied memory object port. */
	if (!IP_VALID(memory_object)
	    || (object = vm_object_enter(memory_object, size, FALSE))
	       == VM_OBJECT_NULL) {
		vm_map_unlock(target_map);
		return(KERN_INVALID_ARGUMENT);
	}

	version.main_timestamp = target_map->timestamp;

	vm_map_unlock(target_map);

	/* XXX DEBUG
	if (dest_entry->object.vm_object == VM_OBJECT_NULL)
		printf("dst_entry object null!\n");
	printf("vm_read_fast: SOURCE, offset: %d, size: %d\n", offset, size);
	printf("vm_read_fast: DESTINATION, offset: %d\n", dest_entry->offset);
	*/

	kr = vm_fault_copy_tmp(object,
			       offset,
			       &size,
			       dest_entry->object.vm_object,
			       dest_entry->offset
				 + (dest_address - dest_pa_address),
			       target_map,
			       &version,
			       FALSE);

	return kr;
}
/*
 *	Routine:	vm_fault_copy_tmp
 *	Purpose:
 *		Like vm_fault_copy(), but supports unaligned source and
 *		destination offsets.  Copies *src_size bytes, one page
 *		(or less) at a time.
 */
kern_return_t vm_fault_copy_tmp(
		src_object,
		src_offset,
		src_size,
		dst_object,
		dst_offset,
		dst_map,
		dst_version,
		interruptible
		)
	vm_object_t	src_object;
	vm_offset_t	src_offset;
	vm_size_t	*src_size;	/* INOUT */
	vm_object_t	dst_object;
	vm_offset_t	dst_offset;
	vm_map_t	dst_map;
	vm_map_version_t *dst_version;
	boolean_t	interruptible;
{
	vm_page_t	result_page;
	vm_prot_t	prot;

	vm_page_t	src_page;
	vm_page_t	src_top_page;
	vm_offset_t	src_pa_offset, src_page_offset;

	vm_page_t	dst_page;
	vm_page_t	dst_top_page;
	vm_offset_t	dst_pa_offset, dst_page_offset;

	vm_size_t	amount_done, cpy_size;
	vm_object_t	old_copy_object;
	boolean_t	same_page = FALSE;

#define	RETURN(x)				\
	MACRO_BEGIN				\
	*src_size = amount_done;		\
	MACRO_RETURN(x);			\
	MACRO_END

	amount_done = src_pa_offset = dst_pa_offset = 0;

	do { /* while (amount_done != *src_size) */

	RetrySourceFault: ;

		/* Copy at most one page per iteration.  */
		cpy_size = (*src_size - amount_done);
		if (cpy_size > PAGE_SIZE)
			cpy_size = PAGE_SIZE;

		src_pa_offset = trunc_page(src_offset);
		src_page_offset = (src_offset - src_pa_offset);

		if (src_object == VM_OBJECT_NULL) {
			/*
			 *	No source object.  We will just
			 *	zero-fill the page in dst_object.
			 */
			src_page = VM_PAGE_NULL;
		} else if (!same_page) {
			/* Fault in the source page for reading.  */
			prot = VM_PROT_READ;
			vm_object_lock(src_object);
			vm_object_paging_begin(src_object);

			switch (vm_fault_page(src_object, src_pa_offset,
					VM_PROT_READ, FALSE, interruptible,
					&prot, &result_page, &src_top_page,
					FALSE, (void (*)()) 0)) {

				case VM_FAULT_SUCCESS:
					break;
				case VM_FAULT_RETRY:
					goto RetrySourceFault;
				case VM_FAULT_INTERRUPTED:
					RETURN(MACH_SEND_INTERRUPTED);
				case VM_FAULT_MEMORY_SHORTAGE:
					VM_PAGE_WAIT((void (*)()) 0);
					goto RetrySourceFault;
				case VM_FAULT_FICTITIOUS_SHORTAGE:
					vm_page_more_fictitious();
					goto RetrySourceFault;
				case VM_FAULT_MEMORY_ERROR:
					return(KERN_MEMORY_ERROR);
			}

			src_page = result_page;
			assert((src_top_page == VM_PAGE_NULL) ==
					(src_page->object == src_object));
			assert((prot & VM_PROT_READ) != VM_PROT_NONE);
			vm_object_unlock(src_page->object);
		}

	RetryDestinationFault: ;

		prot = VM_PROT_WRITE;

		vm_object_lock(dst_object);
		/* XXX Is this needed?
		vm_object_paging_begin(dst_object);
		*/

		dst_pa_offset = trunc_page(dst_offset);
		dst_page_offset = (dst_offset - dst_pa_offset);

		/*
		 *	The destination page must already be resident
		 *	(see TODO: fault it in with vm_fault_page() instead).
		 */
		if ((dst_page = vm_page_lookup(dst_object, dst_pa_offset))
		    == VM_PAGE_NULL)
			panic("fault_copy_tmp, page null!\n");

		vm_object_unlock(dst_object);

		/*
		 *	Copy the page, and note that it is dirty
		 *	immediately.
		 */

		/* Clip the copy to the current source and destination pages. */
		if ((PAGE_SIZE - dst_page_offset) < cpy_size)
			cpy_size = (PAGE_SIZE - dst_page_offset);
		if ((PAGE_SIZE - src_page_offset) < cpy_size)
			cpy_size = (PAGE_SIZE - src_page_offset);

		/* XXX DEBUG
		printf("vm_fault_copy: SOURCE src_offset: %d, src_page_offset: %d\n",
			src_offset, src_page_offset);
		printf("vm_fault_copy: TARGET dst_offset: %d, dst_page_offset: %d\n",
			dst_offset, dst_page_offset);
		printf("vm_fault_copy: COPY amount_done: %d, src_size: %d, cpy_size: %d\n",
			amount_done, *src_size, cpy_size);
		*/

		if (src_page == VM_PAGE_NULL)
			vm_page_zero_fill_tmp(dst_page, dst_page_offset,
					      cpy_size);
		else
			vm_page_copy_tmp(src_page, src_page_offset,
					 dst_page, dst_page_offset, cpy_size);

		dst_page->dirty = TRUE;

		/*
		 *	Unlock everything, and return
		 */

		amount_done += cpy_size;
		src_offset += cpy_size;
		dst_offset += cpy_size;

		/*
		 *	Release the source page when the copy is finished,
		 *	or when the next chunk starts on a different
		 *	source page.
		 */
		if (src_page != VM_PAGE_NULL &&
		    ((amount_done == *src_size) ||
		     (src_pa_offset != trunc_page(src_offset)))) {
			vm_fault_copy_cleanup(src_page, src_top_page);
			same_page = FALSE;
		}
		else
			same_page = TRUE;

	} while (amount_done != *src_size);

	RETURN(KERN_SUCCESS);
#undef	RETURN

	/*NOTREACHED*/
}
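vm_fault_copy_tmp() relies on two partial-page helpers, vm_page_copy_tmp() and vm_page_zero_fill_tmp(), that are not included above. A minimal sketch of what such helpers could look like (my guess, not the code from the patch) follows; it assumes an i386-style direct mapping of physical memory (phystokv()) and the kernel's memcpy()/memset(), while a portable version would have to set up temporary kernel mappings instead:

/* Hypothetical partial-page variants of vm_page_copy()/vm_page_zero_fill():
 * copy or zero `size' bytes at the given offsets within resident pages. */

void vm_page_copy_tmp(src_m, src_offset, dst_m, dst_offset, size)
	vm_page_t	src_m;
	vm_offset_t	src_offset;
	vm_page_t	dst_m;
	vm_offset_t	dst_offset;
	vm_size_t	size;
{
	assert(src_offset + size <= PAGE_SIZE);
	assert(dst_offset + size <= PAGE_SIZE);

	memcpy((void *) (phystokv(dst_m->phys_addr) + dst_offset),
	       (void *) (phystokv(src_m->phys_addr) + src_offset),
	       size);
}

void vm_page_zero_fill_tmp(dst_m, dst_offset, size)
	vm_page_t	dst_m;
	vm_offset_t	dst_offset;
	vm_size_t	size;
{
	assert(dst_offset + size <= PAGE_SIZE);

	memset((void *) (phystokv(dst_m->phys_addr) + dst_offset), 0, size);
}

Going through a direct physical mapping keeps the fast path cheap because no pmap entries have to be created or torn down, in the same spirit as the full-page vm_page_copy(), which uses pmap_copy_page().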