With this change, address space dispatch can be moved outside the BQL. The actual I/O would still have to happen within the lock.
The next step would be to introduce a function that can only be called from outside the BQL, address_space_rw_unlocked. The function would do something like mr = address_space_translate(...) if (!mr->unlocked) { locked = true; qemu_mutex_lock_iothread(); } ...dispatch... if (locked) { qemu_mutex_unlock_iothread(); } (Note that subpages are already ready to be changed to unlocked access). Signed-off-by: Paolo Bonzini <pbonz...@redhat.com> --- cputlb.c | 4 +++- exec.c | 30 ++++++++++++++++++++++++++---- hw/ppc/spapr_iommu.c | 10 +++++++++- 3 files changed, 38 insertions(+), 6 deletions(-) diff --git a/cputlb.c b/cputlb.c index 82875b1..97bfe70 100644 --- a/cputlb.c +++ b/cputlb.c @@ -267,8 +267,9 @@ void tlb_set_page(CPUArchState *env, target_ulong vaddr, tlb_add_large_page(env, vaddr, size); } + rcu_read_lock(); sz = size; - d = address_space_memory.dispatch; + d = rcu_dereference(&address_space_memory.dispatch); section = address_space_translate_for_iotlb(d, paddr, &xlat, &sz); assert(sz >= TARGET_PAGE_SIZE); @@ -321,6 +322,7 @@ void tlb_set_page(CPUArchState *env, target_ulong vaddr, } else { te->addr_write = -1; } + rcu_read_unlock(); } /* NOTE: this function can trigger an exception */ diff --git a/exec.c b/exec.c index e564014..8c6f925 100644 --- a/exec.c +++ b/exec.c @@ -97,6 +97,8 @@ struct PhysPageEntry { typedef PhysPageEntry Node[L2_SIZE]; struct AddressSpaceDispatch { + struct rcu_head rcu; + /* This is a multi-level map on the physical address space. * The bottom level has pointers to MemoryRegionSections. */ @@ -120,6 +122,8 @@ typedef struct subpage_t { #define PHYS_SECTION_WATCH 3 typedef struct PhysPageMap { + struct rcu_head rcu; + unsigned sections_nb; unsigned sections_nb_alloc; unsigned nodes_nb; @@ -236,6 +240,7 @@ bool memory_region_is_unassigned(MemoryRegion *mr) && mr != &io_mem_watch; } +/* Called from RCU critical section */ static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d, hwaddr addr, bool resolve_subpage) @@ -252,6 +257,7 @@ static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d, return section; } +/* Called from RCU critical section */ static MemoryRegionSection * address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat, hwaddr *plen, bool resolve_subpage) @@ -280,8 +286,10 @@ MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr, MemoryRegion *mr; hwaddr len = *plen; + rcu_read_lock(); for (;;) { - section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true); + AddressSpaceDispatch *d = rcu_dereference(&as->dispatch); + section = address_space_translate_internal(d, addr, &addr, plen, true); mr = section->mr; if (!mr->iommu_ops) { @@ -303,9 +311,11 @@ MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr, *plen = len; *xlat = addr; memory_region_ref(mr); + rcu_read_unlock(); return mr; } +/* Called from RCU critical section */ MemoryRegionSection * address_space_translate_for_iotlb(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat, hwaddr *plen) @@ -727,6 +737,7 @@ static int cpu_physical_memory_set_dirty_tracking(int enable) return ret; } +/* Called from RCU critical section */ hwaddr memory_region_section_get_iotlb(AddressSpaceDispatch *d, CPUArchState *env, MemoryRegionSection *section, @@ -1706,6 +1717,11 @@ static uint16_t dummy_section(MemoryRegion *mr) MemoryRegion *iotlb_to_region(hwaddr index) { + /* This assumes that address_space_memory.dispatch->sections + * does not change between address_space_translate_for_iotlb and + * here. This is currently true because TCG runs within the + * BQL. + */ return address_space_memory.dispatch->sections[index & ~TARGET_PAGE_MASK].mr; } @@ -1762,11 +1778,12 @@ static void core_begin(MemoryListener *listener) } /* This listener's commit run after the other AddressSpaceDispatch listeners'. - * All AddressSpaceDispatch instances have switched to the next map. + * All AddressSpaceDispatch instances have switched to the next map, but + * there may be concurrent readers. */ static void core_commit(MemoryListener *listener) { - phys_sections_free(prev_map); + call_rcu(prev_map, phys_sections_free, rcu); } static void tcg_commit(MemoryListener *listener) @@ -1816,13 +1833,18 @@ void address_space_init_dispatch(AddressSpace *as) memory_listener_register(&as->dispatch_listener, as); } +static void address_space_dispatch_free(AddressSpaceDispatch *d) +{ + g_free(d); +} + void address_space_destroy_dispatch(AddressSpace *as) { AddressSpaceDispatch *d = as->dispatch; memory_listener_unregister(&as->dispatch_listener); - g_free(d); as->dispatch = NULL; + call_rcu(d, address_space_dispatch_free, rcu); } static void memory_map_init(void) diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c index 89b33a5..e16b94b 100644 --- a/hw/ppc/spapr_iommu.c +++ b/hw/ppc/spapr_iommu.c @@ -37,6 +37,7 @@ enum sPAPRTCEAccess { }; struct sPAPRTCETable { + struct rcu_head rcu; uint32_t liobn; uint32_t window_size; sPAPRTCE *table; @@ -68,6 +69,8 @@ static sPAPRTCETable *spapr_tce_find_by_liobn(uint32_t liobn) return NULL; } +/* Called within RCU critical section */ + static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr) { sPAPRTCETable *tcet = container_of(iommu, sPAPRTCETable, iommu); @@ -159,7 +162,7 @@ sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn, size_t wi return tcet; } -void spapr_tce_free(sPAPRTCETable *tcet) +static void spapr_tce_do_free(sPAPRTCETable *tcet) { QLIST_REMOVE(tcet, list); @@ -172,6 +175,11 @@ void spapr_tce_free(sPAPRTCETable *tcet) g_free(tcet); } +void spapr_tce_free(sPAPRTCETable *tcet) +{ + call_rcu(tcet, spapr_tce_do_free, rcu); +} + MemoryRegion *spapr_tce_get_iommu(sPAPRTCETable *tcet) { return &tcet->iommu; -- 1.8.1.4