* David Hildenbrand (da...@redhat.com) wrote:
> During precopy we usually write all plugged areas and essentially
> allocate them. However, there are two corner cases:
>
> 1) Migrating the zeropage
>
> When the zeropage gets migrated, we first check if the destination range is
> already zero and avoid performing a write in that case:
> ram_handle_compressed(). If the memory backend, like anonymous RAM or
> most filesystems, populates the shared zeropage when reading a (file) hole,
> we don't preallocate backend memory. In that case, we have to explicitly
> trigger the allocation to allocate actual backend memory.
>
> 2) Excluding memory ranges during migration
>
> For example, virtio-balloon free page hinting will exclude some pages
> from getting migrated. In that case, we don't allocate memory for
> plugged ranges when migrating.
>
> So trigger allocation of all plugged ranges when restoring the device state
> and fail gracefully if allocation fails.
>
> Handling postcopy is a bit more tricky, as postcopy and preallocation
> are problematic in general. To at least mimic what ordinary
> preallocation does, temporarily try allocating the requested amount
> of memory and fail postcopy in case the requested size on source and
> destination doesn't match. This way, we at least checked that there isn't
> a fundamental configuration issue and that we were able to preallocate
> the required amount of memory at least once, instead of failing
> unrecoverably during postcopy later. However, just as with ordinary
> preallocation and postcopy, it's racy.
>
> Signed-off-by: David Hildenbrand <da...@redhat.com>
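A side note for anyone following along, on the zeropage corner case above:
the destination only performs a write for an incoming zero page when the
target range does not already read as zero, so a never-touched hole in the
backend never gets populated and the preallocation is silently lost. A
minimal standalone sketch of that check (the helper names below are
simplified stand-ins, not the actual QEMU functions):

#include <stdbool.h>
#include <stddef.h>
#include <string.h>

/* Simplified stand-in for a "does this range read as all zeroes?" check. */
static bool range_is_zero(const char *p, size_t size)
{
    for (size_t i = 0; i < size; i++) {
        if (p[i]) {
            return false;
        }
    }
    return true;
}

/*
 * Sketch of how an incoming zero page is handled on the destination: if the
 * range already reads as zero, no write happens at all. With a backend that
 * returns the shared zeropage for holes, no backend memory gets allocated,
 * which is why the patch has to trigger the allocation explicitly when
 * restoring the device state.
 */
static void handle_incoming_zero_page(char *host, size_t page_size)
{
    if (!range_is_zero(host, page_size)) {
        memset(host, 0, page_size);
    }
    /* else: skip the write; the hole (and the missing prealloc) remains */
}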
Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com>

> ---
>  hw/virtio/virtio-mem.c         | 136 +++++++++++++++++++++++++++++++++
>  include/hw/virtio/virtio-mem.h |   6 ++
>  2 files changed, 142 insertions(+)
>
> diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c
> index 6c337db0a7..f48e684aa9 100644
> --- a/hw/virtio/virtio-mem.c
> +++ b/hw/virtio/virtio-mem.c
> @@ -27,6 +27,7 @@
>  #include "qapi/visitor.h"
>  #include "exec/ram_addr.h"
>  #include "migration/misc.h"
> +#include "migration/postcopy-ram.h"
>  #include "hw/boards.h"
>  #include "hw/qdev-properties.h"
>  #include CONFIG_DEVICES
> @@ -193,6 +194,30 @@ static int virtio_mem_for_each_unplugged_range(const VirtIOMEM *vmem, void *arg,
>      return ret;
>  }
>
> +static int virtio_mem_for_each_plugged_range(const VirtIOMEM *vmem, void *arg,
> +                                             virtio_mem_range_cb cb)
> +{
> +    unsigned long first_bit, last_bit;
> +    uint64_t offset, size;
> +    int ret = 0;
> +
> +    first_bit = find_first_bit(vmem->bitmap, vmem->bitmap_size);
> +    while (first_bit < vmem->bitmap_size) {
> +        offset = first_bit * vmem->block_size;
> +        last_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
> +                                      first_bit + 1) - 1;
> +        size = (last_bit - first_bit + 1) * vmem->block_size;
> +
> +        ret = cb(vmem, arg, offset, size);
> +        if (ret) {
> +            break;
> +        }
> +        first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
> +                                  last_bit + 2);
> +    }
> +    return ret;
> +}
> +
>  /*
>   * Adjust the memory section to cover the intersection with the given range.
>   *
> @@ -819,6 +844,7 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
>      if (!vmem->block_size) {
>          vmem->block_size = virtio_mem_default_block_size(rb);
>      }
> +    vmem->initial_requested_size = vmem->requested_size;
>
>      if (vmem->block_size < page_size) {
>          error_setg(errp, "'%s' property has to be at least the page size (0x%"
> @@ -879,6 +905,7 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
>       */
>      memory_region_set_ram_discard_manager(&vmem->memdev->mr,
>                                            RAM_DISCARD_MANAGER(vmem));
> +    postcopy_add_notifier(&vmem->postcopy_notifier);
>  }
>
>  static void virtio_mem_device_unrealize(DeviceState *dev)
> @@ -886,6 +913,7 @@ static void virtio_mem_device_unrealize(DeviceState *dev)
>      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>      VirtIOMEM *vmem = VIRTIO_MEM(dev);
>
> +    postcopy_remove_notifier(&vmem->postcopy_notifier);
>      /*
>       * The unplug handler unmapped the memory region, it cannot be
>       * found via an address space anymore. Unset ourselves.
> @@ -915,12 +943,119 @@ static int virtio_mem_restore_unplugged(VirtIOMEM *vmem)
>                                                 virtio_mem_discard_range_cb);
>  }
>
> +static int virtio_mem_prealloc_range(const VirtIOMEM *vmem, uint64_t offset,
> +                                     uint64_t size)
> +{
> +    void *area = memory_region_get_ram_ptr(&vmem->memdev->mr) + offset;
> +    int fd = memory_region_get_fd(&vmem->memdev->mr);
> +    Error *local_err = NULL;
> +
> +    os_mem_prealloc(fd, area, size, 1, &local_err);
> +    if (local_err) {
> +        error_report_err(local_err);
> +        return -ENOMEM;
> +    }
> +    return 0;
> +}
> +
> +static int virtio_mem_prealloc_range_cb(const VirtIOMEM *vmem, void *arg,
> +                                        uint64_t offset, uint64_t size)
> +{
> +    return virtio_mem_prealloc_range(vmem, offset, size);
> +}
> +
> +static int virtio_mem_restore_prealloc(VirtIOMEM *vmem)
> +{
> +    /*
> +     * Make sure any preallocated memory is really preallocated. Migration
> +     * might have skipped some pages or optimized for the zeropage.
> +     */
> +    return virtio_mem_for_each_plugged_range(vmem, NULL,
> +                                             virtio_mem_prealloc_range_cb);
> +}
> +
> +static int virtio_mem_postcopy_notify(NotifierWithReturn *notifier,
> +                                      void *opaque)
> +{
> +    struct PostcopyNotifyData *pnd = opaque;
> +    VirtIOMEM *vmem = container_of(notifier, VirtIOMEM, postcopy_notifier);
> +    RAMBlock *rb = vmem->memdev->mr.ram_block;
> +    int ret;
> +
> +    if (pnd->reason != POSTCOPY_NOTIFY_INBOUND_ADVISE || !vmem->prealloc ||
> +        !vmem->initial_requested_size) {
> +        return 0;
> +    }
> +    assert(!vmem->size);
> +
> +    /*
> +     * When creating the device we discard all memory and we don't know
> +     * which blocks the source has plugged (and should be preallocated)
> +     * until we restore the device state. However, we cannot allocate when
> +     * restoring the device state either if postcopy is already active.
> +     *
> +     * If we reach this point, postcopy is possible and we have
> +     * preallocation enabled.
> +     *
> +     * Temporarily allocate the requested size to see if there is a
> +     * fundamental configuration issue that would make postcopy fail
> +     * because the memory backend is out of memory. While this increases
> +     * reliability, prealloc+postcopy cannot be fully reliable: see the
> +     * comment in virtio_mem_post_load().
> +     */
> +    ret = virtio_mem_prealloc_range(vmem, 0, vmem->initial_requested_size);
> +    if (ram_block_discard_range(rb, 0, vmem->initial_requested_size)) {
> +        ret = ret ? ret : -EINVAL;
> +        return ret;
> +    }
> +    return 0;
> +}
> +
>  static int virtio_mem_post_load(void *opaque, int version_id)
>  {
>      VirtIOMEM *vmem = VIRTIO_MEM(opaque);
>      RamDiscardListener *rdl;
>      int ret;
>
> +    if (vmem->prealloc) {
> +        if (migration_in_incoming_postcopy()) {
> +            /*
> +             * Prealloc with postcopy cannot possibly work fully reliable in
> +             * general: preallocation has to populate all memory immediately
> +             * and fail gracefully before the guest started running on the
> +             * destination while postcopy wants to discard memory and
> +             * populate on demand after the guest started running on the
> +             * destination.
> +             *
> +             * For ordinary memory backends, "prealloc=on" is essentially
> +             * overridden by postcopy, which will simply discard preallocated
> +             * pages and might fail later when running out of backend memory
> +             * when trying to place a page: the earlier preallocation only
> +             * makes it less likely to fail, but nothing (not even huge page
> +             * reservation) will guarantee that postcopy will find a free
> +             * page to place once the guest is running on the destination.
> +             *
> +             * We temporarily allocate "requested-size" during
> +             * POSTCOPY_NOTIFY_INBOUND_ADVISE, before migrating any memory.
> +             * This resembles what is done with ordinary memory backends.
> +             *
> +             * We need to have a matching requested size on source and
> +             * destination that we actually temporarily allocated the right
> +             * amount of memory. As requested-size changed when restoring the
> +             * state, check against the initial value.
> +             */
> +            if (vmem->requested_size != vmem->initial_requested_size) {
> +                error_report("postcopy with 'prealloc=on' needs matching"
> +                             " 'requested-size' on source and destination");
> +                return -EINVAL;
> +            }
> +        } else {
> +            ret = virtio_mem_restore_prealloc(vmem);
> +            if (ret) {
> +                return ret;
> +            }
> +        }
> +    }
> +
>      /*
>       * We started out with all memory discarded and our memory region is mapped
>       * into an address space. Replay, now that we updated the bitmap.
> @@ -1189,6 +1324,7 @@ static void virtio_mem_instance_init(Object *obj)
>
>      notifier_list_init(&vmem->size_change_notifiers);
>      QLIST_INIT(&vmem->rdl_list);
> +    vmem->postcopy_notifier.notify = virtio_mem_postcopy_notify;
>
>      object_property_add(obj, VIRTIO_MEM_SIZE_PROP, "size", virtio_mem_get_size,
>                          NULL, NULL, NULL);
> diff --git a/include/hw/virtio/virtio-mem.h b/include/hw/virtio/virtio-mem.h
> index 7745cfc1a3..45395152d2 100644
> --- a/include/hw/virtio/virtio-mem.h
> +++ b/include/hw/virtio/virtio-mem.h
> @@ -61,6 +61,9 @@ struct VirtIOMEM {
>      /* requested size */
>      uint64_t requested_size;
>
> +    /* initial requested size on startup */
> +    uint64_t initial_requested_size;
> +
>      /* block size and alignment */
>      uint64_t block_size;
>
> @@ -77,6 +80,9 @@ struct VirtIOMEM {
>      /* notifiers to notify when "size" changes */
>      NotifierList size_change_notifiers;
>
> +    /* notifier for postcopy events */
> +    NotifierWithReturn postcopy_notifier;
> +
>      /* listeners to notify on plug/unplug activity. */
>      QLIST_HEAD(, RamDiscardListener) rdl_list;
>  };
> --
> 2.34.1
>

--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK