Lukas Straub <[email protected]> writes:
> Like in the normal ram_load() path, put the received pages into the
> colo cache and mark the pages in the bitmap so that they will be
> flushed to the guest later.
>
> Multifd with COLO is useful to reduce the VM pause time during checkpointing
> for latency sensitive workloads. In such workloads the worst-case latency
> is especially important.
>
> Also, this is already worth it for the precopy phase as it helps with
> converging. Moreover, multifd migration is the preferred way to do migration
> nowadays and this allows to use multifd compression with COLO.
>
> Benchmark:
> Cluster nodes
> - Intel Xenon E5-2630 v3
> - 48Gb RAM
> - 10G Ethernet
> Guest
> - Windows Server 2016
> - 6Gb RAM
> - 4 cores
> Workload
> - Upload a file to the guest with SMB to simulate moderate
> memory dirtying
> - Measure the memory transfer time portion of each checkpoint
> - 600ms COLO checkpoint interval
>
> Results
> Plain
> idle mean: 4.50ms 99per: 10.33ms
> load mean: 24.30ms 99per: 78.05ms
> Multifd-4
> idle mean: 6.48ms 99per: 10.41ms
> load mean: 14.12ms 99per: 31.27ms
>
> Evaluation
> While multifd has slightly higher latency when the guest idles, it is
> 10ms faster under load and more importantly it's worst case latency is
> less than 1/2 of plain under load as can be seen in the 99. Percentile.
>
> Signed-off-by: Juan Quintela <[email protected]>
> Signed-off-by: Lukas Straub <[email protected]>
> ---
> MAINTAINERS | 1 +
> migration/meson.build | 2 +-
> migration/multifd-colo.c | 50
> ++++++++++++++++++++++++++++++++++++++++++++++
> migration/multifd-colo.h | 26 ++++++++++++++++++++++++
> migration/multifd-nocomp.c | 10 +++++++++-
> migration/multifd.c | 8 ++++++++
> migration/multifd.h | 5 ++++-
> 7 files changed, 99 insertions(+), 3 deletions(-)
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index
> b239cf00f66f0683d481150ba27447316bcab72a..900d6b9436670515162eba4aec6ee0f023c9a2fc
> 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -3850,6 +3850,7 @@ COLO Framework
> M: Lukas Straub <[email protected]>
> S: Maintained
> F: migration/colo*
> +F: migration/multifd-colo.*
> F: include/migration/colo.h
> F: include/migration/failover.h
> F: docs/COLO-FT.txt
> diff --git a/migration/meson.build b/migration/meson.build
> index
> c7f39bdb55239ecb0e775c77b90a1aa9e6a4a9ce..c9f0f5f9f2137536497e53e960ce70654ad1b394
> 100644
> --- a/migration/meson.build
> +++ b/migration/meson.build
> @@ -39,7 +39,7 @@ system_ss.add(files(
> ), gnutls, zlib)
>
> if get_option('replication').allowed()
> - system_ss.add(files('colo-failover.c', 'colo.c'))
> + system_ss.add(files('colo-failover.c', 'colo.c', 'multifd-colo.c'))
> else
> system_ss.add(files('colo-stubs.c'))
> endif
> diff --git a/migration/multifd-colo.c b/migration/multifd-colo.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..c47f5044663969e0c9af56da5ec34902d635810a
> --- /dev/null
> +++ b/migration/multifd-colo.c
> @@ -0,0 +1,50 @@
> +/*
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + *
> + * multifd colo implementation
> + *
> + * Copyright (c) Lukas Straub <[email protected]>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "exec/target_page.h"
> +#include "qemu/error-report.h"
> +#include "qapi/error.h"
> +#include "ram.h"
> +#include "multifd.h"
> +#include "options.h"
> +#include "io/channel-socket.h"
> +#include "migration/colo.h"
> +#include "multifd-colo.h"
> +#include "system/ramblock.h"
> +
A bunch of these^ are unused.
> +void multifd_colo_prepare_recv(MultiFDRecvParams *p)
> +{
> + /*
> + * While we're still in precopy state (not yet in colo state), we copy
> + * received pages to both guest and cache. No need to set dirty bits,
> + * since guest and cache memory are in sync.
> + */
> + if (migration_incoming_in_colo_state()) {
> + colo_record_bitmap(p->block, p->normal, p->normal_num);
> + colo_record_bitmap(p->block, p->zero, p->zero_num);
> + }
> +}
> +
> +void multifd_colo_process_recv(MultiFDRecvParams *p)
> +{
> + if (!migration_incoming_in_colo_state()) {
> + for (int i = 0; i < p->normal_num; i++) {
> + void *guest = p->block->host + p->normal[i];
> + void *cache = p->host + p->normal[i];
> + memcpy(guest, cache, multifd_ram_page_size());
> + }
> + for (int i = 0; i < p->zero_num; i++) {
> + void *guest = p->block->host + p->zero[i];
> + memset(guest, 0, multifd_ram_page_size());
> + }
> + }
> +}
> diff --git a/migration/multifd-colo.h b/migration/multifd-colo.h
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..82eaf3f48c47de2f090f9de52f9d57a337d4754a
> --- /dev/null
> +++ b/migration/multifd-colo.h
> @@ -0,0 +1,26 @@
> +/*
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + *
> + * multifd colo header
> + *
> + * Copyright (c) Lukas Straub <[email protected]>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + */
> +
> +#ifndef QEMU_MIGRATION_MULTIFD_COLO_H
> +#define QEMU_MIGRATION_MULTIFD_COLO_H
> +
> +#ifdef CONFIG_REPLICATION
> +
> +void multifd_colo_prepare_recv(MultiFDRecvParams *p);
> +void multifd_colo_process_recv(MultiFDRecvParams *p);
> +
> +#else
> +
> +static inline void multifd_colo_prepare_recv(MultiFDRecvParams *p) {}
> +static inline void multifd_colo_process_recv(MultiFDRecvParams *p) {}
> +
> +#endif
> +#endif
> diff --git a/migration/multifd-nocomp.c b/migration/multifd-nocomp.c
> index
> 9be79b3b8e00371ebff9e112766c225bec260bf7..9f7a792fa761b3bc30b971b35f464103a61787f0
> 100644
> --- a/migration/multifd-nocomp.c
> +++ b/migration/multifd-nocomp.c
> @@ -16,6 +16,7 @@
> #include "file.h"
> #include "migration-stats.h"
> #include "multifd.h"
> +#include "multifd-colo.h"
> #include "options.h"
> #include "migration.h"
> #include "qapi/error.h"
> @@ -269,7 +270,6 @@ int multifd_ram_unfill_packet(MultiFDRecvParams *p, Error
> **errp)
> return -1;
> }
>
> - p->host = p->block->host;
> for (i = 0; i < p->normal_num; i++) {
> uint64_t offset = be64_to_cpu(packet->offset[i]);
>
> @@ -294,6 +294,14 @@ int multifd_ram_unfill_packet(MultiFDRecvParams *p,
> Error **errp)
> p->zero[i] = offset;
> }
>
> + if (migrate_colo()) {
> + multifd_colo_prepare_recv(p);
> + assert(p->block->colo_cache);
> + p->host = p->block->colo_cache;
> + } else {
> + p->host = p->block->host;
> + }
> +
> return 0;
> }
>
> diff --git a/migration/multifd.c b/migration/multifd.c
> index
> 332e6fc58053462419f3171f6c320ac37648ef7b..220ed8564960fdabc58e4baa069dd252c8ad293c
> 100644
> --- a/migration/multifd.c
> +++ b/migration/multifd.c
> @@ -29,6 +29,7 @@
> #include "qemu-file.h"
> #include "trace.h"
> #include "multifd.h"
> +#include "multifd-colo.h"
> #include "options.h"
> #include "qemu/yank.h"
> #include "io/channel-file.h"
> @@ -1258,6 +1259,13 @@ static int multifd_ram_state_recv(MultiFDRecvParams
> *p, Error **errp)
> int ret;
>
> ret = multifd_recv_state->ops->recv(p, errp);
> + if (ret != 0) {
> + return ret;
> + }
> +
> + if (migrate_colo()) {
> + multifd_colo_process_recv(p);
> + }
>
> return ret;
> }
> diff --git a/migration/multifd.h b/migration/multifd.h
> index
> 89a395aef2b09a6762c45b5361e0ab63256feff6..fbc35702b062fdc3213ce92baed35994f5967c2b
> 100644
> --- a/migration/multifd.h
> +++ b/migration/multifd.h
> @@ -279,7 +279,10 @@ typedef struct {
> uint64_t packets_recved;
> /* ramblock */
> RAMBlock *block;
> - /* ramblock host address */
> + /*
> + * Normally, it points to ramblock's host address. When COLO
> + * is enabled, it points to the mirror cache for the ramblock.
> + */
> uint8_t *host;
> /* buffers to recv */
> struct iovec *iov;