MADV_SPLIT enables doublemap on hugetlb. Do that if doublemap=true specified for the migration.
Signed-off-by: Peter Xu <pet...@redhat.com> --- migration/postcopy-ram.c | 16 ++++++++++++++++ migration/ram.c | 18 ++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 86ff73c2c0..dbc7e54e4a 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -694,6 +694,22 @@ static int ram_block_enable_notify(RAMBlock *rb, void *opaque) */ reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING; if (minor_fault) { + /* + * MADV_SPLIT implicitly enables doublemap mode for hugetlb. If + * that fails (e.g. on old kernels) we need to fail the migration. + * + * It's a bit late to fail here as we could have migrated lots of + * pages in precopy, but early failure will require us to allocate + * hugetlb pages secretly in QEMU which is not friendly to admins + * and it may affect the global hugetlb pool. Considering it is + * normally always limited, keep the failure late but tolerable. + */ + if (qemu_madvise(qemu_ram_get_host_addr(rb), rb->postcopy_length, + QEMU_MADV_SPLIT)) { + error_report("%s: madvise(MADV_SPLIT) failed (ret=%d) but " + "required for doublemap.", __func__, -errno); + return -1; + } reg_struct.mode |= UFFDIO_REGISTER_MODE_MINOR; } diff --git a/migration/ram.c b/migration/ram.c index 37d7b3553a..4d786f4b97 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -3891,6 +3891,19 @@ static int migrate_hugetlb_doublemap_init(void) RAMBLOCK_FOREACH_NOT_IGNORED(rb) { if (qemu_ram_is_hugetlb(rb)) { + /* + * MADV_SPLIT implicitly enables doublemap mode for hugetlb on + * the guest mapped ranges. If that fails (e.g. on old + * kernels) we need to fail the migration. Note, the + * host_mirror mapping below can be kept as hugely mapped. + */ + if (qemu_madvise(qemu_ram_get_host_addr(rb), rb->mmap_length, + QEMU_MADV_SPLIT)) { + error_report("%s: madvise(MADV_SPLIT) required for doublemap", + __func__); + return -1; + } + /* * Firstly, we remap the same ramblock into another range of * virtual address, so that we can write to the pages without @@ -3898,6 +3911,11 @@ static int migrate_hugetlb_doublemap_init(void) */ addr = ramblock_file_map(rb); if (addr == MAP_FAILED) { + /* + * No need to undo MADV_SPLIT because this is dest node and + * we're going to bail out anyway. Leave that for mm exit + * to clean things up. + */ ret = -errno; error_report("%s: Duplicate mapping for hugetlb ramblock '%s'" "failed: %s", __func__, qemu_ram_get_idstr(rb), -- 2.37.3