* Bohdan Trach (bv.tr...@gmail.com) wrote: > From: Bohdan Trach <bohdan.tr...@mailbox.tu-dresden.de> > > Extend memory page saving and loading functions to utilize information > available in checkpoints to avoid sending full pages over the network. > > Signed-off-by: Bohdan Trach <bohdan.tr...@mailbox.tu-dresden.de>
There are a couple of things I don't understand about this: 1) How does the source fill it's hashes table? Is it just given the same dump file as the destination? 2) Why does RAM_SAVE_FLAG_PAGE_HASH exist; if you're sending the full page to the destination, why do we also send the hash? > --- > arch_init.c | 167 > ++++++++++++++++++++++++++++++++++++++++++++++++++++++++---- > 1 file changed, 158 insertions(+), 9 deletions(-) > > diff --git a/arch_init.c b/arch_init.c > index eda86d4..fca56f0 100644 > --- a/arch_init.c > +++ b/arch_init.c > @@ -128,6 +128,8 @@ static uint64_t bitmap_sync_count; > #define RAM_SAVE_FLAG_CONTINUE 0x20 > #define RAM_SAVE_FLAG_XBZRLE 0x40 > /* 0x80 is reserved in migration.h start with 0x100 next */ > +#define RAM_SAVE_FLAG_HASH 0x100 > +#define RAM_SAVE_FLAG_PAGE_HASH 0x200 > > static struct defconfig_file { > const char *filename; > @@ -790,6 +792,7 @@ static int ram_save_page(QEMUFile *f, RAMBlock* block, > ram_addr_t offset, > uint8_t *p; > int ret; > bool send_async = true; > + uint8_t hash[MD5_DIGEST_LENGTH]; > > p = memory_region_get_ram_ptr(mr) + offset; > > @@ -841,16 +844,45 @@ static int ram_save_page(QEMUFile *f, RAMBlock* block, > ram_addr_t offset, > > /* XBZRLE overflow or normal page */ > if (pages == -1) { > - *bytes_transferred += save_page_header(f, block, > - offset | RAM_SAVE_FLAG_PAGE); > - if (send_async) { > - qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE); > - } else { > - qemu_put_buffer(f, p, TARGET_PAGE_SIZE); > + if (is_outgoing_with_checkpoint() && > + 0 == strncmp(block->mr->name, "pc.ram", strlen("pc.ram"))) { > + MD5(p, TARGET_PAGE_SIZE, hash); > + > + if (NULL != bsearch(hash, hashes, hashes_entries, > + MD5_DIGEST_LENGTH, uint128_compare)) { > + > + *bytes_transferred += save_page_header(f, block, offset | > RAM_SAVE_FLAG_HASH); > +#ifdef DEBUG_HASH > + qemu_put_buffer(f, p, TARGET_PAGE_SIZE); > + *bytes_transferred += TARGET_PAGE_SIZE; > +#endif > + qemu_put_buffer(f, hash, MD5_DIGEST_LENGTH); > + *bytes_transferred += MD5_DIGEST_LENGTH; > + pages = 1; > + DPRINTF("ram_save_page: FLAG_HASH guest_phy_addr=%08lx > flags=%lx hash=%s\n", offset&TARGET_PAGE_MASK, (offset | RAM_SAVE_FLAG_HASH)& > ~TARGET_PAGE_MASK, md5s(hash)); > + } else { > + *bytes_transferred += save_page_header(f, block, offset | > RAM_SAVE_FLAG_PAGE_HASH); > + qemu_put_buffer(f, p, TARGET_PAGE_SIZE); > + qemu_put_buffer(f, hash, MD5_DIGEST_LENGTH); I think there's a problem here that given the source is still running it's CPU and changing memory; it can be writing to the page at the same time, so the page you send might not match the hash you send; we're guaranteed to resend the page again if it was written to, but that still doesn't make these two things match; although as I say above I'm not sure why SAVE_FLAG_PAGE_HASH exists. > + *bytes_transferred += TARGET_PAGE_SIZE; > + *bytes_transferred += MD5_DIGEST_LENGTH; > + pages = 1; > + DPRINTF("ram_save_page: FLAG_PAGE_HASH guest_phy_addr=%08lx > flags=%lx hash=%s\n", offset&TARGET_PAGE_MASK, (offset | > RAM_SAVE_FLAG_PAGE_HASH)& ~TARGET_PAGE_MASK, md5s(hash)); > + } > + } > + if (pages == -1) { > + *bytes_transferred += save_page_header(f, block, > + offset | > RAM_SAVE_FLAG_PAGE); > + if (send_async) { > + qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE); > + } else { > + qemu_put_buffer(f, p, TARGET_PAGE_SIZE); > + } > + *bytes_transferred += TARGET_PAGE_SIZE; > + pages = 1; > + acct_info.norm_pages++; > + DPRINTF("ram_save_page: FLAG_PAGE guest_phy_addr=%08lx > flags=%lx", offset&TARGET_PAGE_MASK, (offset | RAM_SAVE_FLAG_PAGE)& > ~TARGET_PAGE_MASK); > } > - *bytes_transferred += TARGET_PAGE_SIZE; > - pages = 1; > - acct_info.norm_pages++; > } > > XBZRLE_cache_unlock(); > @@ -963,6 +995,8 @@ void free_xbzrle_decoded_buf(void) > xbzrle_decoded_buf = NULL; > } > > +extern const char *checkpoint_path; > + > static void migration_end(void) > { > if (migration_bitmap) { > @@ -1281,6 +1315,58 @@ void ram_handle_compressed(void *host, uint8_t ch, > uint64_t size) > } > } > > +/** > + * If migration source determined we already have the chunk, it only > + * sends a hash of the page's content. Read it from local storage, > + * e.g., an old checkpoint. > + * @param host Address which, after this function, should have a content > matching the functions 2nd parameter. > + * @param hash The hash value. > + * @param size Size of the memory region in bytes. Typically, size is a > single page, e.g., 4 KiB. > + * @param fd file descriptor of checkpoint file > + */ > +void ram_handle_hash(void *host, uint64_t guest_phy_addr, uint8_t *hash, > uint64_t size); > +void ram_handle_hash(void *host, uint64_t guest_phy_addr, uint8_t *hash, > uint64_t size) > +{ > + assert(fd_checkpoint != -1); > + > + /* fprintf(stdout, "ram_handle_hash: incoming has %u!\n", hash); */ > + uint8_t local_page_hash[MD5_DIGEST_LENGTH]; > + MD5(host, TARGET_PAGE_SIZE, local_page_hash); > + > + if (0 != memcmp(local_page_hash, hash, MD5_DIGEST_LENGTH)) { > + /* Computed hash does not match the hash the migration source > + sent us for this page. */ > + hash_offset_entry* v = bsearch(hash, hash_offset_array, > hash_offset_entries, > + sizeof(hash_offset_entry), > cmp_hash_offset_entry); > + if (v == NULL) { > + /* For some reason the source thought the destination > + already has this block. But it doesn't. Hmmm ... */ > + DPRINTF("ram_handle_hash: unknown hash %s at guest phy addr > %08lx\n", md5s(hash), guest_phy_addr); > + assert(0); > + } > + > + DPRINTF("ram_handle_hash: guest_phy_addr=%08lx, hash=%s, > offset=%08lx\n", guest_phy_addr, md5s(hash), v->offset); > + > + off_t offset_actual = lseek(fd_checkpoint, v->offset, SEEK_SET); > + assert(offset_actual == v->offset); > + > + ssize_t read_actual = read(fd_checkpoint, host, TARGET_PAGE_SIZE); > + assert(read_actual == TARGET_PAGE_SIZE); > + MD5(host, TARGET_PAGE_SIZE, local_page_hash); > + if (0 != memcmp(local_page_hash, hash, MD5_DIGEST_LENGTH)) { > + DPRINTF("ram_handle_hash: local_page_hash=%s\n", > md5s(local_page_hash)); > + assert(0); > + } > + } > +} > + > +static void add_remote_hash(ram_addr_t addr, uint8_t *hash) { > + uint64_t page_nr = get_page_nr(addr); > + memcpy(&hashes[page_nr * MD5_DIGEST_LENGTH], > + hash, > + MD5_DIGEST_LENGTH); > +} > + > static int ram_load(QEMUFile *f, void *opaque, int version_id) > { > int flags = 0, ret = 0; > @@ -1302,6 +1388,7 @@ static int ram_load(QEMUFile *f, void *opaque, int > version_id) > ram_addr_t addr, total_ram_bytes; > void *host; > uint8_t ch; > + uint8_t hash[MD5_DIGEST_LENGTH]; > > addr = qemu_get_be64(f); > flags = addr & ~TARGET_PAGE_MASK; > @@ -1354,6 +1441,61 @@ static int ram_load(QEMUFile *f, void *opaque, int > version_id) > } > ch = qemu_get_byte(f); > ram_handle_compressed(host, ch, TARGET_PAGE_SIZE); > + DPRINTF("ram_load: FLAG_COMPRESS, addr=%08lx ch=%u\n", addr, ch); Generally try and use trace_ rather than DPRINTF. > + if (fd_checkpoint != -1) { > + if (ch != 0) { > + MD5(host, TARGET_PAGE_SIZE, hash); > + add_remote_hash(addr, hash); > + } else { > + add_remote_hash(addr, all_zeroes_hash); > + } > + } > + break; > + case RAM_SAVE_FLAG_HASH: > + host = host_from_stream_offset(f, addr, flags); > + if (!host) { > + error_report("Illegal RAM offset " RAM_ADDR_FMT, addr); > + ret = -EINVAL; > + break; > + } > + > +#ifdef DEBUG_HASH > + uint8_t src_page[TARGET_PAGE_SIZE]; > + qemu_get_buffer(f, src_page, TARGET_PAGE_SIZE); > +#endif > + qemu_get_buffer(f, hash, MD5_DIGEST_LENGTH); > + > + ram_handle_hash(host, addr, hash, TARGET_PAGE_SIZE); > + > +#ifdef DEBUG_HASH > + uint8_t local_hash[MD5_DIGEST_LENGTH]; > + MD5(host, TARGET_PAGE_SIZE, local_hash); > + assert(0 == memcmp(local_hash, hash, MD5_DIGEST_LENGTH)); > + > + uint8_t src_page_hash[MD5_DIGEST_LENGTH]; > + MD5(src_page, TARGET_PAGE_SIZE, src_page_hash); > + assert(0 == memcmp(src_page_hash, hash, MD5_DIGEST_LENGTH)); > + assert(0 == memcmp(src_page, host, TARGET_PAGE_SIZE)); > +#endif > + assert(is_ram_addr(host)); > + add_remote_hash(addr, hash); > + DPRINTF("ram_load: FLAG_HASH, recv_hash=%s, addr=%08lx\n", > md5s(hash), addr); > + break; > + case RAM_SAVE_FLAG_PAGE_HASH: > + host = host_from_stream_offset(f, addr, flags); > + if (!host) { > + error_report("Illegal RAM offset " RAM_ADDR_FMT, addr); > + ret = -EINVAL; > + break; > + } > + > + qemu_get_buffer(f, host, TARGET_PAGE_SIZE); > + > + qemu_get_buffer(f, hash, MD5_DIGEST_LENGTH); > + > + assert(is_ram_addr(host)); > + add_remote_hash(addr, hash); > + DPRINTF("ram_load: FLAG_PAGE_HASH, hash=%s, addr=%08lx\n", > md5s(hash), addr); > break; > case RAM_SAVE_FLAG_PAGE: > host = host_from_stream_offset(f, addr, flags); > @@ -1363,6 +1505,13 @@ static int ram_load(QEMUFile *f, void *opaque, int > version_id) > break; > } > qemu_get_buffer(f, host, TARGET_PAGE_SIZE); > + > + if (is_ram_addr(host)) { > + uint8_t hash[MD5_DIGEST_LENGTH]; > + MD5(host, TARGET_PAGE_SIZE, hash); > + add_remote_hash(addr, hash); > + DPRINTF("ram_load: FLAG_PAGE, addr=%08lx, hash=%s\n", addr, > md5s(hash)); > + } > break; > case RAM_SAVE_FLAG_XBZRLE: > host = host_from_stream_offset(f, addr, flags); > -- > 2.0.5 Dave -- Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK