When the decompression thread count is set to 1, the current implementation is inefficient for the following reasons: 1. Thread synchronization cost; 2. Data copy.
This patch optimizes the performance for the case of 1 decompress thread. In this case, the decompression is done directly in process_incoming_migration_co; for some fast decompression algorithms, this can help to improve the performance. Signed-off-by: Liang Li <[email protected]> --- migration/ram.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index 0cc4f81..fc91997 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -1414,6 +1414,9 @@ void migrate_decompress_threads_create(void) int i, thread_count; thread_count = migrate_decompress_threads(); + if (thread_count == 1) { + return; + } decompress_threads = g_new0(QemuThread, thread_count); decomp_param = g_new0(DecompressParam, thread_count); compressed_data_buf = g_malloc0(compressBound(TARGET_PAGE_SIZE)); @@ -1432,8 +1435,11 @@ void migrate_decompress_threads_join(void) { int i, thread_count; - quit_decomp_thread = true; thread_count = migrate_decompress_threads(); + if (thread_count == 1) { + return; + } + quit_decomp_thread = true; for (i = 0; i < thread_count; i++) { qemu_mutex_lock(&decomp_param[i].mutex); qemu_cond_signal(&decomp_param[i].cond); @@ -1575,7 +1581,14 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) break; } qemu_get_buffer(f, compressed_data_buf, len); - decompress_data_with_multi_threads(compressed_data_buf, host, len); + if (migrate_decompress_threads() == 1) { + unsigned long pagesize = TARGET_PAGE_SIZE; + uncompress((Bytef *)host, &pagesize, + (const Bytef *)compressed_data_buf, len); + } else { + decompress_data_with_multi_threads(compressed_data_buf, + host, len); + } break; case RAM_SAVE_FLAG_XBZRLE: host = host_from_stream_offset(f, addr, flags); -- 1.9.1
