Hi
On Thu, Jul 21, 2022 at 10:24 AM Hogan Wang via <[email protected]>
wrote:
> There's no way to cancel the current executing dump process, lead to the
> virtual machine manager daemon((e.g. libvirtd) cannot restore the dump
> job after daemon restart.
>
> Add the 'cancelling' and 'cancelled' dump states.
>
> Use 'dump-cancel' qmp command Set the dump state as 'cancelling'.
> The dump process check the 'cancelling' state and break loops.
> The 'cancelled' state mark the dump process cancelled success.
>
This will only really work if the dump was detached. You should explain
that in the commit message & in the documentation.
>
> ---
> dump/dump.c | 38 ++++++++++++++++++++++++++++++++++++--
> include/sysemu/runstate.h | 1 +
> qapi/dump.json | 21 ++++++++++++++++++++-
> 3 files changed, 57 insertions(+), 3 deletions(-)
>
> diff --git a/dump/dump.c b/dump/dump.c
> index 4d9658ffa2..a0ac85aa02 100644
> --- a/dump/dump.c
> +++ b/dump/dump.c
> @@ -118,6 +118,10 @@ static int fd_write_vmcore(const void *buf, size_t
> size, void *opaque)
> DumpState *s = opaque;
> size_t written_size;
>
> + if (qemu_system_dump_cancelling()) {
> + return -ECANCELED;
> + }
> +
> written_size = qemu_write_full(s->fd, buf, size);
> if (written_size != size) {
> return -errno;
> @@ -627,6 +631,10 @@ static void dump_iterate(DumpState *s, Error **errp)
>
> do {
> block = s->next_block;
> + if (qemu_system_dump_cancelling()) {
> + error_setg(errp, "dump: job cancelled");
> + return;
> + }
>
> size = block->target_end - block->target_start;
> if (s->has_filter) {
> @@ -1321,6 +1329,11 @@ static void write_dump_pages(DumpState *s, Error
> **errp)
> * first page of page section
> */
> while (get_next_page(&block_iter, &pfn_iter, &buf, s)) {
> + if (qemu_system_dump_cancelling()) {
> + error_setg(errp, "dump: job cancelled");
> + goto out;
> + }
> +
> /* check zero page */
> if (buffer_is_zero(buf, s->dump_info.page_size)) {
> ret = write_cache(&page_desc, &pd_zero,
> sizeof(PageDescriptor),
> @@ -1540,6 +1553,22 @@ bool qemu_system_dump_in_progress(void)
> return (qatomic_read(&state->status) == DUMP_STATUS_ACTIVE);
> }
>
> +bool qemu_system_dump_cancelling(void)
> +{
> + DumpState *state = &dump_state_global;
> + return (qatomic_read(&state->status) == DUMP_STATUS_CANCELLING);
> +}
> +
> +void qmp_dump_cancel(Error **errp)
> +{
> + DumpState *state = &dump_state_global;
> + if (!qemu_system_dump_in_progress()) {
> + return;
> + }
> + qatomic_set(&state->status, DUMP_STATUS_CANCELLING);
> +}
>
qemu_system_dump_in_progress() should probably be updated to take
CANCELLING state into account (and avoid another dump to be created
concurrently)
> +
> +
> /* calculate total size of memory to be dumped (taking filter into
> * acoount.) */
> static int64_t dump_calculate_size(DumpState *s)
> @@ -1838,8 +1867,13 @@ static void dump_process(DumpState *s, Error **errp)
>
> /* make sure status is written after written_size updates */
> smp_wmb();
> - qatomic_set(&s->status,
> - (*errp ? DUMP_STATUS_FAILED : DUMP_STATUS_COMPLETED));
> + if (qemu_system_dump_cancelling()) {
> + qatomic_set(&s->status, DUMP_STATUS_CANCELLED);
> + } else if (*errp) {
> + qatomic_set(&s->status, DUMP_STATUS_FAILED);
> + } else {
> + qatomic_set(&s->status, DUMP_STATUS_COMPLETED);
> + }
>
> /* send DUMP_COMPLETED message (unconditionally) */
> result = qmp_query_dump(NULL);
> diff --git a/include/sysemu/runstate.h b/include/sysemu/runstate.h
> index f3ed52548e..a36c1d43f6 100644
> --- a/include/sysemu/runstate.h
> +++ b/include/sysemu/runstate.h
> @@ -76,6 +76,7 @@ void qemu_system_reset(ShutdownCause reason);
> void qemu_system_guest_panicked(GuestPanicInformation *info);
> void qemu_system_guest_crashloaded(GuestPanicInformation *info);
> bool qemu_system_dump_in_progress(void);
> +bool qemu_system_dump_cancelling(void);
>
> #endif
>
> diff --git a/qapi/dump.json b/qapi/dump.json
> index 90859c5483..6dfbb6b7de 100644
> --- a/qapi/dump.json
> +++ b/qapi/dump.json
> @@ -108,7 +108,7 @@
> # Since: 2.6
> ##
> { 'enum': 'DumpStatus',
> - 'data': [ 'none', 'active', 'completed', 'failed' ] }
> + 'data': [ 'none', 'active', 'completed', 'failed', 'cancelling',
> 'cancelled' ] }
>
> ##
> # @DumpQueryResult:
> @@ -200,3 +200,22 @@
> ##
> { 'command': 'query-dump-guest-memory-capability',
> 'returns': 'DumpGuestMemoryCapability' }
> +
> +##
> +# @dump-cancel:
> +#
> +# Cancel the current executing dump process.
> +#
> +# Returns: nothing on success
> +#
> +# Notes: This command succeeds even if there is no dump process running.
> +#
> +# Since: 7.2
> +#
> +# Example:
> +#
> +# -> { "execute": "dump-cancel" }
> +# <- { "return": {} }
> +#
> +##
> +{ 'command': 'dump-cancel' }
> --
> 2.33.0
>
Looks good to me, but I wish there would be some tests. Could you try to
write some?
--
Marc-André Lureau