On 06.03.20 08:38, Vladimir Sementsov-Ogievskiy wrote:
> Currently, block_copy operation lock the whole requested region. But
> there is no reason to lock clusters, which are already copied, it will
> disturb other parallel block_copy requests for no reason.
>
> Let's instead do the following:
>
> Lock only sub-region, which we are going to operate on. Then, after
> copying all dirty sub-regions, we should wait for intersecting
> requests block-copy, if they failed, we should retry these new dirty
> clusters.
>
> Signed-off-by: Vladimir Sementsov-Ogievskiy <[email protected]>
> Reviewed-by: Andrey Shinkevich <[email protected]>
> ---
> block/block-copy.c | 128 ++++++++++++++++++++++++++++++++++++---------
> 1 file changed, 104 insertions(+), 24 deletions(-)
>
> diff --git a/block/block-copy.c b/block/block-copy.c
> index 2b29131653..d66b8eb691 100644
> --- a/block/block-copy.c
> +++ b/block/block-copy.c
[...]
> +/* Called only on full-dirty region */
> static void block_copy_inflight_req_begin(BlockCopyState *s,
> BlockCopyInFlightReq *req,
> int64_t offset, int64_t bytes)
> {
> + assert(!find_conflicting_inflight_req(s, offset, bytes));
> +
> + bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
> + s->in_flight_bytes += bytes;
> +
> req->offset = offset;
> req->bytes = bytes;
> qemu_co_queue_init(&req->wait_queue);
> QLIST_INSERT_HEAD(&s->inflight_reqs, req, list);
> }
>
> -static void coroutine_fn block_copy_inflight_req_end(BlockCopyInFlightReq
> *req)
> +/*
> + * block_copy_inflight_req_shrink
> + *
> + * Drop the tail of the request to be handled later. Set dirty bits back and
> + * wake up all requests waiting for us (may be some of them are not
> intersecting
> + * with shrunk request)
> + */
> +static void coroutine_fn block_copy_inflight_req_shrink(BlockCopyState *s,
> + BlockCopyInFlightReq *req, int64_t new_bytes)
> {
> + if (new_bytes == req->bytes) {
> + return;
> + }
> +
> + assert(new_bytes > 0 && new_bytes < req->bytes);
> +
> + bdrv_set_dirty_bitmap(s->copy_bitmap,
> + req->offset + new_bytes, req->bytes - new_bytes);
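I think we need to reduce s->in_flight_bytes here by the dropped tail
(req->bytes - new_bytes): block_copy_inflight_req_begin() added the full
original length, but after the shrink block_copy_inflight_req_end() will
subtract only the new, smaller req->bytes, so the difference would
otherwise never be subtracted again.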
> +
> + req->bytes = new_bytes;
> + qemu_co_queue_restart_all(&req->wait_queue);
> +}
> +
> +static void coroutine_fn block_copy_inflight_req_end(BlockCopyState *s,
> + BlockCopyInFlightReq
> *req,
> + int ret)
> +{
> + s->in_flight_bytes -= req->bytes;
> + if (ret < 0) {
> + bdrv_set_dirty_bitmap(s->copy_bitmap, req->offset, req->bytes);
> + }
> QLIST_REMOVE(req, list);
> qemu_co_queue_restart_all(&req->wait_queue);
> }
[...]
> @@ -432,7 +479,40 @@ int coroutine_fn block_copy(BlockCopyState *s,
> bytes -= cur_bytes;
> }
>
> - block_copy_inflight_req_end(&req);
> + return found_dirty;
> +}
> +
> +/*
> + * block_copy
> + *
> + * Copy requested region, accordingly to dirty bitmap.
> + * Collaborate with parallel block_copy requests: if they success it help
> us. If
s/success/succeed/, s/it help/it will help/
> + * they fail, we retry not-copied regions. So, if we return error, it means
> that
s/retry/will retry/
(In theory also s/it means/it will mean/, but I suppose that also works
as-is.)
> + * io operation failed in context of _this_ block_copy call, not some
> parallel
Perhaps rather “some I/O operation failed in the context of […]”?
> + * operation.
> + */
> +int coroutine_fn block_copy(BlockCopyState *s, int64_t offset, int64_t bytes,
> + bool *error_is_read)
> +{
> + int ret;
> +
> + do {
> + ret = block_copy_dirty_clusters(s, offset, bytes, error_is_read);
> +
> + if (ret == 0) {
> + ret = block_copy_wait_one(s, offset, bytes);
> + }
> +
> + /*
> + * We retry in two cases:
> + * 1. Some progress done
> + * Something was copied, which means that there were yield points
> + * and some new dirty bits may have appeared (due to failed
> parallel
> + * block-copy requests).
> + * 2. We have waited for some intersecting block-copy request
> + * It may have failed and produced new dirty bits.
> + */
> + } while (ret > 0);
>
> return ret;
> }
This new code looks good.
Max
signature.asc
Description: OpenPGP digital signature
