> Am 10.11.2025 um 16:48 schrieb Hanna Czenczek <[email protected]>:
>
> Like in “rbd: Run co BH CB in the coroutine’s AioContext”, drop the
> completion flag, yield exactly once, and run the BH in the coroutine’s
> AioContext.
>
> (Can be reproduced with multiqueue by adding a usleep(100000) before the
> `while (!task.complete)` loops.)
>
> Like in “iscsi: Run co BH CB in the coroutine’s AioContext”, this makes
> nfs_co_generic_bh_cb() trivial, so we can drop it in favor of just
> calling aio_co_wake() directly.
>
> Cc: [email protected]
> Signed-off-by: Hanna Czenczek <[email protected]>
> ---
> block/nfs.c | 41 ++++++++++++++++-------------------------
> 1 file changed, 16 insertions(+), 25 deletions(-)
>
> diff --git a/block/nfs.c b/block/nfs.c
> index 0a7d38db09..1d3a34a30c 100644
> --- a/block/nfs.c
> +++ b/block/nfs.c
> @@ -69,7 +69,6 @@ typedef struct NFSClient {
> typedef struct NFSRPC {
> BlockDriverState *bs;
> int ret;
> - int complete;
> QEMUIOVector *iov;
> struct stat *st;
> Coroutine *co;
> @@ -230,14 +229,6 @@ static void coroutine_fn
> nfs_co_init_task(BlockDriverState *bs, NFSRPC *task)
> };
> }
>
> -static void nfs_co_generic_bh_cb(void *opaque)
> -{
> - NFSRPC *task = opaque;
> -
> - task->complete = 1;
> - aio_co_wake(task->co);
> -}
> -
> /* Called (via nfs_service) with QemuMutex held. */
> static void
> nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data,
> @@ -256,8 +247,16 @@ nfs_co_generic_cb(int ret, struct nfs_context *nfs, void
> *data,
> if (task->ret < 0) {
> error_report("NFS Error: %s", nfs_get_error(nfs));
> }
> - replay_bh_schedule_oneshot_event(task->client->aio_context,
> - nfs_co_generic_bh_cb, task);
> +
> + /*
> + * Safe to call: nfs_service(), which called us, is only run from the FD
> + * handlers, never from the request coroutine. The request coroutine in
> + * turn will yield unconditionally.
> + * No need to release the lock, even if we directly enter the coroutine,
> as
> + * the lock is never re-taken after yielding. (Note: If we do enter the
> + * coroutine, @task will probably be dangling once aio_co_wake()
> returns.)
> + */
> + aio_co_wake(task->co);
> }
>
> static int coroutine_fn nfs_co_preadv(BlockDriverState *bs, int64_t offset,
> @@ -278,9 +277,7 @@ static int coroutine_fn nfs_co_preadv(BlockDriverState
> *bs, int64_t offset,
>
> nfs_set_events(client);
> }
> - while (!task.complete) {
> - qemu_coroutine_yield();
> - }
> + qemu_coroutine_yield();
>
> if (task.ret < 0) {
> return task.ret;
> @@ -328,9 +325,7 @@ static int coroutine_fn nfs_co_pwritev(BlockDriverState
> *bs, int64_t offset,
>
> nfs_set_events(client);
> }
> - while (!task.complete) {
> - qemu_coroutine_yield();
> - }
> + qemu_coroutine_yield();
>
> if (my_buffer) {
> g_free(buf);
> @@ -358,9 +353,7 @@ static int coroutine_fn nfs_co_flush(BlockDriverState *bs)
>
> nfs_set_events(client);
> }
> - while (!task.complete) {
> - qemu_coroutine_yield();
> - }
> + qemu_coroutine_yield();
>
> return task.ret;
> }
> @@ -723,8 +716,8 @@ nfs_get_allocated_file_size_cb(int ret, struct
> nfs_context *nfs, void *data,
> if (task->ret < 0) {
> error_report("NFS Error: %s", nfs_get_error(nfs));
> }
> - replay_bh_schedule_oneshot_event(task->client->aio_context,
> - nfs_co_generic_bh_cb, task);
> + /* Safe to call, see nfs_co_generic_cb() */
> + aio_co_wake(task->co);
> }
>
> static int64_t coroutine_fn nfs_co_get_allocated_file_size(BlockDriverState
> *bs)
> @@ -748,9 +741,7 @@ static int64_t coroutine_fn
> nfs_co_get_allocated_file_size(BlockDriverState *bs)
>
> nfs_set_events(client);
> }
> - while (!task.complete) {
> - qemu_coroutine_yield();
> - }
> + qemu_coroutine_yield();
>
> return (task.ret < 0 ? task.ret : st.st_blocks * 512);
> }
> --
> 2.51.1
>
Hallo Hanna,
it has been a long time since I was last active in QEMU development, and I
should have checked this earlier, but it seems that this patch accidentally
broke libnfs usage with at least the QEMU command-line tools such as qemu-img.
Again, sorry for not testing this earlier, but I have only recently become
active in QEMU development again.
I found this glitch while working on a patch to add libnfs v6 support to
QEMU, in order to avoid having to drop libnfs support.
A simple call like:
qemu-img create -f qcow2 nfs://nfsserver/image.qcow2 10G
hangs forever in the second write request.
I have not confirmed it, but this might also hold true for libiscsi, which is
internally quite similar (at least it was three years ago ;-)).
You are right that the coroutine can only yield once, but it seems that the
bottom-half logic is still needed.
Maybe you have an idea why? I remember that we initially had this implemented
differently and changed it after Stefan pointed out that the bottom half might
be needed.
I will try to find the thread about this discussion.
This patch here restores functionality for me:
diff --git a/block/nfs.c b/block/nfs.c
index 1d3a34a30c..45731ea3da 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -75,6 +75,12 @@ typedef struct NFSRPC {
NFSClient *client;
} NFSRPC;
+static void nfs_co_generic_bh_cb(void *opaque)
+{
+ NFSRPC *task = opaque;
+ aio_co_wake(task->co);
+}
+
static int nfs_parse_uri(const char *filename, QDict *options, Error **errp)
{
g_autoptr(GUri) uri = g_uri_parse(filename, G_URI_FLAGS_NONE, NULL);
@@ -248,15 +254,8 @@ nfs_co_generic_cb(int ret, struct nfs_context *nfs, void
*data,
error_report("NFS Error: %s", nfs_get_error(nfs));
}
- /*
- * Safe to call: nfs_service(), which called us, is only run from the FD
- * handlers, never from the request coroutine. The request coroutine in
- * turn will yield unconditionally.
- * No need to release the lock, even if we directly enter the coroutine, as
- * the lock is never re-taken after yielding. (Note: If we do enter the
- * coroutine, @task will probably be dangling once aio_co_wake() returns.)
- */
- aio_co_wake(task->co);
+ replay_bh_schedule_oneshot_event(task->client->aio_context,
+ nfs_co_generic_bh_cb, task);
}
static int coroutine_fn nfs_co_preadv(BlockDriverState *bs, int64_t offset,
@@ -716,8 +715,8 @@ nfs_get_allocated_file_size_cb(int ret, struct nfs_context
*nfs, void *data,
if (task->ret < 0) {
error_report("NFS Error: %s", nfs_get_error(nfs));
}
- /* Safe to call, see nfs_co_generic_cb() */
- aio_co_wake(task->co);
+ replay_bh_schedule_oneshot_event(task->client->aio_context,
+ nfs_co_generic_bh_cb, task);
}
static int64_t coroutine_fn nfs_co_get_allocated_file_size(BlockDriverState
*bs)
Best
Peter