Hello tech@ Due to the challenges of having a large diff reviewed I've had another think about how I can break up the FUSE changes so that they are smaller and easier to review.
This is the first of these diffs. The current design uses a fixed size fusebuf that consists of a header and a union of structs that are used for different VFS operations. In addition, there may be data of variable size associated with an operation. e.g. the buffer passed to write(2). see fb_setup(9). If there is additional data to be exchanged between libfuse and the kernel then libfuse uses an ioctl on the device to read or write this variable sized data after the fusebuf has been read or written. This is not how the fuse protocol works on Linux. Instead, the fusebuf is read or written in a single read(2) or write(2). This change is the first step in setting the OpenBSD implementation up for improved compatibility in the fuse kernel interface. The fusebuf struct is shared between the kernel and libfuse but its layout differs slightly between the two. The kernel has knowledge of the size of data that it is sending or receiving (e.g. read, write, readdir, link, lookup) and so can malloc the exact amount of memory required. libfuse must read the entire fusebuf but doesn't know its size in advance so must have a buffer large enough to cater for the worst case scenario. Since libfuse now uses a fixed size fusebuf, it no longer needs to free the variable memory previously allocated for the data. stsp@ has been kind enough to provide initial feedback. Is it now ready for an official OK? Index: lib/libfuse/fuse.c =================================================================== RCS file: /cvs/src/lib/libfuse/fuse.c,v retrieving revision 1.51 diff -u -p -r1.51 fuse.c --- lib/libfuse/fuse.c 28 Jun 2019 13:32:42 -0000 1.51 +++ lib/libfuse/fuse.c 8 Jun 2021 14:15:29 -0000 @@ -154,9 +154,9 @@ fuse_loop(struct fuse *fuse) { struct fusebuf fbuf; struct fuse_context ctx; - struct fb_ioctl_xch ioexch; struct kevent event[5]; struct kevent ev; + ssize_t fbuf_size; ssize_t n; int ret; @@ -201,29 +201,15 @@ fuse_loop(struct fuse *fuse) strsignal(signum)); } } else if (ret > 0) { - n = read(fuse->fc->fd, &fbuf, sizeof(fbuf)); - if (n != sizeof(fbuf)) { + n = read(fuse->fc->fd, &fbuf, FUSEBUFSIZE); + fbuf_size = sizeof(fbuf.fb_hdr) + sizeof(fbuf.FD) + + fbuf.fb_len; + if (n != fbuf_size) { fprintf(stderr, "%s: bad fusebuf read\n", __func__); return (-1); } - /* check if there is data something present */ - if (fbuf.fb_len) { - fbuf.fb_dat = malloc(fbuf.fb_len); - if (fbuf.fb_dat == NULL) - return (-1); - ioexch.fbxch_uuid = fbuf.fb_uuid; - ioexch.fbxch_len = fbuf.fb_len; - ioexch.fbxch_data = fbuf.fb_dat; - - if (ioctl(fuse->fc->fd, FIOCGETFBDAT, - &ioexch) == -1) { - free(fbuf.fb_dat); - return (-1); - } - } - ctx.fuse = fuse; ctx.uid = fbuf.fb_uid; ctx.gid = fbuf.fb_gid; @@ -238,26 +224,13 @@ fuse_loop(struct fuse *fuse) return (-1); } - n = write(fuse->fc->fd, &fbuf, sizeof(fbuf)); - if (fbuf.fb_len) { - if (fbuf.fb_dat == NULL) { - fprintf(stderr, "%s: fb_dat is Null\n", - __func__); - return (-1); - } - ioexch.fbxch_uuid = fbuf.fb_uuid; - ioexch.fbxch_len = fbuf.fb_len; - ioexch.fbxch_data = fbuf.fb_dat; - - if (ioctl(fuse->fc->fd, FIOCSETFBDAT, &ioexch) == -1) { - free(fbuf.fb_dat); - return (-1); - } - free(fbuf.fb_dat); - } + fbuf_size = sizeof(fbuf.fb_hdr) + sizeof(fbuf.FD) + + fbuf.fb_len; + n = write(fuse->fc->fd, &fbuf, fbuf_size); + ictx = NULL; - if (n != FUSEBUFSIZE) { + if (n != fbuf_size) { errno = EINVAL; return (-1); } Index: lib/libfuse/fuse_ops.c =================================================================== RCS file: /cvs/src/lib/libfuse/fuse_ops.c,v retrieving revision 1.35 diff -u -p -r1.35 fuse_ops.c --- lib/libfuse/fuse_ops.c 16 Jul 2018 13:10:53 -0000 1.35 +++ lib/libfuse/fuse_ops.c 8 Jun 2021 14:15:29 -0000 @@ -15,6 +15,8 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +#include <sys/param.h> + #include <errno.h> #include <string.h> #include <stdlib.h> @@ -317,17 +319,9 @@ ifuse_ops_readdir(struct fuse *f, struct offset = fbuf->fb_io_off; size = fbuf->fb_io_len; - fbuf->fb_dat = calloc(1, size); - - if (fbuf->fb_dat == NULL) { - fbuf->fb_err = -errno; - return (0); - } - vn = tree_get(&f->vnode_tree, fbuf->fb_ino); if (vn == NULL) { fbuf->fb_err = -errno; - free(fbuf->fb_dat); return (0); } @@ -344,7 +338,6 @@ ifuse_ops_readdir(struct fuse *f, struct realname = build_realname(f, vn->ino); if (realname == NULL) { fbuf->fb_err = -errno; - free(fbuf->fb_dat); return (0); } @@ -362,9 +355,6 @@ ifuse_ops_readdir(struct fuse *f, struct else if (fd.full && fbuf->fb_len == 0) fbuf->fb_err = -ENOBUFS; - if (fbuf->fb_len == 0) - free(fbuf->fb_dat); - return (0); } @@ -526,7 +516,6 @@ ifuse_ops_lookup(struct fuse *f, struct fbuf->fb_ino); if (vn == NULL) { fbuf->fb_err = -errno; - free(fbuf->fb_dat); return (0); } set_vn(f, vn); /*XXX*/ @@ -537,13 +526,11 @@ ifuse_ops_lookup(struct fuse *f, struct realname = build_realname(f, vn->ino); if (realname == NULL) { fbuf->fb_err = -errno; - free(fbuf->fb_dat); return (0); } fbuf->fb_err = update_attr(f, &fbuf->fb_attr, realname, vn); fbuf->fb_ino = vn->ino; - free(fbuf->fb_dat); free(realname); return (0); @@ -566,23 +553,15 @@ ifuse_ops_read(struct fuse *f, struct fu size = fbuf->fb_io_len; offset = fbuf->fb_io_off; - fbuf->fb_dat = malloc(size); - if (fbuf->fb_dat == NULL) { - fbuf->fb_err = -errno; - return (0); - } - vn = tree_get(&f->vnode_tree, fbuf->fb_ino); if (vn == NULL) { fbuf->fb_err = -errno; - free(fbuf->fb_dat); return (0); } realname = build_realname(f, vn->ino); if (realname == NULL) { fbuf->fb_err = -errno; - free(fbuf->fb_dat); return (0); } @@ -593,9 +572,6 @@ ifuse_ops_read(struct fuse *f, struct fu else fbuf->fb_err = ret; - if (fbuf->fb_len == 0) - free(fbuf->fb_dat); - return (0); } @@ -621,20 +597,17 @@ ifuse_ops_write(struct fuse *f, struct f vn = tree_get(&f->vnode_tree, fbuf->fb_ino); if (vn == NULL) { fbuf->fb_err = -errno; - free(fbuf->fb_dat); return (0); } realname = build_realname(f, vn->ino); if (realname == NULL) { fbuf->fb_err = -errno; - free(fbuf->fb_dat); return (0); } ret = f->op.write(realname, (char *)fbuf->fb_dat, size, offset, &ffi); free(realname); - free(fbuf->fb_dat); if (ret >= 0) fbuf->fb_io_len = ret; @@ -657,11 +630,9 @@ ifuse_ops_mkdir(struct fuse *f, struct f vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino); if (vn == NULL) { fbuf->fb_err = -errno; - free(fbuf->fb_dat); return (0); } - free(fbuf->fb_dat); realname = build_realname(f, vn->ino); if (realname == NULL) { fbuf->fb_err = -errno; @@ -690,11 +661,9 @@ ifuse_ops_rmdir(struct fuse *f, struct f vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino); if (vn == NULL) { fbuf->fb_err = -errno; - free(fbuf->fb_dat); return (0); } - free(fbuf->fb_dat); realname = build_realname(f, vn->ino); if (realname == NULL) { fbuf->fb_err = -errno; @@ -738,13 +707,8 @@ ifuse_ops_readlink(struct fuse *f, struc fbuf->fb_err = ret; if (!ret) { - len = strnlen(name, PATH_MAX); + len = strnlen(name, MIN(PATH_MAX, FUSEBUFMAXSIZE)); fbuf->fb_len = len; - fbuf->fb_dat = malloc(fbuf->fb_len); - if (fbuf->fb_dat == NULL) { - fbuf->fb_err = -errno; - return (0); - } memcpy(fbuf->fb_dat, name, len); } else fbuf->fb_len = 0; @@ -762,12 +726,10 @@ ifuse_ops_unlink(struct fuse *f, struct vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino); if (vn == NULL) { - free(fbuf->fb_dat); fbuf->fb_err = -errno; return (0); } - free(fbuf->fb_dat); realname = build_realname(f, vn->ino); if (realname == NULL) { fbuf->fb_err = -errno; @@ -821,11 +783,9 @@ ifuse_ops_link(struct fuse *f, struct fu vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino); if (vn == NULL) { fbuf->fb_err = -errno; - free(fbuf->fb_dat); return (0); } - free(fbuf->fb_dat); realname = build_realname(f, oldnodeid); if (realname == NULL) { fbuf->fb_err = -errno; @@ -863,14 +823,12 @@ ifuse_ops_setattr(struct fuse *f, struct vn = tree_get(&f->vnode_tree, fbuf->fb_ino); if (vn == NULL) { fbuf->fb_err = -errno; - free(fbuf->fb_dat); return (0); } realname = build_realname(f, vn->ino); if (realname == NULL) { fbuf->fb_err = -errno; - free(fbuf->fb_dat); return (0); } io = fbtod(fbuf, struct fb_io *); @@ -923,7 +881,6 @@ ifuse_ops_setattr(struct fuse *f, struct if (!fbuf->fb_err) fbuf->fb_err = update_attr(f, &fbuf->fb_attr, realname, vn); free(realname); - free(fbuf->fb_dat); return (0); } @@ -940,7 +897,6 @@ ifuse_ops_symlink(unused struct fuse *f, vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino); if (vn == NULL) { fbuf->fb_err = -errno; - free(fbuf->fb_dat); return (0); } @@ -949,7 +905,6 @@ ifuse_ops_symlink(unused struct fuse *f, realname = build_realname(f, vn->ino); if (realname == NULL) { fbuf->fb_err = -errno; - free(fbuf->fb_dat); return (0); } @@ -957,7 +912,6 @@ ifuse_ops_symlink(unused struct fuse *f, fbuf->fb_err = f->op.symlink((const char *)&fbuf->fb_dat[len + 1], realname); fbuf->fb_ino = vn->ino; - free(fbuf->fb_dat); free(realname); return (0); @@ -978,7 +932,6 @@ ifuse_ops_rename(struct fuse *f, struct vnf = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino); if (vnf == NULL) { fbuf->fb_err = -errno; - free(fbuf->fb_dat); return (0); } @@ -986,12 +939,9 @@ ifuse_ops_rename(struct fuse *f, struct fbuf->fb_io_ino); if (vnt == NULL) { fbuf->fb_err = -errno; - free(fbuf->fb_dat); return (0); } - free(fbuf->fb_dat); - realnamef = build_realname(f, vnf->ino); if (realnamef == NULL) { fbuf->fb_err = -errno; @@ -1060,11 +1010,9 @@ ifuse_ops_mknod(struct fuse *f, struct f vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino); if (vn == NULL) { fbuf->fb_err = -errno; - free(fbuf->fb_dat); return (0); } - free(fbuf->fb_dat); realname = build_realname(f, vn->ino); if (realname == NULL) { fbuf->fb_err = -errno; Index: sys/miscfs/fuse/fuse_device.c =================================================================== RCS file: /cvs/src/sys/miscfs/fuse/fuse_device.c,v retrieving revision 1.36 diff -u -p -r1.36 fuse_device.c --- sys/miscfs/fuse/fuse_device.c 11 Mar 2021 13:31:35 -0000 1.36 +++ sys/miscfs/fuse/fuse_device.c 8 Jun 2021 14:15:33 -0000 @@ -383,7 +383,6 @@ fuseread(dev_t dev, struct uio *uio, int struct fuse_d *fd; struct fusebuf *fbuf; struct fb_hdr hdr; - void *tmpaddr; int error = 0; fd = fuse_lookup(minor(dev)); @@ -399,32 +398,39 @@ fuseread(dev_t dev, struct uio *uio, int fbuf = SIMPLEQ_FIRST(&fd->fd_fbufs_in); /* We get the whole fusebuf or nothing */ - if (uio->uio_resid != FUSEBUFSIZE) + if (uio->uio_resid < FUSEBUFSIZE + fbuf->fb_len) return (EINVAL); /* Do not send kernel pointers */ memcpy(&hdr.fh_next, &fbuf->fb_next, sizeof(fbuf->fb_next)); memset(&fbuf->fb_next, 0, sizeof(fbuf->fb_next)); - tmpaddr = fbuf->fb_dat; - fbuf->fb_dat = NULL; - error = uiomove(fbuf, FUSEBUFSIZE, uio); + + error = uiomove(&fbuf->fb_hdr, sizeof(fbuf->fb_hdr), uio); + if (error) + goto end; + error = uiomove(&fbuf->FD, sizeof(fbuf->FD), uio); if (error) goto end; + if (fbuf->fb_len > 0) { + error = uiomove(fbuf->fb_dat, fbuf->fb_len, uio); + if (error) + goto end; + } #ifdef FUSE_DEBUG fuse_dump_buff((char *)fbuf, FUSEBUFSIZE); #endif /* Restore kernel pointers */ memcpy(&fbuf->fb_next, &hdr.fh_next, sizeof(fbuf->fb_next)); - fbuf->fb_dat = tmpaddr; - /* Remove the fbuf if it does not contains data */ - if (fbuf->fb_len == 0) { - SIMPLEQ_REMOVE_HEAD(&fd->fd_fbufs_in, fb_next); - stat_fbufs_in--; - SIMPLEQ_INSERT_TAIL(&fd->fd_fbufs_wait, fbuf, fb_next); - stat_fbufs_wait++; - } + free(fbuf->fb_dat, M_FUSEFS, fbuf->fb_len); + fbuf->fb_dat = NULL; + + /* Move the fbuf to the wait queue */ + SIMPLEQ_REMOVE_HEAD(&fd->fd_fbufs_in, fb_next); + stat_fbufs_in--; + SIMPLEQ_INSERT_TAIL(&fd->fd_fbufs_wait, fbuf, fb_next); + stat_fbufs_wait++; end: return (error); @@ -443,13 +449,17 @@ fusewrite(dev_t dev, struct uio *uio, in if (fd == NULL) return (ENXIO); - /* We get the whole fusebuf or nothing */ - if (uio->uio_resid != FUSEBUFSIZE) - return (EINVAL); - if ((error = uiomove(&hdr, sizeof(hdr), uio)) != 0) return (error); + /* Check for sanity */ + if (hdr.fh_len > FUSEBUFMAXSIZE) + return (EINVAL); + + /* We get the whole fusebuf or nothing */ + if (uio->uio_resid != sizeof(fbuf->FD) + hdr.fh_len) + return (EINVAL); + /* looking for uuid in fd_fbufs_wait */ SIMPLEQ_FOREACH(fbuf, &fd->fd_fbufs_wait, fb_next) { if (fbuf->fb_uuid == hdr.fh_uuid) @@ -474,10 +484,19 @@ fusewrite(dev_t dev, struct uio *uio, in } /* Get the missing data from the fbuf */ - error = uiomove(&fbuf->FD, uio->uio_resid, uio); + error = uiomove(&fbuf->FD, sizeof(fbuf->FD), uio); if (error) return error; - fbuf->fb_dat = NULL; + + fbuf->fb_dat = malloc(fbuf->fb_len, M_FUSEFS, + M_WAITOK | M_ZERO); + error = uiomove(fbuf->fb_dat, fbuf->fb_len, uio); + if (error) { + free(fbuf->fb_dat, M_FUSEFS, fbuf->fb_len); + fbuf->fb_dat = NULL; + return (error); + } + #ifdef FUSE_DEBUG fuse_dump_buff((char *)fbuf, FUSEBUFSIZE); #endif @@ -491,19 +510,17 @@ fusewrite(dev_t dev, struct uio *uio, in break ; } end: - /* Remove the fbuf if it does not contains data */ - if (fbuf->fb_len == 0) { - if (fbuf == SIMPLEQ_FIRST(&fd->fd_fbufs_wait)) - SIMPLEQ_REMOVE_HEAD(&fd->fd_fbufs_wait, fb_next); - else - SIMPLEQ_REMOVE_AFTER(&fd->fd_fbufs_wait, lastfbuf, - fb_next); - stat_fbufs_wait--; - if (fbuf->fb_type == FBT_INIT) - fb_delete(fbuf); - else - wakeup(fbuf); - } + /* Remove the fbuf from the wait queue */ + if (fbuf == SIMPLEQ_FIRST(&fd->fd_fbufs_wait)) + SIMPLEQ_REMOVE_HEAD(&fd->fd_fbufs_wait, fb_next); + else + SIMPLEQ_REMOVE_AFTER(&fd->fd_fbufs_wait, lastfbuf, + fb_next); + stat_fbufs_wait--; + if (fbuf->fb_type == FBT_INIT) + fb_delete(fbuf); + else + wakeup(fbuf); return (error); } Index: sys/sys/fusebuf.h =================================================================== RCS file: /cvs/src/sys/sys/fusebuf.h,v retrieving revision 1.13 diff -u -p -r1.13 fusebuf.h --- sys/sys/fusebuf.h 19 Jun 2018 11:27:54 -0000 1.13 +++ sys/sys/fusebuf.h 8 Jun 2021 14:15:33 -0000 @@ -69,7 +69,11 @@ struct fusebuf { struct stat FD_attr; /* for attr vnops */ struct fb_io FD_io; /* for file io vnops */ } FD; +#ifdef _KERNEL uint8_t *fb_dat; /* data's */ +#else + uint8_t fb_dat[FUSEBUFMAXSIZE]; /* userland does i/o with fixed size buffer */ +#endif }; #define fb_next fb_hdr.fh_next