On Wed, Jul 03, 2019 at 11:49:51PM +0200, Alexander Bluhm wrote: > Hi, > > I would like to remove a useless kernel lock during socket splicing. > > We have a socket "so" that splices data to socket "sosp". Everytime > when space in sosp gets available, we add a task to move data from > so to sosp. Additionally we call sowakeup() from sowwakeup(). I > have added this as it is possible to splice from so to sosp and > write from userland to sosp simultaneously. In practice this does > not make sense as the data streams from two sources would get mixed. > Nothing uses this. So it is not neccessay to inform userland that > it is possible to write. > > Note that sowakeup() takes the kernel lock. So when I do not call > it for a spliced socket, there is no kernel lock in the TCP splicing > path anymore. > > But then I have to wakeup userland for the wirting socket in > sounsplice(). > > ok?
I think in general this makes sense. One comment inline. > Index: sys/kern/uipc_socket.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket.c,v > retrieving revision 1.231 > diff -u -p -r1.231 uipc_socket.c > --- sys/kern/uipc_socket.c 17 Dec 2018 16:46:59 -0000 1.231 > +++ sys/kern/uipc_socket.c 3 Jul 2019 13:56:09 -0000 > @@ -220,9 +220,10 @@ sofree(struct socket *so, int s) > if (so->so_sp) { > if (issplicedback(so)) > sounsplice(so->so_sp->ssp_soback, so, > - so->so_sp->ssp_soback != so); > + so->so_sp->ssp_soback == so ? 3 : 2); > if (isspliced(so)) > - sounsplice(so, so->so_sp->ssp_socket, 0); > + sounsplice(so, so->so_sp->ssp_socket, > + so == so->so_sp->ssp_socket ? 3 : 1); > } > #endif /* SOCKET_SPLICE */ > sbrelease(so, &so->so_snd); > @@ -1148,7 +1149,7 @@ sosplice(struct socket *so, int fd, off_ > return (error); > } > if (so->so_sp->ssp_socket) > - sounsplice(so, so->so_sp->ssp_socket, 1); > + sounsplice(so, so->so_sp->ssp_socket, 0); > sbunlock(so, &so->so_rcv); > return (0); > } > @@ -1227,7 +1228,7 @@ sosplice(struct socket *so, int fd, off_ > } > > void > -sounsplice(struct socket *so, struct socket *sosp, int wakeup) > +sounsplice(struct socket *so, struct socket *sosp, int freeing) > { > soassertlocked(so); > > @@ -1236,8 +1237,11 @@ sounsplice(struct socket *so, struct soc > sosp->so_snd.sb_flags &= ~SB_SPLICE; > so->so_rcv.sb_flags &= ~SB_SPLICE; > so->so_sp->ssp_socket = sosp->so_sp->ssp_soback = NULL; > - if (wakeup && soreadable(so)) > + /* Do not wakeup a socket that is about to be freed. */ > + if ((freeing & 1) == 0 && soreadable(so)) > sorwakeup(so); > + if ((freeing & 2) == 0 && sowriteable(sosp)) > + sowwakeup(sosp); Would it be possible to use some #defined flags here instead of 1,2,3? Maybe use FREAD/FWRITE or define something new. > } > > void > @@ -1249,7 +1253,7 @@ soidle(void *arg) > s = solock(so); > if (so->so_rcv.sb_flags & SB_SPLICE) { > so->so_error = ETIMEDOUT; > - sounsplice(so, so->so_sp->ssp_socket, 1); > + sounsplice(so, so->so_sp->ssp_socket, 0); > } > sounlock(so, s); > } > @@ -1574,7 +1578,7 @@ somove(struct socket *so, int wait) > so->so_error = error; > if (((so->so_state & SS_CANTRCVMORE) && so->so_rcv.sb_cc == 0) || > (sosp->so_state & SS_CANTSENDMORE) || maxreached || error) { > - sounsplice(so, sosp, 1); > + sounsplice(so, sosp, 0); > return (0); > } > if (timerisset(&so->so_idletv)) > @@ -1620,6 +1624,8 @@ sowwakeup(struct socket *so) > #ifdef SOCKET_SPLICE > if (so->so_snd.sb_flags & SB_SPLICE) > task_add(sosplice_taskq, &so->so_sp->ssp_soback->so_splicetask); > + if (issplicedback(so)) > + return; > #endif > sowakeup(so, &so->so_snd); > } > Index: share/man/man9/sosplice.9 > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/share/man/man9/sosplice.9,v > retrieving revision 1.9 > diff -u -p -r1.9 sosplice.9 > --- share/man/man9/sosplice.9 15 Aug 2018 12:10:49 -0000 1.9 > +++ share/man/man9/sosplice.9 3 Jul 2019 21:42:34 -0000 > @@ -206,10 +206,13 @@ will call > to trigger the transfer when new data or buffer space is available. > While socket splicing is active, any > .Xr read 2 > -from the source socket will block and the wakeup will not be delivered > -to the file descriptor. > -A read event or a socket error is signaled to userland after > -dissolving. > +from the source socket will block. > +Neither read nor write wakeups will be delivered to the file > +descriptors. > +After dissolving, a read event or a socket error is signaled to > +userland on the source socket. > +If space is available, a write event will be signaled on the drain > +socket. > .Sh RETURN VALUES > .Fn sosplice > returns 0 on success and otherwise the error number. > -- :wq Claudio