sys_connect() currently relies on the KERNEL_LOCK() to be sure
`so_state' isn't changing while it manipulates the socket.  Since
we want to remove the KERNEL_LOCK() from the Network Stack, we need
a way to guaranty this atomicity.

Diff below extends the scope of the socket lock.  It has been previously
introduced in sys_connect(), first as NET_LOCK() then renamed, where old
splsofnet() were used.  But we now need to "move the line up" in order to
protect fields currently relying on the KERNEL_LOCK().

Moving the line up means that soconnect() and soconnect2() will now
assert for the socket lock.  This will allow to remove superfluous lock-
unlock dances, especially in NFS.

ok?

Index: kern/uipc_socket.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.194
diff -u -p -r1.194 uipc_socket.c
--- kern/uipc_socket.c  13 Jul 2017 16:19:38 -0000      1.194
+++ kern/uipc_socket.c  18 Jul 2017 06:46:18 -0000
@@ -311,11 +311,12 @@ soaccept(struct socket *so, struct mbuf 
 int
 soconnect(struct socket *so, struct mbuf *nam)
 {
-       int s, error;
+       int error;
+
+       soassertlocked(so);
 
        if (so->so_options & SO_ACCEPTCONN)
                return (EOPNOTSUPP);
-       s = solock(so);
        /*
         * If protocol is connection-based, can only connect once.
         * Otherwise, if connected, try to disconnect first.
@@ -329,19 +330,17 @@ soconnect(struct socket *so, struct mbuf
        else
                error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
                    NULL, nam, NULL, curproc);
-       sounlock(s);
        return (error);
 }
 
 int
 soconnect2(struct socket *so1, struct socket *so2)
 {
-       int s, error;
+       int error;
 
-       s = solock(so1);
+       soassertlocked(so1);
        error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2, NULL,
            (struct mbuf *)so2, NULL, curproc);
-       sounlock(s);
        return (error);
 }
 
Index: kern/uipc_syscalls.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_syscalls.c,v
retrieving revision 1.153
diff -u -p -r1.153 uipc_syscalls.c
--- kern/uipc_syscalls.c        12 Jul 2017 09:25:47 -0000      1.153
+++ kern/uipc_syscalls.c        18 Jul 2017 06:46:18 -0000
@@ -373,9 +373,10 @@ sys_connect(struct proc *p, void *v, reg
        if ((error = getsock(p, SCARG(uap, s), &fp)) != 0)
                return (error);
        so = fp->f_data;
+       s = solock(so);
        if (so->so_state & SS_ISCONNECTING) {
-               FRELE(fp, p);
-               return (EALREADY);
+               error = EALREADY;
+               goto out;
        }
        error = sockargs(&nam, SCARG(uap, name), SCARG(uap, namelen),
            MT_SONAME);
@@ -393,11 +394,8 @@ sys_connect(struct proc *p, void *v, reg
        if (isdnssocket(so)) {
                u_int namelen = nam->m_len;
                error = dns_portcheck(p, so, mtod(nam, void *), &namelen);
-               if (error) {
-                       FRELE(fp, p);
-                       m_freem(nam);
-                       return (error);
-               }
+               if (error)
+                       goto out;
                nam->m_len = namelen;
        }
 
@@ -405,11 +403,9 @@ sys_connect(struct proc *p, void *v, reg
        if (error)
                goto bad;
        if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
-               FRELE(fp, p);
-               m_freem(nam);
-               return (EINPROGRESS);
+               error = EINPROGRESS;
+               goto out;
        }
-       s = solock(so);
        while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
                error = sosleep(so, &so->so_timeo, PSOCK | PCATCH,
                    "netcon2", 0);
@@ -423,10 +419,11 @@ sys_connect(struct proc *p, void *v, reg
                error = so->so_error;
                so->so_error = 0;
        }
-       sounlock(s);
 bad:
        if (!interrupted)
                so->so_state &= ~SS_ISCONNECTING;
+out:
+       sounlock(s);
        FRELE(fp, p);
        m_freem(nam);
        if (error == ERESTART)
Index: miscfs/fifofs/fifo_vnops.c
===================================================================
RCS file: /cvs/src/sys/miscfs/fifofs/fifo_vnops.c,v
retrieving revision 1.57
diff -u -p -r1.57 fifo_vnops.c
--- miscfs/fifofs/fifo_vnops.c  8 Jul 2017 09:19:02 -0000       1.57
+++ miscfs/fifofs/fifo_vnops.c  18 Jul 2017 06:46:18 -0000
@@ -124,7 +124,7 @@ fifo_open(void *v)
        struct fifoinfo *fip;
        struct proc *p = ap->a_p;
        struct socket *rso, *wso;
-       int error;
+       int s, error;
 
        if ((fip = vp->v_fifoinfo) == NULL) {
                fip = malloc(sizeof(*fip), M_VNODE, M_WAITOK);
@@ -135,6 +135,11 @@ fifo_open(void *v)
                        return (error);
                }
                fip->fi_readsock = rso;
+               /*
+                * XXXSMP
+                * We only lock `wso' because AF_LOCAL sockets are
+                * still relying on the KERNEL_LOCK().
+                */
                if ((error = socreate(AF_LOCAL, &wso, SOCK_STREAM, 0)) != 0) {
                        (void)soclose(rso);
                        free(fip, M_VNODE, sizeof *fip);
@@ -142,7 +147,9 @@ fifo_open(void *v)
                        return (error);
                }
                fip->fi_writesock = wso;
+               s = solock(wso);
                if ((error = soconnect2(wso, rso)) != 0) {
+                       sounlock(s);
                        (void)soclose(wso);
                        (void)soclose(rso);
                        free(fip, M_VNODE, sizeof *fip);
@@ -152,11 +159,15 @@ fifo_open(void *v)
                fip->fi_readers = fip->fi_writers = 0;
                wso->so_state |= SS_CANTSENDMORE;
                wso->so_snd.sb_lowat = PIPE_BUF;
+       } else {
+               rso = fip->fi_readsock;
+               wso = fip->fi_writesock;
+               s = solock(wso);
        }
        if (ap->a_mode & FREAD) {
                fip->fi_readers++;
                if (fip->fi_readers == 1) {
-                       fip->fi_writesock->so_state &= ~SS_CANTSENDMORE;
+                       wso->so_state &= ~SS_CANTSENDMORE;
                        if (fip->fi_writers > 0)
                                wakeup(&fip->fi_writers);
                }
@@ -168,11 +179,12 @@ fifo_open(void *v)
                        goto bad;
                }
                if (fip->fi_writers == 1) {
-                       fip->fi_readsock->so_state &= 
~(SS_CANTRCVMORE|SS_ISDISCONNECTED);
+                       rso->so_state &= ~(SS_CANTRCVMORE|SS_ISDISCONNECTED);
                        if (fip->fi_readers > 0)
                                wakeup(&fip->fi_readers);
                }
        }
+       sounlock(s);
        if ((ap->a_mode & O_NONBLOCK) == 0) {
                if ((ap->a_mode & FREAD) && fip->fi_writers == 0) {
                        VOP_UNLOCK(vp, p);
@@ -246,6 +258,7 @@ fifo_write(void *v)
        if (ap->a_uio->uio_rw != UIO_WRITE)
                panic("fifo_write mode");
 #endif
+       /* XXXSMP changing state w/o lock isn't safe. */
        if (ap->a_ioflag & IO_NDELAY)
                wso->so_state |= SS_NBIO;
        VOP_UNLOCK(ap->a_vp, p);
Index: nfs/nfs_socket.c
===================================================================
RCS file: /cvs/src/sys/nfs/nfs_socket.c,v
retrieving revision 1.119
diff -u -p -r1.119 nfs_socket.c
--- nfs/nfs_socket.c    27 Jun 2017 12:02:43 -0000      1.119
+++ nfs/nfs_socket.c    18 Jul 2017 06:46:18 -0000
@@ -297,16 +297,18 @@ nfs_connect(struct nfsmount *nmp, struct
                        goto bad;
                }
        } else {
+               s = solock(so);
                error = soconnect(so, nmp->nm_nam);
-               if (error)
+               if (error) {
+                       sounlock(s);
                        goto bad;
+               }
 
                /*
                 * Wait for the connection to complete. Cribbed from the
                 * connect system call but with the wait timing out so
                 * that interruptible mounts don't hang here for a long time.
                 */
-               s = solock(so);
                while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
                        sosleep(so, &so->so_timeo, PSOCK, "nfscon", 2 * hz);
                        if ((so->so_state & SS_ISCONNECTING) &&

Reply via email to