First off note the patch below is a total hack with the easiest solution
possible so that it can be MFCed for 10.2.

The issue:

Closing the socket involves:

if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL)
        (*pr->pr_domain->dom_dispose)(so->so_rcv.sb_mb);
if (pr->pr_usrreqs->pru_detach != NULL)
        (*pr->pr_usrreqs->pru_detach)(so);

unp_dispose which gets rid of file descriptors stored in mbufs attached
to the socket. It leaves the mbuf chain in place.

uipc_detach actually unlinks the socket from global unp list.

In particular, this means there is a socket with unusable mbufs visible
to unp garbage collector (unp_gc). Also there is no synchronisation of
any form performed here, so it can inspect mbufs as fds are getting
freed (or afterwards) leading to panics. Note that uipc_detach waits for
unp_gc to finish due to UNP_LIST_LOCK, it's the dispose func which
causes trouble.

Given that stuff should not be accessed after unp_dispose, and the
socket is about to die I figured it would be best to mark the socket so
that unp_gc can ignore it.

Note that unp_dispose only gets a pointer to mbuf. I have not found any
way to obtain a socket from this, which in turn results in the hack
below.

I added a new func - unp_dispose2, which is not a part of struct domain.
dom_dispose consumers check for PR_DISPOSE2 flag and call the function
passing the socket as an argument.

unp_dispose2(struct socket *so)
{
          struct unpcb *unp;
  
          unp = sotounpcb(so);
          UNP_LIST_LOCK();
          unp->unp_gcflag |= UNPGC_IGNORE;
          UNP_LIST_UNLOCK();
          unp_dispose(so->so_rcv.sb_mb);
}

The UNP_LIST_LOCK + UNLOCK synchronizes against unp_gc  - either it sees the
flag and ignores the socket, or gets to inspect it and unp_dispose2 waits for
it to finish. AFAICT it is completely harmless to proceed with freeing after
unp_gc had a look.

There is a similar problem with shutdown(), but the race has a smaller window
due to it clearing mbufs just after dispose call.

In general, it feels like something else is also broken, but I don't see what.

The issue can be reproduced by running this program in a loop:
https://people.freebsd.org/~mjg/reproducers/unp-gc-panic.c

With the patch below the issue seems to be fixed:

diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index a431b4b..d0e11ce 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -804,8 +804,13 @@ sofree(struct socket *so)
        ACCEPT_UNLOCK();
 
        VNET_SO_ASSERT(so);
-       if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL)
-               (*pr->pr_domain->dom_dispose)(so->so_rcv.sb_mb);
+       if (pr->pr_flags & PR_RIGHTS) {
+               if (strcmp(pr->pr_domain->dom_name, "local") == 0) {
+                       unp_dispose2(so);
+               } else if (pr->pr_domain->dom_dispose != NULL) {
+                       (*pr->pr_domain->dom_dispose)(so->so_rcv.sb_mb);
+               }
+       }
        if (pr->pr_usrreqs->pru_detach != NULL)
                (*pr->pr_usrreqs->pru_detach)(so);
 
@@ -2393,8 +2398,13 @@ sorflush(struct socket *so)
         * Dispose of special rights and flush the socket buffer.  Don't call
         * any unsafe routines (that rely on locks being initialized) on asb.
         */
-       if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL)
-               (*pr->pr_domain->dom_dispose)(asb.sb_mb);
+       if (pr->pr_flags & PR_RIGHTS) {
+               if (strcmp(pr->pr_domain->dom_name, "local") == 0) {
+                       unp_dispose2(so);
+               } else if (pr->pr_domain->dom_dispose != NULL) {
+                       (*pr->pr_domain->dom_dispose)(so->so_rcv.sb_mb);
+               }
+       }
        sbrelease_internal(&asb, so);
 }
 
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
index acf9fe9..6c280aa 100644
--- a/sys/kern/uipc_usrreq.c
+++ b/sys/kern/uipc_usrreq.c
@@ -2193,8 +2193,7 @@ unp_gc_process(struct unpcb *unp)
        struct socket *so;
        struct file *fp;
 
-       /* Already processed. */
-       if (unp->unp_gcflag & UNPGC_SCANNED)
+       if (unp->unp_gcflag & (UNPGC_SCANNED | UNPGC_IGNORE))
                return;
        fp = unp->unp_file;
 
@@ -2252,11 +2251,11 @@ unp_gc(__unused void *arg, int pending)
        unp_taskcount++;
        UNP_LIST_LOCK();
        /*
-        * First clear all gc flags from previous runs.
+        * First clear all gc flags from previous runs, apart from UNPGC_IGNORE.
         */
        for (head = heads; *head != NULL; head++)
                LIST_FOREACH(unp, *head, unp_link)
-                       unp->unp_gcflag = 0;
+                       unp->unp_gcflag = unp->unp_gcflag & UNPGC_IGNORE;
 
        /*
         * Scan marking all reachable sockets with UNPGC_REF.  Once a socket
@@ -2333,6 +2332,24 @@ unp_dispose(struct mbuf *m)
                unp_scan(m, unp_freerights);
 }
 
+/*
+ * XXX A hack working around a difenciency in domain API.
+ * dom_dispose handler does not get the socket it is supposed to operate on,
+ * which makes it very problematic to synchronize against unp_gc, which in turn
+ * can trip over data as we are freeing it.
+ */
+void
+unp_dispose2(struct socket *so)
+{
+       struct unpcb *unp;
+
+       unp = sotounpcb(so);
+       UNP_LIST_LOCK();
+       unp->unp_gcflag |= UNPGC_IGNORE;
+       UNP_LIST_UNLOCK();
+       unp_dispose(so->so_rcv.sb_mb);
+}
+
 static void
 unp_scan(struct mbuf *m0, void (*op)(struct filedescent **, int))
 {
diff --git a/sys/sys/socket.h b/sys/sys/socket.h
index 18e2de1..e927cdc 100644
--- a/sys/sys/socket.h
+++ b/sys/sys/socket.h
@@ -666,6 +666,8 @@ void so_unlock(struct socket *so);
 
 void so_listeners_apply_all(struct socket *so, void (*func)(struct socket *, 
void *), void *arg);
 
+void unp_dispose2(struct socket *so);
+
 #endif
 
 
diff --git a/sys/sys/unpcb.h b/sys/sys/unpcb.h
index ba63f30..ead9f0a 100644
--- a/sys/sys/unpcb.h
+++ b/sys/sys/unpcb.h
@@ -106,6 +106,7 @@ struct unpcb {
 #define        UNPGC_REF                       0x1     /* unpcb has external 
ref. */
 #define        UNPGC_DEAD                      0x2     /* unpcb might be dead. 
*/
 #define        UNPGC_SCANNED                   0x4     /* Has been scanned. */
+#define        UNPGC_IGNORE                    0x4     /* Someone will clear 
it. */
 
 /*
  * These flags are used to handle non-atomicity in connect() and bind()

-- 
Mateusz Guzik <mjguzik gmail.com>
_______________________________________________
freebsd-current@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-current
To unsubscribe, send any mail to "freebsd-current-unsubscr...@freebsd.org"

Reply via email to