This diff makes pipe event filters ready to run without the kernel lock.
The callbacks follow the same code pattern as the socket event filters.
Pipes already have a klist lock.

Until now, the pipe event filters have taken the pipe lock in read mode.
The patch switches them to write mode for clarity. This should not be a
real loss, though, because the lock is fine-grained and there is little
multiple-reader parallelism to exploit.

OK?

Index: kern/sys_pipe.c
===================================================================
RCS file: src/sys/kern/sys_pipe.c,v
retrieving revision 1.127
diff -u -p -r1.127 sys_pipe.c
--- kern/sys_pipe.c     22 Oct 2021 05:00:26 -0000      1.127
+++ kern/sys_pipe.c     22 Oct 2021 12:17:57 -0000
@@ -78,20 +78,30 @@ static const struct fileops pipeops = {
 
 void   filt_pipedetach(struct knote *kn);
 int    filt_piperead(struct knote *kn, long hint);
+int    filt_pipereadmodify(struct kevent *kev, struct knote *kn);
+int    filt_pipereadprocess(struct knote *kn, struct kevent *kev);
+int    filt_piperead_common(struct knote *kn, struct pipe *rpipe);
 int    filt_pipewrite(struct knote *kn, long hint);
+int    filt_pipewritemodify(struct kevent *kev, struct knote *kn);
+int    filt_pipewriteprocess(struct knote *kn, struct kevent *kev);
+int    filt_pipewrite_common(struct knote *kn, struct pipe *rpipe);
 
 const struct filterops pipe_rfiltops = {
-       .f_flags        = FILTEROP_ISFD,
+       .f_flags        = FILTEROP_ISFD | FILTEROP_MPSAFE,
        .f_attach       = NULL,
        .f_detach       = filt_pipedetach,
        .f_event        = filt_piperead,
+       .f_modify       = filt_pipereadmodify,
+       .f_process      = filt_pipereadprocess,
 };
 
 const struct filterops pipe_wfiltops = {
-       .f_flags        = FILTEROP_ISFD,
+       .f_flags        = FILTEROP_ISFD | FILTEROP_MPSAFE,
        .f_attach       = NULL,
        .f_detach       = filt_pipedetach,
        .f_event        = filt_pipewrite,
+       .f_modify       = filt_pipewritemodify,
+       .f_process      = filt_pipewriteprocess,
 };
 
 /*
@@ -362,9 +372,7 @@ pipeselwakeup(struct pipe *cpipe)
                cpipe->pipe_state &= ~PIPE_SEL;
                selwakeup(&cpipe->pipe_sel);
        } else {
-               KERNEL_LOCK();
-               KNOTE(&cpipe->pipe_sel.si_note, NOTE_SUBMIT);
-               KERNEL_UNLOCK();
+               KNOTE(&cpipe->pipe_sel.si_note, 0);
        }
 
        if (cpipe->pipe_state & PIPE_ASYNC)
@@ -929,45 +937,76 @@ filt_pipedetach(struct knote *kn)
 }
 
 int
-filt_piperead(struct knote *kn, long hint)
+filt_piperead_common(struct knote *kn, struct pipe *rpipe)
 {
-       struct pipe *rpipe = kn->kn_fp->f_data, *wpipe;
-       struct rwlock *lock = rpipe->pipe_lock;
+       struct pipe *wpipe;
+
+       rw_assert_wrlock(rpipe->pipe_lock);
 
-       if ((hint & NOTE_SUBMIT) == 0)
-               rw_enter_read(lock);
        wpipe = pipe_peer(rpipe);
 
        kn->kn_data = rpipe->pipe_buffer.cnt;
 
        if ((rpipe->pipe_state & PIPE_EOF) || wpipe == NULL) {
-               if ((hint & NOTE_SUBMIT) == 0)
-                       rw_exit_read(lock);
                kn->kn_flags |= EV_EOF; 
                if (kn->kn_flags & __EV_POLL)
                        kn->kn_flags |= __EV_HUP;
                return (1);
        }
 
-       if ((hint & NOTE_SUBMIT) == 0)
-               rw_exit_read(lock);
-
        return (kn->kn_data > 0);
 }
 
 int
-filt_pipewrite(struct knote *kn, long hint)
+filt_piperead(struct knote *kn, long hint)
 {
-       struct pipe *rpipe = kn->kn_fp->f_data, *wpipe;
-       struct rwlock *lock = rpipe->pipe_lock;
+       struct pipe *rpipe = kn->kn_fp->f_data;
+
+       return (filt_piperead_common(kn, rpipe));
+}
+
+int
+filt_pipereadmodify(struct kevent *kev, struct knote *kn)
+{
+       struct pipe *rpipe = kn->kn_fp->f_data;
+       int active;
+
+       rw_enter_write(rpipe->pipe_lock);
+       knote_modify(kev, kn);
+       active = filt_piperead_common(kn, rpipe);
+       rw_exit_write(rpipe->pipe_lock);
+
+       return (active);
+}
+
+int
+filt_pipereadprocess(struct knote *kn, struct kevent *kev)
+{
+       struct pipe *rpipe = kn->kn_fp->f_data;
+       int active;
+
+       rw_enter_write(rpipe->pipe_lock);
+       if (kev != NULL && (kn->kn_flags & EV_ONESHOT))
+               active = 1;
+       else
+               active = filt_piperead_common(kn, rpipe);
+       if (active)
+               knote_submit(kn, kev);
+       rw_exit_write(rpipe->pipe_lock);
+
+       return (active);
+}
+
+int
+filt_pipewrite_common(struct knote *kn, struct pipe *rpipe)
+{
+       struct pipe *wpipe;
+
+       rw_assert_wrlock(rpipe->pipe_lock);
 
-       if ((hint & NOTE_SUBMIT) == 0)
-               rw_enter_read(lock);
        wpipe = pipe_peer(rpipe);
 
        if (wpipe == NULL) {
-               if ((hint & NOTE_SUBMIT) == 0)
-                       rw_exit_read(lock);
                kn->kn_data = 0;
                kn->kn_flags |= EV_EOF; 
                if (kn->kn_flags & __EV_POLL)
@@ -976,12 +1015,49 @@ filt_pipewrite(struct knote *kn, long hi
        }
        kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
 
-       if ((hint & NOTE_SUBMIT) == 0)
-               rw_exit_read(lock);
-
        return (kn->kn_data >= PIPE_BUF);
 }
 
+int
+filt_pipewrite(struct knote *kn, long hint)
+{
+       struct pipe *rpipe = kn->kn_fp->f_data;
+
+       return (filt_pipewrite_common(kn, rpipe));
+}
+
+int
+filt_pipewritemodify(struct kevent *kev, struct knote *kn)
+{
+       struct pipe *rpipe = kn->kn_fp->f_data;
+       int active;
+
+       rw_enter_write(rpipe->pipe_lock);
+       knote_modify(kev, kn);
+       active = filt_pipewrite_common(kn, rpipe);
+       rw_exit_write(rpipe->pipe_lock);
+
+       return (active);
+}
+
+int
+filt_pipewriteprocess(struct knote *kn, struct kevent *kev)
+{
+       struct pipe *rpipe = kn->kn_fp->f_data;
+       int active;
+
+       rw_enter_write(rpipe->pipe_lock);
+       if (kev != NULL && (kn->kn_flags & EV_ONESHOT))
+               active = 1;
+       else
+               active = filt_pipewrite_common(kn, rpipe);
+       if (active)
+               knote_submit(kn, kev);
+       rw_exit_write(rpipe->pipe_lock);
+
+       return (active);
+}
+
 void
 pipe_init(void)
 {

Reply via email to