This diff makes pipe event filters ready to run without the kernel lock. The code pattern in the callbacks is the same as in sockets. Pipes have a klist lock already.
So far, pipe event filters have used read-locking. The patch changes that to write-locking for clarity. This should not be a real loss, though, because the lock is fine-grained and there is little multiple-reader parallelism to be utilized. OK? Index: kern/sys_pipe.c =================================================================== RCS file: src/sys/kern/sys_pipe.c,v retrieving revision 1.127 diff -u -p -r1.127 sys_pipe.c --- kern/sys_pipe.c 22 Oct 2021 05:00:26 -0000 1.127 +++ kern/sys_pipe.c 22 Oct 2021 12:17:57 -0000 @@ -78,20 +78,30 @@ static const struct fileops pipeops = { void filt_pipedetach(struct knote *kn); int filt_piperead(struct knote *kn, long hint); +int filt_pipereadmodify(struct kevent *kev, struct knote *kn); +int filt_pipereadprocess(struct knote *kn, struct kevent *kev); +int filt_piperead_common(struct knote *kn, struct pipe *rpipe); int filt_pipewrite(struct knote *kn, long hint); +int filt_pipewritemodify(struct kevent *kev, struct knote *kn); +int filt_pipewriteprocess(struct knote *kn, struct kevent *kev); +int filt_pipewrite_common(struct knote *kn, struct pipe *rpipe); const struct filterops pipe_rfiltops = { - .f_flags = FILTEROP_ISFD, + .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, .f_attach = NULL, .f_detach = filt_pipedetach, .f_event = filt_piperead, + .f_modify = filt_pipereadmodify, + .f_process = filt_pipereadprocess, }; const struct filterops pipe_wfiltops = { - .f_flags = FILTEROP_ISFD, + .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, .f_attach = NULL, .f_detach = filt_pipedetach, .f_event = filt_pipewrite, + .f_modify = filt_pipewritemodify, + .f_process = filt_pipewriteprocess, }; /* @@ -362,9 +372,7 @@ pipeselwakeup(struct pipe *cpipe) cpipe->pipe_state &= ~PIPE_SEL; selwakeup(&cpipe->pipe_sel); } else { - KERNEL_LOCK(); - KNOTE(&cpipe->pipe_sel.si_note, NOTE_SUBMIT); - KERNEL_UNLOCK(); + KNOTE(&cpipe->pipe_sel.si_note, 0); } if (cpipe->pipe_state & PIPE_ASYNC) @@ -929,45 +937,76 @@ filt_pipedetach(struct knote *kn) } int 
-filt_piperead(struct knote *kn, long hint) +filt_piperead_common(struct knote *kn, struct pipe *rpipe) { - struct pipe *rpipe = kn->kn_fp->f_data, *wpipe; - struct rwlock *lock = rpipe->pipe_lock; + struct pipe *wpipe; + + rw_assert_wrlock(rpipe->pipe_lock); - if ((hint & NOTE_SUBMIT) == 0) - rw_enter_read(lock); wpipe = pipe_peer(rpipe); kn->kn_data = rpipe->pipe_buffer.cnt; if ((rpipe->pipe_state & PIPE_EOF) || wpipe == NULL) { - if ((hint & NOTE_SUBMIT) == 0) - rw_exit_read(lock); kn->kn_flags |= EV_EOF; if (kn->kn_flags & __EV_POLL) kn->kn_flags |= __EV_HUP; return (1); } - if ((hint & NOTE_SUBMIT) == 0) - rw_exit_read(lock); - return (kn->kn_data > 0); } int -filt_pipewrite(struct knote *kn, long hint) +filt_piperead(struct knote *kn, long hint) { - struct pipe *rpipe = kn->kn_fp->f_data, *wpipe; - struct rwlock *lock = rpipe->pipe_lock; + struct pipe *rpipe = kn->kn_fp->f_data; + + return (filt_piperead_common(kn, rpipe)); +} + +int +filt_pipereadmodify(struct kevent *kev, struct knote *kn) +{ + struct pipe *rpipe = kn->kn_fp->f_data; + int active; + + rw_enter_write(rpipe->pipe_lock); + knote_modify(kev, kn); + active = filt_piperead_common(kn, rpipe); + rw_exit_write(rpipe->pipe_lock); + + return (active); +} + +int +filt_pipereadprocess(struct knote *kn, struct kevent *kev) +{ + struct pipe *rpipe = kn->kn_fp->f_data; + int active; + + rw_enter_write(rpipe->pipe_lock); + if (kev != NULL && (kn->kn_flags & EV_ONESHOT)) + active = 1; + else + active = filt_piperead_common(kn, rpipe); + if (active) + knote_submit(kn, kev); + rw_exit_write(rpipe->pipe_lock); + + return (active); +} + +int +filt_pipewrite_common(struct knote *kn, struct pipe *rpipe) +{ + struct pipe *wpipe; + + rw_assert_wrlock(rpipe->pipe_lock); - if ((hint & NOTE_SUBMIT) == 0) - rw_enter_read(lock); wpipe = pipe_peer(rpipe); if (wpipe == NULL) { - if ((hint & NOTE_SUBMIT) == 0) - rw_exit_read(lock); kn->kn_data = 0; kn->kn_flags |= EV_EOF; if (kn->kn_flags & __EV_POLL) @@ -976,12 +1015,49 @@ 
filt_pipewrite(struct knote *kn, long hi } kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; - if ((hint & NOTE_SUBMIT) == 0) - rw_exit_read(lock); - return (kn->kn_data >= PIPE_BUF); } +int +filt_pipewrite(struct knote *kn, long hint) +{ + struct pipe *rpipe = kn->kn_fp->f_data; + + return (filt_pipewrite_common(kn, rpipe)); +} + +int +filt_pipewritemodify(struct kevent *kev, struct knote *kn) +{ + struct pipe *rpipe = kn->kn_fp->f_data; + int active; + + rw_enter_write(rpipe->pipe_lock); + knote_modify(kev, kn); + active = filt_pipewrite_common(kn, rpipe); + rw_exit_write(rpipe->pipe_lock); + + return (active); +} + +int +filt_pipewriteprocess(struct knote *kn, struct kevent *kev) +{ + struct pipe *rpipe = kn->kn_fp->f_data; + int active; + + rw_enter_write(rpipe->pipe_lock); + if (kev != NULL && (kn->kn_flags & EV_ONESHOT)) + active = 1; + else + active = filt_pipewrite_common(kn, rpipe); + if (active) + knote_submit(kn, kev); + rw_exit_write(rpipe->pipe_lock); + + return (active); +} + void pipe_init(void) {