Okay, this is the problem: gmail-researchut: spamprobe receive: io: poll: Connection timed out
Your pipe process does not return in the timeout, so fdm aborts the fetch. Unfortunately this then causes various things which sometimes end up with fdm not exiting correctly. fdm's management of child processes isn't always very good, particularly when an error occurs - sometimes stuff doesn't die in the right order and it ends up with fatal errors. The whole multiprocess idea is to allow fdm to run lots of things in parallel and to drop privileges when running as root, but I'm not sure it is worth it now, I should really simplify it a lot and probably forget about using it as root. You can see similar problems with a configuration file such as: set timeout 2 account 'stdin' stdin match exec 'sleep 10' returns (0, ) action drop And then eg echo|fdm -vvvv f This diff makes several changes which hopefully should make it a bit more robust and correctly handle the various child processes on exit. Please test. Index: child-deliver.c =================================================================== RCS file: /cvsroot/fdm/fdm/child-deliver.c,v retrieving revision 1.21 diff -u -p -r1.21 child-deliver.c --- child-deliver.c 17 May 2009 19:20:08 -0000 1.21 +++ child-deliver.c 24 May 2009 14:56:51 -0000 @@ -61,10 +61,10 @@ child_deliver(struct child *child, struc if (privsep_send(pio, &msg, &msgbuf) != 0) fatalx("privsep_send error"); - if (privsep_recv(pio, &msg, NULL) != 0) - fatalx("privsep_recv error"); - if (msg.type != MSG_EXIT) - fatalx("unexpected message"); + do { + if (privsep_recv(pio, &msg, NULL) != 0) + fatalx("privsep_recv error"); + } while (msg.type != MSG_EXIT); #ifdef DEBUG COUNTFDS(a->name); Index: child-fetch.c =================================================================== RCS file: /cvsroot/fdm/fdm/child-fetch.c,v retrieving revision 1.72 diff -u -p -r1.72 child-fetch.c --- child-fetch.c 17 May 2009 18:23:45 -0000 1.72 +++ child-fetch.c 24 May 2009 14:56:51 -0000 @@ -127,11 +127,11 @@ child_fetch(struct child *child, struct log_debug3("%s: sending exit message to parent", a->name); if (privsep_send(pio, &msg, NULL) != 0) fatalx("privsep_send error"); - log_debug3("%s: waiting for exit message from parent", a->name); - if (privsep_recv(pio, &msg, NULL) != 0) - fatalx("privsep_recv error"); - if (msg.type != MSG_EXIT) - fatalx("unexpected message"); + do { + log_debug3("%s: waiting for exit message from parent", a->name); + if (privsep_recv(pio, &msg, NULL) != 0) + fatalx("privsep_recv error"); + } while (msg.type != MSG_EXIT); #ifdef DEBUG COUNTFDS(a->name); Index: child.c =================================================================== RCS file: /cvsroot/fdm/fdm/child.c,v retrieving revision 1.147 diff -u -p -r1.147 child.c --- child.c 17 May 2009 19:20:08 -0000 1.147 +++ child.c 24 May 2009 14:56:51 -0000 @@ -18,6 +18,7 @@ #include <sys/types.h> #include <sys/socket.h> +#include <sys/wait.h> #include <unistd.h> @@ -35,6 +36,9 @@ child_sighandler(int sig) case SIGUSR1: sigusr1 = 1; break; + case SIGCHLD: + waitpid(WAIT_ANY, NULL, WNOHANG); + break; case SIGTERM: cleanup_purge(); _exit(1); @@ -60,6 +64,7 @@ child_fork(void) sigaddset(&act.sa_mask, SIGUSR1); sigaddset(&act.sa_mask, SIGINT); sigaddset(&act.sa_mask, SIGTERM); + sigaddset(&act.sa_mask, SIGCHLD); act.sa_flags = SA_RESTART; act.sa_handler = SIG_IGN; @@ -75,6 +80,8 @@ child_fork(void) fatal("sigaction failed"); if (sigaction(SIGTERM, &act, NULL) < 0) fatal("sigaction failed"); + if (sigaction(SIGCHLD, &act, NULL) < 0) + fatal("sigaction failed"); return (0); default: Index: command.c =================================================================== RCS file: /cvsroot/fdm/fdm/command.c,v retrieving revision 1.54 diff -u -p -r1.54 command.c --- command.c 17 May 2009 18:23:45 -0000 1.54 +++ command.c 24 May 2009 14:56:51 -0000 @@ -132,6 +132,8 @@ cmd_start(const char *s, int flags, cons fatal("signal failed"); if (signal(SIGUSR2, SIG_DFL) == SIG_ERR) fatal("signal failed"); + if (signal(SIGCHLD, SIG_DFL) == SIG_ERR) + fatal("signal failed"); execl(_PATH_BSHELL, "sh", "-c", s, (char *) NULL); fatal("execl failed"); @@ -271,10 +273,14 @@ cmd_poll(struct cmd *cmd, char **out, ch CMD_DEBUG(cmd, "polling, timeout=%d", timeout); switch (io_polln(ios, 3, &io, timeout, cause)) { case -1: + CMD_DEBUG(cmd, "poll error: %s", strerror(errno)); if (errno == EAGAIN) break; + kill(cmd->pid, SIGTERM); return (-1); case 0: + CMD_DEBUG(cmd, "poll closed"); + kill(cmd->pid, SIGTERM); /* * Check for closed. It'd be nice for closed input to * be an error, but we can't tell the difference @@ -302,6 +308,7 @@ cmd_poll(struct cmd *cmd, char **out, ch break; } } + CMD_DEBUG(cmd, "poll out"); /* * Retrieve and return a line if possible. This must be after the Index: fdm.c =================================================================== RCS file: /cvsroot/fdm/fdm/fdm.c,v retrieving revision 1.182 diff -u -p -r1.182 fdm.c --- fdm.c 17 May 2009 19:20:08 -0000 1.182 +++ fdm.c 24 May 2009 14:56:52 -0000 @@ -44,6 +44,7 @@ const char *malloc_options = "AFGJPRX"; void sighandler(int); struct child *check_children(struct children *, u_int *); +int wait_children(struct children *, struct children *); struct conf conf; @@ -181,6 +182,79 @@ check_children(struct children *children return (NULL); } +/* Wait for a child and deal with its exit. */ +int +wait_children(struct children *children, struct children *dead_children) +{ + struct child *child, *child2; + pid_t pid; + int status, retcode = 0; + u_int i, j; + + for (;;) { + log_debug3("parent: waiting for children"); + /* Wait for a child. */ + switch (pid = waitpid(WAIT_ANY, &status, WNOHANG)) { + case 0: + return (0); + case -1: + if (errno == ECHILD) + return (0); + fatal("waitpid failed"); + } + + /* Handle the exit status. */ + if (WIFSIGNALED(status)) { + retcode = 1; + log_debug2("parent: child %ld got signal %d", + (long) pid, WTERMSIG(status)); + } else if (!WIFEXITED(status)) { + retcode = 1; + log_debug2("parent: child %ld exited badly", + (long) pid); + } else { + if (WEXITSTATUS(status) != 0) + retcode = 1; + log_debug2("parent: child %ld returned %d", + (long) pid, WEXITSTATUS(status)); + } + + /* Find this child. */ + child = NULL; + for (i = 0; i < ARRAY_LENGTH(children); i++) { + child = ARRAY_ITEM(children, i); + if (pid == child->pid) + break; + } + if (i == ARRAY_LENGTH(children)) { + log_debug2("parent: unidentified child %ld", + (long) pid); + continue; + } + + if (child->io != NULL) { + io_close(child->io); + io_free(child->io); + child->io = NULL; + } + ARRAY_REMOVE(children, i); + ARRAY_ADD(dead_children, child); + + /* If this child was the parent of any others, kill them too. */ + for (j = 0; j < ARRAY_LENGTH(children); j++) { + child2 = ARRAY_ITEM(children, j); + if (child2->parent != child) + continue; + + log_debug2("parent: child %ld died: killing %ld", + (long) child->pid, (long) child2->pid); + kill(child2->pid, SIGTERM); + } + } + + return (retcode); +} + __dead void usage(void) { @@ -198,7 +272,6 @@ main(int argc, char **argv) enum fdmop op = FDMOP_NONE; const char *proxy = NULL, *s; char tmp[BUFSIZ], *ptr, *lock = NULL, *user, *home = NULL; - long n; struct utsname un; struct passwd *pw; struct stat sb; @@ -207,7 +280,7 @@ main(int argc, char **argv) TAILQ_HEAD(, account) actaq; /* active accounts */ pid_t pid; struct children children, dead_children; - struct child *child, *child2; + struct child *child; struct io *rio; struct iolist iol; double tim; @@ -576,13 +649,12 @@ main(int argc, char **argv) sigaddset(&act.sa_mask, SIGUSR1); sigaddset(&act.sa_mask, SIGINT); sigaddset(&act.sa_mask, SIGTERM); + sigaddset(&act.sa_mask, SIGCHLD); act.sa_flags = SA_RESTART; act.sa_handler = SIG_IGN; if (sigaction(SIGPIPE, &act, NULL) < 0) fatal("sigaction failed"); - if (sigaction(SIGUSR1, &act, NULL) < 0) - fatal("sigaction failed"); if (sigaction(SIGUSR2, &act, NULL) < 0) fatal("sigaction failed"); @@ -597,6 +669,8 @@ main(int argc, char **argv) fatal("sigaction failed"); if (sigaction(SIGTERM, &act, NULL) < 0) fatal("sigaction failed"); + if (sigaction(SIGCHLD, &act, NULL) < 0) + fatal("sigaction failed"); /* Check lock file. */ lock = conf.lock_file; @@ -675,21 +749,35 @@ main(int argc, char **argv) (long) child->pid, a->name); } - /* Fill the io list. */ + /* Check children and fill the io list. */ ARRAY_CLEAR(&iol); for (i = 0; i < ARRAY_LENGTH(&children); i++) { child = ARRAY_ITEM(&children, i); - ARRAY_ADD(&iol, child->io); + if (child->io != NULL) + ARRAY_ADD(&iol, child->io); } /* Poll the io list. */ - n = io_polln( - ARRAY_DATA(&iol), ARRAY_LENGTH(&iol), &rio, INFTIM, NULL); - switch (n) { - case -1: - fatalx("child socket error"); - case 0: - fatalx("child socket closed"); + if (ARRAY_LENGTH(&iol) != 0) { + switch (io_polln( + ARRAY_DATA(&iol), ARRAY_LENGTH(&iol), + &rio, INFTIM, NULL)) { + case -1: + case 0: + for (i = 0; i < ARRAY_LENGTH(&children); i++) { + child = ARRAY_ITEM(&children, i); + if (rio != child->io) + continue; + log_debug2("parent: child %ld socket " + "error", (long) child->pid); + kill(child->pid, SIGTERM); + + io_close(child->io); + io_free(child->io); + child->io = NULL; + } + break; + } } /* Check all children for pending privsep messages. */ @@ -703,50 +791,17 @@ main(int argc, char **argv) continue; /* Child has said it is ready to exit, tell it to. */ + log_debug2("parent: sending exit message to child %ld", + (long) child->pid); memset(&msg, 0, sizeof msg); msg.type = MSG_EXIT; if (privsep_send(child->io, &msg, NULL) != 0) fatalx("privsep_send error"); - - /* Wait for the child. */ - if (waitpid(child->pid, &status, 0) == -1) - fatal("waitpid failed"); - if (WIFSIGNALED(status)) { - res = 1; - log_debug2("parent: child %ld got signal %d", - (long) child->pid, WTERMSIG(status)); - } else if (!WIFEXITED(status)) { - res = 1; - log_debug2("parent: child %ld exited badly", - (long) child->pid); - } else { - if (WEXITSTATUS(status) != 0) - res = 1; - log_debug2("parent: child %ld returned %d", - (long) child->pid, WEXITSTATUS(status)); - } - - io_close(child->io); - io_free(child->io); - child->io = NULL; - - ARRAY_REMOVE(&children, i); - ARRAY_ADD(&dead_children, child); - - /* - * If this child was the parent of any others, kill - * them too. - */ - for (i = 0; i < ARRAY_LENGTH(&children); i++) { - child2 = ARRAY_ITEM(&children, i); - if (child2->parent != child) - continue; - - log_debug("parent: child %ld died: killing %ld", - (long) child->pid, (long) child2->pid); - kill(child2->pid, SIGTERM); - } } + + /* Collect any dead children. */ + if (wait_children(&children, &dead_children) != 0) + res = 1; } ARRAY_FREE(&iol); Index: parent-deliver.c =================================================================== RCS file: /cvsroot/fdm/fdm/parent-deliver.c,v retrieving revision 1.10 diff -u -p -r1.10 parent-deliver.c --- parent-deliver.c 25 Jul 2007 22:05:06 -0000 1.10 +++ parent-deliver.c 24 May 2009 14:56:53 -0000 @@ -57,10 +57,9 @@ parent_deliver(struct child *child, stru /* Check if child is alive and send to it if so. */ child = data->child; - if (child->io != NULL && kill(child->pid, 0) == 0) { - if (privsep_send(child->io, msg, msgbuf) != 0) - fatalx("privsep_send error"); - } else + if (child->io != NULL) + privsep_send(child->io, msg, msgbuf); + else log_debug2("%s: child %ld missing", a->name, (long) child->pid); mail_close(m); Index: privsep.c =================================================================== RCS file: /cvsroot/fdm/fdm/privsep.c,v retrieving revision 1.11 diff -u -p -r1.11 privsep.c --- privsep.c 25 Jul 2007 22:05:06 -0000 1.11 +++ privsep.c 24 May 2009 14:56:53 -0000 @@ -51,6 +51,8 @@ privsep_check(struct io *io) int privsep_recv(struct io *io, struct msg *msg, struct msgbuf *msgbuf) { + char *tmpbuf; + if (msgbuf != NULL) { msgbuf->buf = NULL; msgbuf->len = 0; @@ -64,13 +66,17 @@ privsep_recv(struct io *io, struct msg * if (msg->size == 0) return (0); - if (msgbuf == NULL) - return (-1); - msgbuf->len = msg->size; - if (io_wait(io, msgbuf->len, INFTIM, NULL) != 0) - return (-1); - if ((msgbuf->buf = io_read(io, msgbuf->len)) == NULL) + if (io_wait(io, msg->size, INFTIM, NULL) != 0) return (-1); + if (msgbuf == NULL) { + if ((tmpbuf = io_read(io, msg->size)) == NULL) + return (-1); + xfree(tmpbuf); + } else { + if ((msgbuf->buf = io_read(io, msg->size)) == NULL) + return (-1); + msgbuf->len = msg->size; + } return (0); } On Sat, May 23, 2009 at 12:31:44AM +0530, Ritesh Raj Sarraf wrote: > Hi Nicholas, > > Log is available at: > http://www.researchut.com/tmp/fdm.bug.gz > Hope this helps triage the bug. > > Ritesh > -- > Ritesh Raj Sarraf > RESEARCHUT - http://www.researchut.com > "Necessity is the mother of invention." > -- To UNSUBSCRIBE, email to debian-bugs-dist-requ...@lists.debian.org with a subject of "unsubscribe". Trouble? Contact listmas...@lists.debian.org