Okay, this is the problem:

gmail-researchut: spamprobe receive: io: poll: Connection timed out

Your pipe process does not return in the timeout, so fdm aborts the
fetch. Unfortunately this then causes various things which sometimes end up
with fdm not exiting correctly.

fdm's management of child processes isn't always very good, particularly when
an error occurs - sometimes stuff doesn't die in the right order and it ends up
with fatal errors. The whole multiprocess idea is to allow fdm to run lots of
things in parallel and to drop privileges when running as root, but I'm not
sure it is worth it now, I should really simplify it a lot and probably forget
about using it as root.

You can see similar problems with a configuration file such as:

    set timeout 2
    account 'stdin' stdin
    match exec 'sleep 10' returns (0, ) action drop

And then eg echo|fdm -vvvv f

This diff makes several changes which hopefully should make it a bit more
robust and correctly handle the various child processes on exit.

Please test.

Index: child-deliver.c
===================================================================
RCS file: /cvsroot/fdm/fdm/child-deliver.c,v
retrieving revision 1.21
diff -u -p -r1.21 child-deliver.c
--- child-deliver.c     17 May 2009 19:20:08 -0000      1.21
+++ child-deliver.c     24 May 2009 14:56:51 -0000
@@ -61,10 +61,10 @@ child_deliver(struct child *child, struc
 
        if (privsep_send(pio, &msg, &msgbuf) != 0)
                fatalx("privsep_send error");
-       if (privsep_recv(pio, &msg, NULL) != 0)
-               fatalx("privsep_recv error");
-       if (msg.type != MSG_EXIT)
-               fatalx("unexpected message");
+       do {
+               if (privsep_recv(pio, &msg, NULL) != 0)
+                       fatalx("privsep_recv error");
+       } while (msg.type != MSG_EXIT);
 
 #ifdef DEBUG
        COUNTFDS(a->name);
Index: child-fetch.c
===================================================================
RCS file: /cvsroot/fdm/fdm/child-fetch.c,v
retrieving revision 1.72
diff -u -p -r1.72 child-fetch.c
--- child-fetch.c       17 May 2009 18:23:45 -0000      1.72
+++ child-fetch.c       24 May 2009 14:56:51 -0000
@@ -127,11 +127,11 @@ child_fetch(struct child *child, struct 
        log_debug3("%s: sending exit message to parent", a->name);
        if (privsep_send(pio, &msg, NULL) != 0)
                fatalx("privsep_send error");
-       log_debug3("%s: waiting for exit message from parent", a->name);
-       if (privsep_recv(pio, &msg, NULL) != 0)
-               fatalx("privsep_recv error");
-       if (msg.type != MSG_EXIT)
-               fatalx("unexpected message");
+       do {
+               log_debug3("%s: waiting for exit message from parent", a->name);
+               if (privsep_recv(pio, &msg, NULL) != 0)
+                       fatalx("privsep_recv error");                   
+       } while (msg.type != MSG_EXIT);
 
 #ifdef DEBUG
        COUNTFDS(a->name);
Index: child.c
===================================================================
RCS file: /cvsroot/fdm/fdm/child.c,v
retrieving revision 1.147
diff -u -p -r1.147 child.c
--- child.c     17 May 2009 19:20:08 -0000      1.147
+++ child.c     24 May 2009 14:56:51 -0000
@@ -18,6 +18,7 @@
 
 #include <sys/types.h>
 #include <sys/socket.h>
+#include <sys/wait.h>
 
 #include <unistd.h>
 
@@ -35,6 +36,9 @@ child_sighandler(int sig)
        case SIGUSR1:
                sigusr1 = 1;
                break;
+       case SIGCHLD:
+               waitpid(WAIT_ANY, NULL, WNOHANG);
+               break;
        case SIGTERM:
                cleanup_purge();
                _exit(1);
@@ -60,6 +64,7 @@ child_fork(void)
                sigaddset(&act.sa_mask, SIGUSR1);
                sigaddset(&act.sa_mask, SIGINT);
                sigaddset(&act.sa_mask, SIGTERM);
+               sigaddset(&act.sa_mask, SIGCHLD);
                act.sa_flags = SA_RESTART;
 
                act.sa_handler = SIG_IGN;
@@ -75,6 +80,8 @@ child_fork(void)
                        fatal("sigaction failed");
                if (sigaction(SIGTERM, &act, NULL) < 0)
                        fatal("sigaction failed");
+               if (sigaction(SIGCHLD, &act, NULL) < 0)
+                       fatal("sigaction failed");
 
                return (0);
        default:
Index: command.c
===================================================================
RCS file: /cvsroot/fdm/fdm/command.c,v
retrieving revision 1.54
diff -u -p -r1.54 command.c
--- command.c   17 May 2009 18:23:45 -0000      1.54
+++ command.c   24 May 2009 14:56:51 -0000
@@ -132,6 +132,8 @@ cmd_start(const char *s, int flags, cons
                        fatal("signal failed");
                 if (signal(SIGUSR2, SIG_DFL) == SIG_ERR)
                        fatal("signal failed");
+                if (signal(SIGCHLD, SIG_DFL) == SIG_ERR)
+                       fatal("signal failed");
 
                execl(_PATH_BSHELL, "sh", "-c", s, (char *) NULL);
                fatal("execl failed");
@@ -271,10 +273,14 @@ cmd_poll(struct cmd *cmd, char **out, ch
                CMD_DEBUG(cmd, "polling, timeout=%d", timeout);
                switch (io_polln(ios, 3, &io, timeout, cause)) {
                case -1:
+                       CMD_DEBUG(cmd, "poll error: %s", strerror(errno));
                        if (errno == EAGAIN)
                                break;
+                       kill(cmd->pid, SIGTERM);
                        return (-1);
                case 0:
+                       CMD_DEBUG(cmd, "poll closed");
+                       kill(cmd->pid, SIGTERM);
                        /*
                         * Check for closed. It'd be nice for closed input to
                         * be an error, but we can't tell the difference
@@ -302,6 +308,7 @@ cmd_poll(struct cmd *cmd, char **out, ch
                        break;
                }
        }
+       CMD_DEBUG(cmd, "poll out");
 
        /*
         * Retrieve and return a line if possible. This must be after the
Index: fdm.c
===================================================================
RCS file: /cvsroot/fdm/fdm/fdm.c,v
retrieving revision 1.182
diff -u -p -r1.182 fdm.c
--- fdm.c       17 May 2009 19:20:08 -0000      1.182
+++ fdm.c       24 May 2009 14:56:52 -0000
@@ -44,6 +44,7 @@ const char            *malloc_options = "AFGJPRX";
 
 void                    sighandler(int);
 struct child           *check_children(struct children *, u_int *);
+int                     wait_children(struct children *, struct children *);
 
 struct conf             conf;
 
@@ -181,6 +182,79 @@ check_children(struct children *children
        return (NULL);
 }
 
+/* Wait for a child and deal with its exit. */
+int
+wait_children(struct children *children, struct children *dead_children)
+{
+       struct child    *child, *child2;
+       pid_t            pid;
+       int              status, retcode = 0;
+       u_int            i, j;
+
+       for (;;) {
+               log_debug3("parent: waiting for children");
+               /* Wait for a child. */
+               switch (pid = waitpid(WAIT_ANY, &status, WNOHANG)) {
+               case 0:
+                       return (0);
+               case -1:
+                       if (errno == ECHILD)
+                               return (0);
+                       fatal("waitpid failed");
+               }
+
+               /* Handle the exit status. */
+               if (WIFSIGNALED(status)) {
+                       retcode = 1;
+                       log_debug2("parent: child %ld got signal %d",
+                           (long) pid, WTERMSIG(status));
+               } else if (!WIFEXITED(status)) {
+                       retcode = 1;
+                       log_debug2("parent: child %ld exited badly",
+                           (long) pid);
+               } else {
+                       if (WEXITSTATUS(status) != 0)
+                               retcode = 1;
+                       log_debug2("parent: child %ld returned %d",
+                           (long) pid, WEXITSTATUS(status));
+               }
+               
+               /* Find this child. */
+               child = NULL;
+               for (i = 0; i < ARRAY_LENGTH(children); i++) {
+                       child = ARRAY_ITEM(children, i);
+                       if (pid == child->pid)
+                               break;
+               }
+               if (i == ARRAY_LENGTH(children)) {
+                       log_debug2("parent: unidentified child %ld",
+                           (long) pid);
+                       continue;
+               }
+
+               if (child->io != NULL) {
+                       io_close(child->io);
+                       io_free(child->io);
+                       child->io = NULL;
+               }
+               ARRAY_REMOVE(children, i);
+               ARRAY_ADD(dead_children, child);
+               
+               /* If this child was the parent of any others, kill them too. */
+               for (j = 0; j < ARRAY_LENGTH(children); j++) {
+                       child2 = ARRAY_ITEM(children, j);
+                       if (child2->parent != child)
+                               continue;
+                       
+                       log_debug2("parent: child %ld died: killing %ld",
+                           (long) child->pid, (long) child2->pid);
+                       kill(child2->pid, SIGTERM);
+               }
+       }
+
+       return (retcode);
+}
+
 __dead void
 usage(void)
 {
@@ -198,7 +272,6 @@ main(int argc, char **argv)
        enum fdmop       op = FDMOP_NONE;
        const char      *proxy = NULL, *s;
        char             tmp[BUFSIZ], *ptr, *lock = NULL, *user, *home = NULL;
-       long             n;
        struct utsname   un;
        struct passwd   *pw;
        struct stat      sb;
@@ -207,7 +280,7 @@ main(int argc, char **argv)
        TAILQ_HEAD(, account) actaq; /* active accounts */
        pid_t            pid;
        struct children  children, dead_children;
-       struct child    *child, *child2;
+       struct child    *child;
        struct io       *rio;
        struct iolist    iol;
        double           tim;
@@ -576,13 +649,12 @@ main(int argc, char **argv)
        sigaddset(&act.sa_mask, SIGUSR1);
        sigaddset(&act.sa_mask, SIGINT);
        sigaddset(&act.sa_mask, SIGTERM);
+       sigaddset(&act.sa_mask, SIGCHLD);
        act.sa_flags = SA_RESTART;
 
        act.sa_handler = SIG_IGN;
        if (sigaction(SIGPIPE, &act, NULL) < 0)
                fatal("sigaction failed");
-       if (sigaction(SIGUSR1, &act, NULL) < 0)
-               fatal("sigaction failed");
        if (sigaction(SIGUSR2, &act, NULL) < 0)
                fatal("sigaction failed");
 
@@ -597,6 +669,8 @@ main(int argc, char **argv)
                fatal("sigaction failed");
        if (sigaction(SIGTERM, &act, NULL) < 0)
                fatal("sigaction failed");
+       if (sigaction(SIGCHLD, &act, NULL) < 0)
+               fatal("sigaction failed");
 
        /* Check lock file. */
        lock = conf.lock_file;
@@ -675,21 +749,35 @@ main(int argc, char **argv)
                            (long) child->pid, a->name);
                }
 
-               /* Fill the io list. */
+               /* Check children and fill the io list. */
                ARRAY_CLEAR(&iol);
                for (i = 0; i < ARRAY_LENGTH(&children); i++) {
                        child = ARRAY_ITEM(&children, i);
-                       ARRAY_ADD(&iol, child->io);
+                       if (child->io != NULL)
+                               ARRAY_ADD(&iol, child->io);
                }
 
                /* Poll the io list. */
-               n = io_polln(
-                   ARRAY_DATA(&iol), ARRAY_LENGTH(&iol), &rio, INFTIM, NULL);
-               switch (n) {
-               case -1:
-                       fatalx("child socket error");
-               case 0:
-                       fatalx("child socket closed");
+               if (ARRAY_LENGTH(&iol) != 0) {
+                       switch (io_polln(
+                           ARRAY_DATA(&iol), ARRAY_LENGTH(&iol),
+                           &rio, INFTIM, NULL)) {
+                       case -1:
+                       case 0:
+                               for (i = 0; i < ARRAY_LENGTH(&children); i++) {
+                                       child = ARRAY_ITEM(&children, i);
+                                       if (rio != child->io)
+                                               continue;
+                                       log_debug2("parent: child %ld socket "
+                                           "error", (long) child->pid);
+                                       kill(child->pid, SIGTERM);
+                                       
+                                       io_close(child->io);
+                                       io_free(child->io);
+                                       child->io = NULL;
+                               }
+                               break;
+                       }
                }
 
                /* Check all children for pending privsep messages. */
@@ -703,50 +791,17 @@ main(int argc, char **argv)
                                continue;
 
                        /* Child has said it is ready to exit, tell it to. */
+                       log_debug2("parent: sending exit message to child %ld",
+                           (long) child->pid);
                        memset(&msg, 0, sizeof msg);
                        msg.type = MSG_EXIT;
                        if (privsep_send(child->io, &msg, NULL) != 0)
                                fatalx("privsep_send error");
-
-                       /* Wait for the child. */
-                       if (waitpid(child->pid, &status, 0) == -1)
-                               fatal("waitpid failed");
-                       if (WIFSIGNALED(status)) {
-                               res = 1;
-                               log_debug2("parent: child %ld got signal %d",
-                                   (long) child->pid, WTERMSIG(status));
-                       } else if (!WIFEXITED(status)) {
-                               res = 1;
-                               log_debug2("parent: child %ld exited badly",
-                                   (long) child->pid);
-                       } else {
-                               if (WEXITSTATUS(status) != 0)
-                                       res = 1;
-                               log_debug2("parent: child %ld returned %d",
-                                   (long) child->pid, WEXITSTATUS(status));
-                       }
-
-                       io_close(child->io);
-                       io_free(child->io);
-                       child->io = NULL;
-
-                       ARRAY_REMOVE(&children, i);
-                       ARRAY_ADD(&dead_children, child);
-
-                       /*
-                        * If this child was the parent of any others, kill
-                        * them too.
-                        */
-                       for (i = 0; i < ARRAY_LENGTH(&children); i++) {
-                               child2 = ARRAY_ITEM(&children, i);
-                               if (child2->parent != child)
-                                       continue;
-
-                               log_debug("parent: child %ld died: killing %ld",
-                                   (long) child->pid, (long) child2->pid);
-                               kill(child2->pid, SIGTERM);
-                       }
                }
+
+               /* Collect any dead children. */
+               if (wait_children(&children, &dead_children) != 0)
+                       res = 1;
        }
        ARRAY_FREE(&iol);
 
Index: parent-deliver.c
===================================================================
RCS file: /cvsroot/fdm/fdm/parent-deliver.c,v
retrieving revision 1.10
diff -u -p -r1.10 parent-deliver.c
--- parent-deliver.c    25 Jul 2007 22:05:06 -0000      1.10
+++ parent-deliver.c    24 May 2009 14:56:53 -0000
@@ -57,10 +57,9 @@ parent_deliver(struct child *child, stru
 
        /* Check if child is alive and send to it if so. */
        child = data->child;
-       if (child->io != NULL && kill(child->pid, 0) == 0) {
-               if (privsep_send(child->io, msg, msgbuf) != 0)
-                       fatalx("privsep_send error");
-       } else
+       if (child->io != NULL)
+               privsep_send(child->io, msg, msgbuf);
+       else
                log_debug2("%s: child %ld missing", a->name, (long) child->pid);
 
        mail_close(m);
Index: privsep.c
===================================================================
RCS file: /cvsroot/fdm/fdm/privsep.c,v
retrieving revision 1.11
diff -u -p -r1.11 privsep.c
--- privsep.c   25 Jul 2007 22:05:06 -0000      1.11
+++ privsep.c   24 May 2009 14:56:53 -0000
@@ -51,6 +51,8 @@ privsep_check(struct io *io)
 int
 privsep_recv(struct io *io, struct msg *msg, struct msgbuf *msgbuf)
 {
+       char    *tmpbuf;
+       
        if (msgbuf != NULL) {
                msgbuf->buf = NULL;
                msgbuf->len = 0;
@@ -64,13 +66,17 @@ privsep_recv(struct io *io, struct msg *
        if (msg->size == 0)
                return (0);
 
-       if (msgbuf == NULL)
-               return (-1);
-       msgbuf->len = msg->size;
-       if (io_wait(io, msgbuf->len, INFTIM, NULL) != 0)
-               return (-1);
-       if ((msgbuf->buf = io_read(io, msgbuf->len)) == NULL)
+       if (io_wait(io, msg->size, INFTIM, NULL) != 0)
                return (-1);
+       if (msgbuf == NULL) {
+               if ((tmpbuf = io_read(io, msg->size)) == NULL)
+                       return (-1);
+               xfree(tmpbuf);
+       } else {
+               if ((msgbuf->buf = io_read(io, msg->size)) == NULL)
+                       return (-1);
+               msgbuf->len = msg->size;
+       }
 
        return (0);
 }


On Sat, May 23, 2009 at 12:31:44AM +0530, Ritesh Raj Sarraf wrote:
> Hi Nicholas,
> 
> Log is available at:
> http://www.researchut.com/tmp/fdm.bug.gz
> Hope this helps triage the bug.
> 
> Ritesh
> -- 
> Ritesh Raj Sarraf
> RESEARCHUT - http://www.researchut.com
> "Necessity is the mother of invention."
> 





-- 
To UNSUBSCRIBE, email to debian-bugs-dist-requ...@lists.debian.org
with a subject of "unsubscribe". Trouble? Contact listmas...@lists.debian.org

Reply via email to