This is a slightly updated version of the patch, rediffed to apply cleanly
to 2.1.4 as distributed in the tarball.  It logs when a child dies for no good
reason, so it should help address concerns that it would mask a worse
problem.

-- 
  "One disk to rule them all, One disk to find them. One disk to bring
  them all and in the darkness grind them. In the Land of Redmond
  where the shadows lie." -- The Silicon Valley Tarot
  Henrique Holschuh
diff -ru cyrus-imapd-2.1.4.orig/imap/signals.c cyrus-imapd-2.1.4/imap/signals.c
--- cyrus-imapd-2.1.4.orig/imap/signals.c       Tue Nov 27 00:25:00 2001
+++ cyrus-imapd-2.1.4/imap/signals.c    Thu May 16 14:13:19 2002
@@ -50,7 +50,7 @@
 #include "xmalloc.h"
 #include "exitcodes.h"
 
-static int gotsignal = 0;
+static volatile int gotsignal = 0;
 
 static void sighandler(int sig)
 {
diff -ru cyrus-imapd-2.1.4.orig/master/master.c cyrus-imapd-2.1.4/master/master.c
--- cyrus-imapd-2.1.4.orig/master/master.c      Thu Apr 11 18:47:29 2002
+++ cyrus-imapd-2.1.4/master/master.c   Thu May 16 14:13:19 2002
@@ -123,6 +123,7 @@
 
 struct centry {
     pid_t pid;
+    int is_available;
     struct service *s;
     struct centry *next;
 };
@@ -586,6 +587,7 @@
        /* add to child table */
        c = get_centry();
        c->pid = p;
+       c->is_available = 1;
        c->s = s;
        c->next = ctable[p % child_table_size];
        ctable[p % child_table_size] = c;
@@ -717,7 +719,17 @@
            /* first thing in the linked list */
 
            /* decrement active count for service */
-           if (c->s) c->s->nactive--;
+           if (c->s) {
+               c->s->nactive--;
+               if (c->is_available) {
+                   c->s->ready_workers--;
+                   if (WIFSIGNALED(status) ||
+                       (WIFEXITED(status) && WEXITSTATUS(status))) {
+                       syslog(LOG_WARNING, "available child %d terminated abnormally",
+                              pid);
+                   }
+               }
+           }
 
            ctable[pid % child_table_size] = c->next;
            c->next = cfreelist;
@@ -734,7 +746,17 @@
 
                t = c->next;
                /* decrement active count for service */
-               if (t->s) t->s->nactive--;
+               if (t->s) {
+                   t->s->nactive--;
+                   if (t->is_available) {
+                       t->s->ready_workers--;
+                       if (WIFSIGNALED(status) ||
+                           (WIFEXITED(status) && WEXITSTATUS(status))) {
+                           syslog(LOG_WARNING, "available child %d terminated abnormally",
+                                  pid);
+                       }
+                   }
+               }
 
                c->next = t->next; /* remove node */
                t->next = cfreelist; /* add to freelist */
@@ -747,14 +769,14 @@
     }
 }
 
-static int gotsigchld = 0;
+static volatile int gotsigchld = 0;
 
 void sigchld_handler(int sig __attribute__((unused)))
 {
     gotsigchld = 1;
 }
 
-static int gotsighup = 0;
+static volatile int gotsighup = 0;
 
 void sighup_handler(int sig __attribute__((unused)))
 {
@@ -824,24 +846,43 @@
     }
 }
 
-void process_msg(struct service *s, int msg)
+void process_msg(struct service *s, struct notify_message *msg)
 {
-    switch (msg) {
+    struct centry * c;
+
+    /* Search hash table with linked list for pid */
+    c = ctable[msg->service_pid % child_table_size];
+    while (c && c->pid != msg->service_pid) c = c->next;
+
+    /* Did we find it? */
+    if (!c || c->pid != msg->service_pid) {
+       syslog(LOG_ERR, "can't find pid %d to process message %d", 
+                       msg->service_pid, msg->message);
+       return;
+    }
+
+    switch (msg->message) {
     case MASTER_SERVICE_AVAILABLE:
+       c->is_available = 1;
        s->ready_workers++;
        break;
 
     case MASTER_SERVICE_UNAVAILABLE:
+       c->is_available = 0;
        s->ready_workers--;
        break;
 
     case MASTER_SERVICE_CONNECTION:
+       if (c->is_available) {
+          syslog(LOG_ERR, "still available child pid %d, service %s reported new connection",
+                          msg->service_pid, s->name);
+       }
        s->nconnections++;
        break;
-       
+
     default:
        syslog(LOG_ERR, "unrecognized message for service '%s': %x", 
-              s->name, msg);
+              s->name, msg->message);
        break;
     }
 
@@ -1241,7 +1282,7 @@
     syslog(LOG_NOTICE, "ready for work");
 
     for (;;) {
-       int r, i, msg, maxfd;
+       int r, i, maxfd;
        struct timeval tv, *tvptr;
        time_t now = time(NULL);
 #if HAVE_UCDSNMP
@@ -1349,13 +1390,15 @@
            int j;
 
            if (FD_ISSET(x, &rfds)) {
-               r = read(x, &msg, sizeof(int));
-               if (r != sizeof(int)) {
+               struct notify_message message;
+
+               r = read(x, &message, sizeof(message));
+               if (r != sizeof(message)) {
                    syslog(LOG_ERR, "got weird response from child: %x", i);
                    continue;
                }
-               
-               process_msg(&Services[i], msg);
+
+               process_msg(&Services[i], &message);
            }
 
            if (Services[i].nactive < Services[i].max_workers) {
Only in cyrus-imapd-2.1.4/master: master.c.orig
diff -ru cyrus-imapd-2.1.4.orig/master/service.c cyrus-imapd-2.1.4/master/service.c
--- cyrus-imapd-2.1.4.orig/master/service.c     Fri Mar  8 16:26:17 2002
+++ cyrus-imapd-2.1.4/master/service.c  Thu May 16 14:13:19 2002
@@ -74,13 +74,16 @@
 /* number of times this service has been used */
 static int use_count = 0;
 static int verbose = 0;
-static int gotalrm = 0;
+static volatile int gotalrm = 0;
 static int lockfd = -1;
 
 void notify_master(int fd, int msg)
 {
-    if (verbose) syslog(LOG_DEBUG, "telling master %d", msg);
-    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
+    struct notify_message notifymsg;
+    if (verbose) syslog(LOG_DEBUG, "telling master %x", msg);
+    notifymsg.message = msg;
+    notifymsg.service_pid = getpid();
+    if (write(fd, &notifymsg, sizeof(notifymsg)) != sizeof(notifymsg)) {
        syslog(LOG_ERR, "unable to tell master %x: %m", msg);
     }
 }
diff -ru cyrus-imapd-2.1.4.orig/master/service.h cyrus-imapd-2.1.4/master/service.h
--- cyrus-imapd-2.1.4.orig/master/service.h     Mon Feb 11 15:41:45 2002
+++ cyrus-imapd-2.1.4/master/service.h  Thu May 16 14:13:19 2002
@@ -63,4 +63,9 @@
     REUSE_TIMEOUT = 60
 };
 
+struct notify_message {
+    int message;
+    pid_t service_pid;
+};
+
 #endif

Reply via email to