Package: samba
Version: 2:3.4.0-2+b1

Hello,
whenever I try to access same file from both Linux locally and from XP remotely (for example building software from same tree, once on Linux, once on Windows), samba commits suicide in oplocks code:

push_file_id_24 (buf=0x7fffdacbf2c0 "\22\b", id=0x30) at lib/file_id.c:71
71      lib/file_id.c: No such file or directory.
        in lib/file_id.c
(gdb) bt
#0 push_file_id_24 (buf=0x7fffdacbf2c0 "\22\b", id=0x30) at lib/file_id.c:71 #1 0x00000000006fc295 in break_kernel_oplock (msg_ctx=0x1d1e500, fsp=0x0) at smbd/oplock.c:43 #2 0x00000000006d4f97 in tevent_common_check_signal (ev=0x1d1e440) at ../lib/tevent/tevent_signal.c:311 #3 0x00000000006d34b5 in run_events (ev=0x7fffdacbf2c0, selrtn=-1, read_fds=0x7fffdacbf4f0, write_fds=0x7fffdacbf470) at lib/events.c:89 #4 0x0000000000509a23 in smbd_server_connection_loop_once () at smbd/process.c:798
#5  smbd_process () at smbd/process.c:2151
#6 0x0000000000943e95 in smbd_accept_connection (ev=<value optimized out>, fde=<value optimized out>, flags=<value optimized out>, private_data=<value optimized out>) at smbd/server.c:394 #7 0x00000000006d35b6 in run_events (ev=0x1d1e440, selrtn=1, read_fds=0x7fffdacbf8e0, write_fds=0x7fffdacbf860) at lib/events.c:126 #8 0x00000000006d37ff in s3_event_loop_once (ev=0x1d1e440, location=<value optimized out>) at lib/events.c:185 #9 0x00000000006d3bdc in _tevent_loop_once (ev=0x1d1e440, location=0xa78def "smbd/server.c:680") at ../lib/tevent/tevent.c:478 #10 0x000000000094492d in smbd_parent_loop (argc=<value optimized out>, argv=<value optimized out>) at smbd/server.c:680 #11 main (argc=<value optimized out>, argv=<value optimized out>) at smbd/server.c:1250

tevent_common_check_signal invoked this tevent_signal:

(gdb) print *se
$3 = {prev = 0x0, next = 0x1cb8af0, event_ctx = 0x1d1e440, signum = 35, sa_flags = 4, handler = 0x6fc8f0 <linux_oplock_signal_handler>, private_data = 0x1d28060, handler_name = 0x9f4705 "linux_oplock_signal_handler", location = 0x9f46ed "smbd/oplock_linux.c:210", additional_data = 0x1d37720}

For signal 35 it says:

(gdb) print i
$14 = 35
(gdb) print sig_state->signal_count[35]
$19 = {count = 27, seen = 25}
(gdb) print count
$20 = 2

So count was 2, and it matches with expected state. But code to call handlers says:

for (j=0;j<count;j++) {
  /* note the use of the sig_info array as a
     ring buffer */
  int ofs = ((count-1) + j) % SA_INFO_QUEUE_COUNT;
  se->handler(ev, se, i, 1,
              (void*)&sig_state->sig_info[i][ofs],
              se->private_data);
}

So for count=2 it will try to access elements (2-1)+0 = 1, and (2-1)+1 = 2. But elements 0 & 1 are ones which are valid:

(gdb) print sig_state->sig_info[35][0]._sifields._sigpoll.si_fd
$43 = 31
(gdb) print sig_state->sig_info[35][1]._sifields._sigpoll.si_fd
$44 = 32
(gdb) print sig_state->sig_info[35][2]._sifields._sigpoll.si_fd
$45 = 1918967843

I admit that I have troubles understanding what this code tries to do, but as far as I can tell second index to sig_info[i][xxx] array should be using some different indices.

As far as I can tell attached patch could do the trick. Unfortunately Samba is FTBFS on my box (seems to be completely confused about talloc.h & libtalloc.so) so I could not even compile-test it...
                                                                Petr
diff -urN samba-3.4.0.orig/lib/tevent/tevent_signal.c 
samba-3.4.0/lib/tevent/tevent_signal.c
--- samba-3.4.0.orig/lib/tevent/tevent_signal.c 2009-07-03 04:21:14.000000000 
-0700
+++ samba-3.4.0/lib/tevent/tevent_signal.c      2009-08-11 20:50:35.000000000 
-0700
@@ -32,8 +32,8 @@
 
 #define NUM_SIGNALS 64
 
-/* maximum number of SA_SIGINFO signals to hold in the queue */
-#define SA_INFO_QUEUE_COUNT 100
+/* maximum number of SA_SIGINFO signals to hold in the queue.  Must be power 
of two. */
+#define SA_INFO_QUEUE_COUNT 64
 
 struct sigcounter {
        uint32_t count;
@@ -44,6 +44,9 @@
 #define SIG_SEEN(s, n) (s).seen += (n)
 #define SIG_PENDING(s) ((s).seen != (s).count)
 
+#define SIG_QUEUE_IDX(sig) (sig_state->signal_count[(sig)].count % 
SA_INFO_QUEUE_COUNT)
+#define SIG_DEQUEUE_IDX(sig, idx) ((sig_state->signal_count[(sig)].seen + 
(idx)) % SA_INFO_QUEUE_COUNT)
+
 struct tevent_common_signal_list {
        struct tevent_common_signal_list *prev, *next;
        struct tevent_signal *se;
@@ -70,10 +73,7 @@
 */
 static uint32_t sig_count(struct sigcounter s)
 {
-       if (s.count >= s.seen) {
-               return s.count - s.seen;
-       }
-       return 1 + (0xFFFFFFFF & ~(s.seen - s.count));
+       return s.count - s.seen;
 }
 
 /*
@@ -97,7 +97,7 @@
                                              void *uctx)
 {
        uint32_t count = sig_count(sig_state->signal_count[signum]);
-       sig_state->sig_info[signum][count] = *info;
+       sig_state->sig_info[signum][SIG_QUEUE_IDX(signum)] = *info;
 
        tevent_common_signal_handler(signum);
 
@@ -307,21 +307,11 @@
                                for (j=0;j<count;j++) {
                                        /* note the use of the sig_info array 
as a
                                           ring buffer */
-                                       int ofs = ((count-1) + j) % 
SA_INFO_QUEUE_COUNT;
+                                       int ofs = SIG_DEQUEUE_IDX(i, j);
                                        se->handler(ev, se, i, 1, 
-                                                   
(void*)&sig_state->sig_info[i][ofs], 
+                                                   
(void*)&sig_state->sig_info[i][ofs],
                                                    se->private_data);
                                }
-                               if (SIG_PENDING(sig_state->sig_blocked[i])) {
-                                       /* we'd filled the queue, unblock the
-                                          signal now */
-                                       sigset_t set;
-                                       sigemptyset(&set);
-                                       sigaddset(&set, i);
-                                       SIG_SEEN(sig_state->sig_blocked[i], 
-                                                
sig_count(sig_state->sig_blocked[i]));
-                                       sigprocmask(SIG_UNBLOCK, &set, NULL);
-                               }
                                if (se->sa_flags & SA_RESETHAND) {
                                        talloc_free(se);
                                }
@@ -335,6 +325,35 @@
                }
                SIG_SEEN(sig_state->signal_count[i], count);
                SIG_SEEN(sig_state->got_signal, count);
+#ifdef SA_SIGINFO
+               /* It does not matter whether we first look at pending, and then
+                  increment seen, or vice versa, there will be always race.
+
+                  If we first unblock signal, and then increment seen, it can
+                  happen that hundreds of signals will get delivered 
immediately
+                  after unblocking - and queue will overflow, as end condition
+                  checks for count+1==MAX, and we are already beyond that 
point.
+
+                  If instead we first increment seen, and then do unblocking, 
it is
+                  instead possible that (at least two) signals which got us 
into
+                  blocked state were just delivered after incrementing seen.  
In
+                  that case we'll errorneously enable signal, although queue is
+                  full, again potentially overflowing queue.
+
+                  As I can trigger first crash by sitting in gdb for long time,
+                  while I cannot easily trigger second, let's first increment
+                  seen, and then look at pending. */
+               if (SIG_PENDING(sig_state->sig_blocked[i])) {
+                       /* we'd filled the queue, unblock the
+                          signal now */
+                       sigset_t set;
+                       sigemptyset(&set);
+                       sigaddset(&set, i);
+                       SIG_SEEN(sig_state->sig_blocked[i], 
+                                sig_count(sig_state->sig_blocked[i]));
+                       sigprocmask(SIG_UNBLOCK, &set, NULL);
+               }
+#endif
        }
 
        return 1;

Reply via email to