Package: icecc Version: 0.9.1-1 Severity: normal Tags: patch The problem is in libicecc.a: the function DiscoverSched::try_get_scheduler always returns NULL because connect always fails with EINPROGRESS (the socket is non-blocking).
With the attached patch, we use a blocking socket to connect to the scheduler. Tested on a 25-node cluster with no problems. This bug is present in icecc-monitor=1.1-3 (compiled against icecc 0.9) but was not present in icecc-monitor=1.1-2 (compiled against icecc 0.8). -- System Information: Debian Release: lenny/sid APT prefers unstable APT policy: (990, 'unstable'), (550, 'testing'), (25, 'experimental') Architecture: amd64 (x86_64) Kernel: Linux 2.6.27 (SMP w/2 CPU cores) Locale: LANG=fr_FR.UTF-8, LC_CTYPE=fr_FR.UTF-8 (charmap=UTF-8) Shell: /bin/sh linked to /bin/dash Versions of packages icecc depends on: ii adduser 3.110 add and remove users and groups ii debconf [debconf-2.0] 1.5.24 Debian configuration management sy ii dpkg 1.14.22 Debian package management system ii g++ [c++-compiler] 4:4.3.2-2 The GNU C++ compiler ii g++-4.3 [c++-compiler] 4.3.2-2~exp1 The GNU C++ compiler ii gcc [c-compiler] 4:4.3.2-2 The GNU C compiler ii gcc-4.3 [c-compiler] 4.3.2-2~exp1 The GNU C compiler ii libc6 2.8+20080809-2 GNU C Library: Shared libraries ii libgcc1 1:4.3.2-2~exp1 GCC support library ii libstdc++6 4.3.2-2~exp1 The GNU Standard C++ Library v3 ii lsb-base 3.2-20 Linux Standard Base 3.2 init scrip icecc recommends no packages. 
Versions of packages icecc suggests: ii icecc-monitor 1.1-3 icecc monitor for KDE -- debconf information excluded *** 12_fix_nonblocking_sockets_for_icemon.diff --- icecc-0.9.1.orig/services/comm.cpp 2008-05-12 19:57:14.000000000 +0200 +++ icecc-0.9.1/services/comm.cpp 2008-10-20 15:43:23.000000000 +0200 @@ -1160,14 +1160,21 @@ if (ask_fd >= 0) { + fcntl(ask_fd, F_SETFL, 0); int status = connect (ask_fd, (struct sockaddr*) &remote_addr, sizeof(remote_addr) ); - if (status == 0 || (status < 0 && errno == EISCONN)) + int connect_errno = errno; + fcntl(ask_fd, F_SETFL, O_NONBLOCK); + if (status == 0 || (status < 0 && connect_errno == EISCONN)) { int fd = ask_fd; ask_fd = -1; return Service::createChannel(fd, (struct sockaddr*) &remote_addr, sizeof(remote_addr)); } + else + { + log_error() << "connect failed : " << strerror(connect_errno) << endl; + } } return 0; }
--- icecc-0.9.1.orig/services/comm.cpp 2008-05-12 19:57:14.000000000 +0200 +++ icecc-0.9.1/services/comm.cpp 2008-10-20 15:43:23.000000000 +0200 @@ -1160,14 +1160,21 @@ if (ask_fd >= 0) { + fcntl(ask_fd, F_SETFL, 0); int status = connect (ask_fd, (struct sockaddr*) &remote_addr, sizeof(remote_addr) ); - if (status == 0 || (status < 0 && errno == EISCONN)) + int connect_errno = errno; + fcntl(ask_fd, F_SETFL, O_NONBLOCK); + if (status == 0 || (status < 0 && connect_errno == EISCONN)) { int fd = ask_fd; ask_fd = -1; return Service::createChannel(fd, (struct sockaddr*) &remote_addr, sizeof(remote_addr)); } + else + { + log_error() << "connect failed : " << strerror(connect_errno) << endl; + } } return 0; }