Package: openmpi Version: 1.3.3-2 Severity: important Tags: patch User: debian-h...@lists.debian.org Usertags: hurd
Hi, currently openmpi does not build on GNU/Hurd for various small reasons: unconditional usage of PATH_MAX, MAXPATHLEN, and MAXHOSTNAMELEN (which GNU/Hurd does not define), usage of sigaction::sa_sigaction, and a wrong header check for recognizing Darwin. The attached patch hopefully fixes all the issues: - when SA_SIGINFO is missing, a plain sa_handler is used instead of sa_sigaction; the new show_stackframe_handler() calls show_stackframe() with NULL parameters, as show_stackframe() already checks for non-NULL siginfo_t* and context* - some fixed-size char[] buffers used with getcwd() are replaced by get_current_dir_name() + free() - some fixed-size char[] buffers used with gethostname() are replaced by a dynamic realloc() loop - one gethostname() call is commented out, as its result seems unused... - the configure check now looks for mach-o/arch.h instead of mach/mach_host.h to determine whether it is compiling on Darwin - one fixed-size char[PATH_MAX] buffer is replaced by malloc() + free() Please note that, since a .m4 macro is changed, the build system needs to be regenerated with autoreconf. Thanks, -- Pino
--- openmpi-1.3.3.orig/opal/util/stacktrace.c +++ openmpi-1.3.3/opal/util/stacktrace.c @@ -363,6 +363,13 @@ fflush(stderr); } +#ifndef SA_SIGINFO +static void show_stackframe_handler (int signo) +{ + show_stackframe(signo, NULL, NULL); +} + +#endif /* SA_SIGINFO */ #endif /* OMPI_WANT_PRETTY_PRINT_STACKTRACE && ! defined(__WINDOWS__) */ @@ -422,8 +429,12 @@ mca_base_param_lookup_string (param, &string_value); memset(&act, 0, sizeof(act)); +#ifdef SA_SIGINFO act.sa_sigaction = show_stackframe; act.sa_flags = SA_SIGINFO; +#else + act.sa_handler = show_stackframe_handler; +#endif #ifdef SA_ONESHOT act.sa_flags |= SA_ONESHOT; #else --- openmpi-1.3.3.orig/opal/mca/base/mca_base_param.c +++ openmpi-1.3.3/opal/mca/base/mca_base_param.c @@ -186,8 +186,14 @@ home = (char*)opal_home_directory(); if(NULL == cwd) { +#if !defined(MAXPATHLEN) && defined(__GLIBC__) + cwd = get_current_dir_name(); + if( NULL == cwd) +#else cwd = (char *) malloc(sizeof(char) * MAXPATHLEN); - if( NULL == (cwd = getcwd(cwd, MAXPATHLEN) )) { + if( NULL == (cwd = getcwd(cwd, MAXPATHLEN) )) +#endif + { opal_output(0, "Error: Unable to get the current working directory\n"); cwd = strdup("."); } --- openmpi-1.3.3.orig/orte/mca/odls/base/odls_base_default_fns.c +++ openmpi-1.3.3/orte/mca/odls/base/odls_base_default_fns.c @@ -750,8 +750,13 @@ orte_odls_job_t *jobdat; orte_pmap_t *pmap; char *pathenv = NULL, *mpiexec_pathenv = NULL; +#if !defined(MAXPATHLEN) && defined(__GLIBC__) + char *basedir=NULL; + char *dir=NULL; +#else char basedir[MAXPATHLEN]; char dir[MAXPATHLEN]; +#endif char **argvptr; char *full_search; char **argvsav=NULL; @@ -764,7 +769,11 @@ * bouncing around as we execute various apps, but we will always return * to this place as our default directory */ +#if !defined(MAXPATHLEN) && defined(__GLIBC__) + basedir = get_current_dir_name(); +#else getcwd(basedir, sizeof(basedir)); +#endif /* find the jobdat for this job */ jobdat = NULL; @@ -915,7 +924,11 @@ * again not match getcwd! 
This is beyond our control - we are only * ensuring they start out matching. */ +#if !defined(MAXPATHLEN) && defined(__GLIBC__) + dir = get_current_dir_name(); +#else getcwd(dir, sizeof(dir)); +#endif opal_setenv("PWD", dir, true, &app->env); /* Search for the OMPI_exec_path and PATH settings in the environment. */ @@ -1247,6 +1260,10 @@ opal_condition_signal(&orte_odls_globals.cond); OPAL_THREAD_UNLOCK(&orte_odls_globals.mutex); +#if !defined(MAXPATHLEN) && defined(__GLIBC__) + free(basedir); + free(dir); +#endif return rc; } --- openmpi-1.3.3.orig/opal/mca/paffinity/darwin/configure.m4 +++ openmpi-1.3.3/opal/mca/paffinity/darwin/configure.m4 @@ -23,9 +23,9 @@ # ----------------------------------------------------------- AC_DEFUN([MCA_paffinity_darwin_CONFIG],[ OMPI_VAR_SCOPE_PUSH([paff_darwin_happy]) - # check to see if we have <mach/mach_host.h> + # check to see if we have <mach-o/arch.h> # as this is a Darwin-specific thing - AC_CHECK_HEADER([mach/mach_host.h], [paff_darwin_happy=yes], [paff_darwin_happy=no]) + AC_CHECK_HEADER([mach-o/arch.h], [paff_darwin_happy=yes], [paff_darwin_happy=no]) AS_IF([test "$paff_darwin_happy" = "yes"], [$1], [$2]) OMPI_VAR_SCOPE_POP --- openmpi-1.3.3.orig/opal/mca/base/mca_base_component_find.c +++ openmpi-1.3.3/opal/mca/base/mca_base_component_find.c @@ -201,11 +201,16 @@ } if (opal_list_get_end(found_components) == item) { - char h[MAXHOSTNAMELEN]; - gethostname(h, sizeof(h)); + char *h = NULL; + size_t h_length = 128; + do { + h_length *= 2; + h = realloc(h, h_length); + } while ((gethostname(h, h_length) == -1) && (errno == ENAMETOOLONG)); opal_show_help("help-mca-base.txt", "find-available:not-valid", true, h, type, requested_component_names[i]); + free(h); return OPAL_ERR_NOT_FOUND; } } --- openmpi-1.3.3.orig/orte/util/context_fns.c +++ openmpi-1.3.3/orte/util/context_fns.c @@ -56,10 +56,12 @@ { bool good = true; const char *tmp; +#if 0 /* 'hostname' looks unused... 
*/ char hostname[MAXHOSTNAMELEN]; /* Use hostname in a few messages below */ gethostname(hostname, sizeof(hostname)); +#endif /* If we want to chdir and the chdir fails (for any reason -- such as if the dir doesn't exist, it isn't a dir, we don't have --- openmpi-1.3.3.orig/ompi/runtime/ompi_mpi_finalize.c +++ openmpi-1.3.3/ompi/runtime/ompi_mpi_finalize.c @@ -100,13 +100,18 @@ /* Note that if we're already finalized, we cannot raise an MPI exception. The best that we can do is write something to stderr. */ - char hostname[MAXHOSTNAMELEN]; + char *hostname = NULL; + size_t hostname_length = 128; pid_t pid = getpid(); - gethostname(hostname, sizeof(hostname)); + do { + hostname_length *= 2; + hostname = realloc(hostname, hostname_length); + } while ((gethostname(hostname, hostname_length) == -1) && (errno == ENAMETOOLONG)); orte_show_help("help-mpi-runtime.txt", "mpi_finalize:invoked_multiple_times", true, hostname, pid); + free(hostname); return MPI_ERR_OTHER; } --- openmpi-1.3.3.orig/ompi/runtime/ompi_mpi_abort.c +++ openmpi-1.3.3/ompi/runtime/ompi_mpi_abort.c @@ -53,10 +53,11 @@ bool kill_remote_of_intercomm) { int count = 0, i; - char *msg, *host, hostname[MAXHOSTNAMELEN]; + char *msg, *host = NULL; pid_t pid = 0; orte_process_name_t *abort_procs; orte_std_cntr_t nabort_procs; + bool free_host = false; /* Protection for recursive invocation */ if (have_been_invoked) { @@ -70,8 +71,12 @@ if (orte_initialized) { host = orte_process_info.nodename; } else { - gethostname(hostname, sizeof(hostname)); - host = hostname; + size_t host_length = 128; + do { + host_length *= 2; + host = realloc(host, host_length); + } while ((gethostname(host, host_length) == -1) && (errno == ENAMETOOLONG)); + free_host = true; } pid = getpid(); @@ -138,9 +143,17 @@ fprintf(stderr, "[%s:%d] Abort %s completed successfully; not able to guarantee that all other processes were killed!\n", host, (int) pid, ompi_mpi_finalized ? 
"after MPI_FINALIZE" : "before MPI_INIT"); + if (free_host) { + free(host); + } exit(errcode); } + if (free_host) { + free(host); + host = NULL; + } + /* abort local procs in the communicator. If the communicator is an intercommunicator AND the abort has explicitly requested that we abort the remote procs, then do that as well. */ --- openmpi-1.3.3.orig/orte/orted/orted_main.c +++ openmpi-1.3.3/orte/orted/orted_main.c @@ -197,7 +197,7 @@ int ret = 0; int fd; opal_cmd_line_t *cmd_line = NULL; - char log_file[PATH_MAX]; + char *log_file = NULL; char *jobidstring; char *rml_uri; int i; @@ -439,6 +439,7 @@ } /* define a log file name in the session directory */ + log_file = malloc(strlen(jobidstring) + strlen(orte_process_info.nodename) + 19); sprintf(log_file, "output-orted-%s-%s.log", jobidstring, orte_process_info.nodename); log_path = opal_os_path(false, @@ -446,6 +447,7 @@ orte_process_info.top_session_dir, log_file, NULL); + free(log_file); fd = open(log_path, O_RDWR|O_CREAT|O_TRUNC, 0640); if (fd < 0) {