Tracking down EIO problem when invoking external commands via dejagnu

Simon Josefsson Sat, 23 Aug 2008 04:55:58 -0700

Hi!

The GNU Mailutils self-tests (which uses DejaGnu) fails randomly when
/bin/cp from modern CoreUtils is invoked via DejaGnu.  I'm using cp from
coreutils 6.10.  The crucial thing is likely the 'at_exit (close_stdin)'
call.  /bin/cp from Coreutils 5.97 works, and it doesn't close stdin
when it exits.


See earlier discussion with some strace output and more details:

http://thread.gmane.org/gmane.comp.gnu.mailutils.bugs/1190/focus=1202

This may be a tcl or expect bug, but I suspect 'local_exec' in DejaGnu's
remote.exp is the problem.  I've created a small snippet that reproduce
the problem.  It contains a stripped down copy of local_exec.  Here's
the output:

[EMAIL PROTECTED]:~$ expect foo.exp
spawn cp /dev/null /tmp/foo
[EMAIL PROTECTED]:~$ expect foo.exp
spawn cp /dev/null /tmp/foo
[EMAIL PROTECTED]:~$ expect foo.exp
spawn cp /dev/null /tmp/foo
[EMAIL PROTECTED]:~$ expect foo.exp
spawn cp /dev/null /tmp/foo
[EMAIL PROTECTED]:~$ expect foo.exp
spawn cp /dev/null /tmp/foo
[EMAIL PROTECTED]:~$ expect foo.exp
spawn cp /dev/null /tmp/foo
echo ERROR: Cannot create: 
[EMAIL PROTECTED]:~$

Alas, I'm not familiar enough with tcl and external processes to really
debug where this fails and what the proper solution would be.

Ideas?

Thanks,
/Simon

proc perror { args } {
    set message [lindex $args 0]

    puts "echo ERROR: $message";
}

# Run the specified COMMANDLINE on the local machine, redirecting input
# to file INP (if non-empty), redirecting output to file OUTP (if non-empty),
# and waiting TIMEOUT seconds for the command to complete before killing
# it. A two-member list is returned; the first member is the exit status
# of the command, the second is any output produced from the command
# (if output is redirected, this may or may not be empty). If output is
# redirected, both stdout and stderr will appear in the specified file.
#
# Caveats: A pipeline is used if input or output is redirected. There
# will be problems with killing the program if a pipeline is used. Either
# the "tee" command or the "cat" command is used in the pipeline if input
# or output is redirected. If the program needs to be killed, /bin/sh and
# the kill command will be invoked.
#
proc local_exec { commandline inp outp timeout } {
    # Tcl's exec is a pile of crap. It does two very inappropriate things
    # firstly, it has no business returning an error if the program being
    # executed happens to write to stderr. Secondly, it appends its own
    # error messages to the output of the command if the process exits with
    # non-zero status.
    #
    # So, ok, we do this funny stuff with using spawn sometimes and
    # open others because of spawn's inability to invoke commands with
    # redirected I/O. We also hope that nobody passes in a command that's
    # a pipeline, because spawn can't handle it.
    #
    # We want to use spawn in most cases, because tcl's pipe mechanism
    # doesn't assign process groups correctly and we can't reliably kill
    # programs that bear children. We can't use tcl's exec because it has
    # no way to timeout programs that hang. *sigh*

    global errorInfo
    if { "$inp" == "" && "$outp" == "" } {
        set id -1
        set result [catch "eval spawn \{${commandline}\}" pid]
        if { $result == 0 } {
            set result2 0
        } else {
            set pid 0
            set result2 5
        }
    } else {
        # Can you say "uuuuuugly"? I knew you could!
        # All in the name of non-infinite hangs.
        if { $inp != "" } {
            set inp "< $inp"
            set mode "r"
        } else {
            set mode "w"
        }

        set use_tee 0
        # We add |& cat so that Tcl exec doesn't freak out if the
        # program writes to stderr.
        if { $outp == "" } {
            set outp "|& cat"
        } else {
            set outpf "$outp"
            set outp "> $outp"
            if { $inp != "" } {
                set use_tee 1
            }
        }
        # Why do we use tee? Because open can't redirect both input and output.
        if { $use_tee } {
            set result [catch {open "| ${commandline} $inp |& tee $outpf" 
RDONLY} id]
        } else {
            set result [catch {open "| ${commandline} $inp $outp" $mode} id]
        }

        if { $result != 0 } {
            return [list -1 "open of $commandline $inp $outp failed: 
$errorInfo"]
        }
        set pid [pid $id]
        set result [catch "spawn -leaveopen $id" result2]
    }
    # Prepend "-" to each pid, to generate the "process group IDs" needed by
    # kill.
    set pgid "-[join $pid { -}]"
    #verbose "pid is $pid $pgid"
    if { $result != 0 || $result2 != 0 } {
        # This shouldn't happen.
        if {[info exists errorInfo]} {
            set foo $errorInfo
        } else {
            set foo ""
        }
        #verbose "spawn -open $id failed, $result $result2, $foo"
        catch "close $id"
        return [list -1 "spawn failed"]
    }

    set got_eof 0
    set output ""

    # Wait for either $timeout seconds to elapse, or for the program to
    # exit.
    expect {
        -i $spawn_id -timeout $timeout -re ".+" {
            append output $expect_out(buffer)
            if { [string length $output] < 512000 } {
                exp_continue -continue_timer
            }
        }
        timeout {
            warning "program timed out."
        }
        eof {
            set got_eof 1
        }
    }

    # Uuuuuuugh. Now I'm getting really sick.
    # If we didn't get an EOF, we have to kill the poor defenseless program.
    # However, Tcl has no kill primitive, so we have to execute an external
    # command in order to execute the execution. (English. Gotta love it.)
    if { ! $got_eof } {
        #verbose "killing $pid $pgid"
        # This is very, very nasty. SH, instead of EXPECT, is used to
        # run this in the background since, on older CYGWINs, a
        # strange file I/O error occures.
        exec sh -c "exec > /dev/null 2>&1 && (kill -2 $pgid || kill -2 $pid) && 
sleep 5 && (kill -15 $pgid || kill $pid) && sleep 5 && (kill -9 $pgid || kill 
-9 $pid) &"
    }
    # This will hang if the kill doesn't work. Nothin' to do, and it's not ok.
    catch "close -i $spawn_id"
    set r2 [catch "wait -i $spawn_id" wres]
    if { $id > 0 } {
        set r2 [catch "close $id" res]
    } else {
        #verbose "waitres is $wres" 2
        if { $r2 == 0 } {
            set r2 [lindex $wres 3]
            if { [llength $wres] > 4 } {
                if { [lindex $wres 4] == "CHILDKILLED" } {
                    set r2 1
                }
            }
            if { $r2 != 0 } {
                set res "$wres"
            } else {
                set res ""
            }
        } else {
            set res "wait failed"
        }
    }
    if { $r2 != 0 || $res != "" || ! $got_eof } {
        #verbose "close result is $res"
        set status 1
    } else {
        set status 0
    }
    #verbose "output is $output status $status"
    if { $outp == "" || $outp == "|& cat" } {
        return [list $status $output]
    } else {
        return [list $status ""]
    }
}

proc remote_exec { hostname program args } {
    if { [llength $args] > 0 } {
        set pargs [lindex $args 0]
    } else {
        set pargs ""
    }

    if { [llength $args] > 1 } {
        set inp "[lindex $args 1]"
    } else {
        set inp ""
    }

    if { [llength $args] > 2 } {
        set outp "[lindex $args 2]"
    } else {
        set outp ""
    }

    # 300 is probably a lame default.
    if { [llength $args] > 3 } {
        set timeout "[lindex $args 3]"
    } else {
        set timeout 300
    }

    return [local_exec "$program $pargs" $inp $outp $timeout]
}

proc foo {} {
          set output [remote_exec host "cp /dev/null /tmp/foo"]
          if [lindex $output 0] {
              perror "Cannot create: [lindex $output 1]"
              exit 1
          }
      }

foo;

_______________________________________________
DejaGnu mailing list
DejaGnu@gnu.org
http://lists.gnu.org/mailman/listinfo/dejagnu

Tracking down EIO problem when invoking external commands via dejagnu

Reply via email to