On Sun, 04 Mar 2018 at 11:00:19 +0100, SZALAY Attila wrote:
> <VirtSubproc>: failure: timed out on client shared directory setup

I've been seeing this a lot recently. The attached patches are not a
full solution (it still sometimes fails, particularly under load) but
they seem to help.

    smcv
>From c94046aaf3195436fe495b5363d18a023e6aa8a7 Mon Sep 17 00:00:00 2001
From: Simon McVittie <s...@debian.org>
Date: Fri, 20 Apr 2018 09:05:40 +0100
Subject: [PATCH 1/4] qemu: Expect a prompt after successfully running "echo
 ok" on ttyS1

If we don't wait for a prompt after every command, then we run the
risk of losing track of which prompt corresponds to which command,
and thinking a command has completed successfully when it actually
hasn't.

Signed-off-by: Simon McVittie <s...@debian.org>
---
 virt/autopkgtest-virt-qemu | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/virt/autopkgtest-virt-qemu b/virt/autopkgtest-virt-qemu
index d89536c..57a7478 100755
--- a/virt/autopkgtest-virt-qemu
+++ b/virt/autopkgtest-virt-qemu
@@ -133,6 +133,8 @@ def check_ttyS1_shell():
         return True
     except VirtSubproc.Timeout:
         return False
+    else:
+        VirtSubproc.expect(term, b'#', 10)
 
 
 def setup_shell():
-- 
2.17.0

>From f7a86775d6040b02c97f44df9f8f3cb3ddb03ddc Mon Sep 17 00:00:00 2001
From: Simon McVittie <s...@debian.org>
Date: Fri, 20 Apr 2018 09:07:20 +0100
Subject: [PATCH 2/4] qemu: Turn shared directory setup into one long command

If we run this as four separate commands, then we'll get four shell
prompts, but because we only wait for the first, we'll get out of sync.

Signed-off-by: Simon McVittie <s...@debian.org>
---
 virt/autopkgtest-virt-qemu | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/virt/autopkgtest-virt-qemu b/virt/autopkgtest-virt-qemu
index 57a7478..6f3fef2 100755
--- a/virt/autopkgtest-virt-qemu
+++ b/virt/autopkgtest-virt-qemu
@@ -220,9 +220,9 @@ def setup_shared(shared_dir):
 
     term = VirtSubproc.get_unix_socket(os.path.join(workdir, 'ttyS1'))
 
-    term.send(b'''mkdir -p -m 1777 /run/autopkgtest/shared
-mount -t 9p -o trans=virtio,access=any autopkgtest /run/autopkgtest/shared
-chmod 1777 /run/autopkgtest/shared
+    term.send(b'''mkdir -p -m 1777 /run/autopkgtest/shared &&
+mount -t 9p -o trans=virtio,access=any autopkgtest /run/autopkgtest/shared &&
+chmod 1777 /run/autopkgtest/shared &&
 touch /run/autopkgtest/shared/done_shared
 ''')
 
-- 
2.17.0

>From 0693729fc4d89fe937d2f975ab8ce28c3a44139e Mon Sep 17 00:00:00 2001
From: Simon McVittie <s...@debian.org>
Date: Fri, 20 Apr 2018 09:09:52 +0100
Subject: [PATCH 3/4] qemu: Wait for shell prompt before flag file

Waiting for the commands to complete on the qemu side seems to be
a bit more reliable than waiting for the flag file on the host side.

Signed-off-by: Simon McVittie <s...@debian.org>
---
 virt/autopkgtest-virt-qemu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/virt/autopkgtest-virt-qemu b/virt/autopkgtest-virt-qemu
index 6f3fef2..89efaea 100755
--- a/virt/autopkgtest-virt-qemu
+++ b/virt/autopkgtest-virt-qemu
@@ -225,12 +225,12 @@ mount -t 9p -o trans=virtio,access=any autopkgtest /run/autopkgtest/shared &&
 chmod 1777 /run/autopkgtest/shared &&
 touch /run/autopkgtest/shared/done_shared
 ''')
+    VirtSubproc.expect(term, b'#', 30)
 
     with VirtSubproc.timeout(10, 'timed out on client shared directory setup'):
         flag = os.path.join(shared_dir, 'done_shared')
         while not os.path.exists(flag):
             time.sleep(0.2)
-    VirtSubproc.expect(term, b'#', 30)
 
     # ensure that root has $HOME set
     term.send(b'[ -n "$HOME" ] || export HOME=`getent passwd root|cut -f6 -d:`\n')
-- 
2.17.0

>From cb61fac93e69bd356f3def50f548ebd0b0002d80 Mon Sep 17 00:00:00 2001
From: Simon McVittie <s...@debian.org>
Date: Fri, 20 Apr 2018 09:11:20 +0100
Subject: [PATCH 4/4] qemu: Manipulate shell prompt to match commands to
 responses

Instead of waiting for "#", which could legitimately appear in command
output, set PS1 to a random string and wait for that string to appear.

Signed-off-by: Simon McVittie <s...@debian.org>
---
 virt/autopkgtest-virt-qemu | 37 +++++++++++++++++++++----------------
 1 file changed, 21 insertions(+), 16 deletions(-)

diff --git a/virt/autopkgtest-virt-qemu b/virt/autopkgtest-virt-qemu
index 89efaea..0299353 100755
--- a/virt/autopkgtest-virt-qemu
+++ b/virt/autopkgtest-virt-qemu
@@ -36,6 +36,7 @@ import errno
 import fcntl
 import re
 import argparse
+import random
 
 sys.path.insert(0, '/usr/share/autopkgtest/lib')
 sys.path.insert(0, os.path.join(os.path.dirname(os.path.dirname(
@@ -137,6 +138,17 @@ def check_ttyS1_shell():
         VirtSubproc.expect(term, b'#', 10)
 
 
+def remote_command(term, command, timeout=10):
+    if isinstance(command, str):
+        command = command.encode('utf-8')
+
+    r1 = random.randint(0, 0x7FFFFFFF)
+    r2 = random.randint(0, 0x7FFFFFFF)
+
+    term.send(b'PS1="%08x"-"%08x# "; %s' % (r1, r2, command))
+    VirtSubproc.expect(term, b'%08x-%08x#' % (r1, r2), timeout)
+
+
 def setup_shell():
     '''Log into the VM and set up root shell on ttyS1'''
 
@@ -197,12 +209,10 @@ def setup_baseimage():
 
     # Setup udev rules for /dev/baseimage; set link_priority to -1024 so
     # that the duplicate UUIDs of the partitions will have no effect.
-    term.send(b'''mkdir -p -m 0755 /run/udev/rules.d ; printf '# Created by autopkgtest-virt-qemu\\n%s\\n%s\\n%s\\n' 'KERNEL=="vd*[!0-9]", ENV{ID_SERIAL}=="BASEIMAGE", OPTIONS+="link_priority=-1024", SYMLINK+="baseimage", MODE="0664"' 'KERNEL=="vd*[0-9]",  ENV{ID_SERIAL}=="BASEIMAGE", OPTIONS+="link_priority=-1024"' 'KERNEL=="vd*", ENV{ID_SERIAL}=="BASEIMAGE", ENV{ID_FS_TYPE}:="", ENV{ID_FS_USAGE}:="", ENV{ID_FS_UUID}:=""' > /run/udev/rules.d/61-baseimage.rules\n''')
-    VirtSubproc.expect(term, b'#', 10)
+    remote_command(term, b'''mkdir -p -m 0755 /run/udev/rules.d ; printf '# Created by autopkgtest-virt-qemu\\n%s\\n%s\\n%s\\n' 'KERNEL=="vd*[!0-9]", ENV{ID_SERIAL}=="BASEIMAGE", OPTIONS+="link_priority=-1024", SYMLINK+="baseimage", MODE="0664"' 'KERNEL=="vd*[0-9]",  ENV{ID_SERIAL}=="BASEIMAGE", OPTIONS+="link_priority=-1024"' 'KERNEL=="vd*", ENV{ID_SERIAL}=="BASEIMAGE", ENV{ID_FS_TYPE}:="", ENV{ID_FS_USAGE}:="", ENV{ID_FS_UUID}:=""' > /run/udev/rules.d/61-baseimage.rules\n''')
     # Reload udev to make sure the rules take effect (udev only auto-
     # rereads rules every 3 seconds)
-    term.send(b'udevadm control --reload\n')
-    VirtSubproc.expect(term, b'#', 10)
+    remote_command(term, b'udevadm control --reload\n')
 
     # Add the base image as an additional drive
     monitor = VirtSubproc.get_unix_socket(os.path.join(workdir, 'monitor'))
@@ -211,8 +221,7 @@ def setup_baseimage():
     monitor.send(b'device_add virtio-blk-pci,drive=drive-baseimage,id=virtio-baseimage\n')
     VirtSubproc.expect(monitor, b'(qemu)', 10)
 
-    term.send(b'udevadm settle --exit-if-exists=/dev/baseimage\n')
-    VirtSubproc.expect(term, b'#', 10)
+    remote_command(term, b'udevadm settle --exit-if-exists=/dev/baseimage\n')
 
 
 def setup_shared(shared_dir):
@@ -220,12 +229,11 @@ def setup_shared(shared_dir):
 
     term = VirtSubproc.get_unix_socket(os.path.join(workdir, 'ttyS1'))
 
-    term.send(b'''mkdir -p -m 1777 /run/autopkgtest/shared &&
+    remote_command(term, b'''mkdir -p -m 1777 /run/autopkgtest/shared &&
 mount -t 9p -o trans=virtio,access=any autopkgtest /run/autopkgtest/shared &&
 chmod 1777 /run/autopkgtest/shared &&
 touch /run/autopkgtest/shared/done_shared
-''')
-    VirtSubproc.expect(term, b'#', 30)
+''', 30)
 
     with VirtSubproc.timeout(10, 'timed out on client shared directory setup'):
         flag = os.path.join(shared_dir, 'done_shared')
@@ -233,8 +241,7 @@ touch /run/autopkgtest/shared/done_shared
             time.sleep(0.2)
 
     # ensure that root has $HOME set
-    term.send(b'[ -n "$HOME" ] || export HOME=`getent passwd root|cut -f6 -d:`\n')
-    VirtSubproc.expect(term, b'#', 5)
+    remote_command(term, b'[ -n "$HOME" ] || export HOME=`getent passwd root|cut -f6 -d:`\n', 5)
 
     # create helper for runcmd: cat data from its stdin (from a file) to stdout
     # eternally (like tail -f), but stop once either an "EOF" file exists and
@@ -242,7 +249,7 @@ touch /run/autopkgtest/shared/done_shared
     # arg), or an "exit flag" file exists.
     # We don't run that from /run/autopkgtest/shared as 9p from older QEMU
     # versions is buggy and causes "invalid numeric result" errors on that.
-    term.send(b'''PYTHON=$(which python3) || PYTHON=$(which python); cat <<EOF > /tmp/eofcat; chmod 755 /tmp/eofcat
+    remote_command(term, b'''PYTHON=$(which python3) || PYTHON=$(which python); cat <<EOF > /tmp/eofcat; chmod 755 /tmp/eofcat
 #!$PYTHON
 import sys, os, fcntl, time, errno
 (feof, fexit) = sys.argv[1:]
@@ -271,8 +278,7 @@ while not os.path.exists(fexit):
     if limit is not None and count >= limit:
         break
 EOF
-''')
-    VirtSubproc.expect(term, b'# ', 5)
+''', 5)
 
 
 def setup_config(shared_dir):
@@ -294,8 +300,7 @@ def setup_config(shared_dir):
 
         if tz:
             adtlog.debug('Copying host timezone %s to VM' % tz.decode())
-            term.send(b'echo ' + tz + b' > /etc/timezone; DEBIAN_FRONTEND=noninteractive dpkg-reconfigure tzdata\n')
-            VirtSubproc.expect(term, b'#', 30)
+            remote_command(term, b'echo ' + tz + b' > /etc/timezone; DEBIAN_FRONTEND=noninteractive dpkg-reconfigure tzdata\n', 30)
         else:
             adtlog.debug('Could not determine host timezone')
 
-- 
2.17.0

Reply via email to