Jiří Moskovčák has uploaded a new change for review.

Change subject: try harder when initializing the sanlock
......................................................................

try harder when initializing the sanlock

The storage initialization is an asynchronous task, so sometimes
we try to get the lock before the lock file exists. In
that case we should just wait a while and try again.

Change-Id: Iac39bedfa78479f71674c3f5e673ba3814f52279
Signed-off-by: Jiri Moskovcak <jmosk...@redhat.com>
---
M ovirt_hosted_engine_ha/agent/hosted_engine.py
1 file changed, 46 insertions(+), 31 deletions(-)


  git pull ssh://gerrit.ovirt.org:29418/ovirt-hosted-engine-ha refs/changes/47/28047/1

diff --git a/ovirt_hosted_engine_ha/agent/hosted_engine.py b/ovirt_hosted_engine_ha/agent/hosted_engine.py
index 44dfe72..85e9123 100644
--- a/ovirt_hosted_engine_ha/agent/hosted_engine.py
+++ b/ovirt_hosted_engine_ha/agent/hosted_engine.py
@@ -461,37 +461,52 @@
                            " is acquired (file: %s)",
                            constants.LOCKSPACE_NAME, self.host_id, lease_file)
 
-        try:
-            sanlock.add_lockspace(constants.LOCKSPACE_NAME,
-                                  self.host_id, lease_file)
-        except sanlock.SanlockException as e:
-            acquired_lock = False
-            msg = None
-            if hasattr(e, 'errno'):
-                if e.errno == errno.EEXIST:
-                    self._log.debug("Host already holds lock")
-                    acquired_lock = True
-                elif e.errno == errno.EINVAL:
-                    msg = ("cannot get lock on host id {0}:"
-                           " host already holds lock on a different host id"
-                           .format(self.host_id))
-                elif e.errno == errno.EINTR:
-                    msg = ("cannot get lock on host id {0}:"
-                           " sanlock operation interrupted (will retry)"
-                           .format(self.host_id))
-                elif e.errno == errno.EINPROGRESS:
-                    msg = ("cannot get lock on host id {0}:"
-                           " sanlock operation in progress (will retry)"
-                           .format(self.host_id))
-            if not acquired_lock:
-                if not msg:
-                    msg = ("cannot get lock on host id {0}: {1}"
-                           .format(self.host_id, str(e)))
-                self._log.error(msg, exc_info=True)
-                raise Exception("Failed to initialize sanlock: {0}"
-                                .format(msg))
-        else:
-            self._log.info("Acquired lock on host id %d", self.host_id)
+        max_attempts = 5
+        attempt_delay = 2  # how many secs should we wait before next attempt
+        for attempt in xrange(max_attempts):
+            try:
+                sanlock.add_lockspace(constants.LOCKSPACE_NAME,
+                                      self.host_id, lease_file)
+            except sanlock.SanlockException as e:
+                if hasattr(e, 'errno'):
+                    if e.errno == errno.EEXIST:
+                        self._log.debug("Host already holds lock")
+                        break
+                    elif e.errno == errno.EINVAL:
+                        self._log.error("cannot get lock on host id {0}: "
+                                        "host already holds lock on a different"
+                                        " host id"
+                                        .format(self.host_id))
+                        raise  # this shouldn't happen, so throw the exception
+                    elif e.errno == errno.EINTR:
+                        self._log.warn("cannot get lock on host id {0}:"
+                                       " sanlock operation interrupted"
+                                       " (will retry)"
+                                       .format(self.host_id))
+                    elif e.errno == errno.EINPROGRESS:
+                        self._log.warn("cannot get lock on host id {0}:"
+                                       " sanlock operation in progress"
+                                       "(will retry)"
+                                       .format(self.host_id))
+                    elif e.errno == errno.ENOENT:
+                        self._log.warn("cannot get lock on host id {0}:"
+                                       " the lock file '{1}' is missing"
+                                       "(will retry)"
+                                       .format(self.host_id, lease_file))
+            else:  # no exception, we acquired the lock
+                self._log.info("Acquired lock on host id %d", self.host_id)
+                break
+
+            # some temporary problem has occurred (usually waiting for
+            # the storage), so wait a while and try again
+            self._log.info("Failed to acquire the lock Waiting '{0}' before"
+                           " the next attempt".format(attempt_delay))
+            time.sleep(attempt_delay)
+        else:  # happens only if all attempts are exhausted
+            raise Exception("Failed to initialize sanlock, number of errors has"
+                            " exceeded the limit")
+
+        # we get here only if the the lock is acquired
         self._sanlock_initialized = True
 
     def _initialize_domain_monitor(self):


-- 
To view, visit http://gerrit.ovirt.org/28047
To unsubscribe, visit http://gerrit.ovirt.org/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Iac39bedfa78479f71674c3f5e673ba3814f52279
Gerrit-PatchSet: 1
Gerrit-Project: ovirt-hosted-engine-ha
Gerrit-Branch: master
Gerrit-Owner: Jiří Moskovčák <jmosk...@redhat.com>
_______________________________________________
Engine-patches mailing list
Engine-patches@ovirt.org
http://lists.ovirt.org/mailman/listinfo/engine-patches

Reply via email to