Greg Padgett has uploaded a new change for review. Change subject: broker, agent: more robust service startup/shutdown ......................................................................
broker, agent: more robust service startup/shutdown The agent and broker had no ordering, and there was no way for an external program to reliably tell when the broker was ready to serve requests. With this change: - The agent and broker services gain an ordering dependency between them in systemd. - The broker attempts to clean its socket file upon exit, so that the existence of the file generally indicates that the broker is ready to serve requests. - The agent will be more patient in attempting to connect to the broker. Change-Id: I392ddf54503d7b9ef3448d22385b547222c0cc86 Bug-Url: https://bugzilla.redhat.com/1018829 Signed-off-by: Greg Padgett <[email protected]> --- M initscripts/ovirt-ha-agent.service M ovirt_hosted_engine_ha/agent/constants.py.in M ovirt_hosted_engine_ha/agent/hosted_engine.py M ovirt_hosted_engine_ha/broker/broker.py M ovirt_hosted_engine_ha/broker/listener.py M ovirt_hosted_engine_ha/lib/brokerlink.py 6 files changed, 46 insertions(+), 6 deletions(-) git pull ssh://gerrit.ovirt.org:29418/ovirt-hosted-engine-ha refs/changes/61/20561/1 diff --git a/initscripts/ovirt-ha-agent.service b/initscripts/ovirt-ha-agent.service index 76344ff..8282bff 100644 --- a/initscripts/ovirt-ha-agent.service +++ b/initscripts/ovirt-ha-agent.service @@ -1,6 +1,7 @@ [Unit] Description=oVirt Hosted Engine High Availability Monitoring Agent -Requires=ovirt-ha-broker.service +Wants=ovirt-ha-broker.service +After=ovirt-ha-broker.service [Service] Type=forking @@ -10,3 +11,4 @@ [Install] WantedBy=multi-user.target +Also=ovirt-ha-broker.service diff --git a/ovirt_hosted_engine_ha/agent/constants.py.in b/ovirt_hosted_engine_ha/agent/constants.py.in index 20ce115..f05e74e 100644 --- a/ovirt_hosted_engine_ha/agent/constants.py.in +++ b/ovirt_hosted_engine_ha/agent/constants.py.in @@ -33,6 +33,7 @@ METADATA_BLOCK_BYTES = 512 SERVICE_TYPE = 'hosted-engine' +BROKER_CONNECTION_RETRIES = 10 HOST_ALIVE_TIMEOUT_SECS = 60 ENGINE_RETRY_EXPIRATION_SECS = 600 ENGINE_RETRY_COUNT = 3 diff --git a/ovirt_hosted_engine_ha/agent/hosted_engine.py b/ovirt_hosted_engine_ha/agent/hosted_engine.py index 89d3d53..c8cf5ad 100644 --- a/ovirt_hosted_engine_ha/agent/hosted_engine.py +++ b/ovirt_hosted_engine_ha/agent/hosted_engine.py @@ -278,7 +278,7 @@ if not self._broker: self._broker = brokerlink.BrokerLink() try: - self._broker.connect() + self._broker.connect(constants.BROKER_CONNECTION_RETRIES) except Exception as e: self._log.error("Failed to connect to ha-broker: %s", str(e)) raise diff --git a/ovirt_hosted_engine_ha/broker/broker.py b/ovirt_hosted_engine_ha/broker/broker.py index 0cf06c4..60eddf0 100644 --- a/ovirt_hosted_engine_ha/broker/broker.py +++ b/ovirt_hosted_engine_ha/broker/broker.py @@ -112,6 +112,7 @@ # Server shutdown... self._log.info("Server shutting down") + self._listener.clean_up() os.unlink(constants.PID_FILE) sys.exit(0) diff --git a/ovirt_hosted_engine_ha/broker/listener.py b/ovirt_hosted_engine_ha/broker/listener.py index 44085f9..80e888e 100644 --- a/ovirt_hosted_engine_ha/broker/listener.py +++ b/ovirt_hosted_engine_ha/broker/listener.py @@ -47,8 +47,7 @@ self._storage_broker_instance_access_lock = threading.Lock() self._need_exit = False - if os.path.exists(constants.BROKER_SOCKET_FILE): - os.unlink(constants.BROKER_SOCKET_FILE) + self._remove_socket_file() self._server = ThreadedStreamServer( constants.BROKER_SOCKET_FILE, ConnectionHandler, True, self) @@ -98,6 +97,18 @@ """ self._need_exit = True + def clean_up(self): + """ + Perform final listener cleanup. This is effectively the complement + of __init__(), to be called after the final call to listen() and + close_connections() have been made. + """ + self._remove_socket_file() + + def _remove_socket_file(self): + if os.path.exists(constants.BROKER_SOCKET_FILE): + os.unlink(constants.BROKER_SOCKET_FILE) + class ThreadedStreamServer(SocketServer.ThreadingMixIn, SocketServer.UnixStreamServer): diff --git a/ovirt_hosted_engine_ha/lib/brokerlink.py b/ovirt_hosted_engine_ha/lib/brokerlink.py index 3d23e6c..f53371b 100644 --- a/ovirt_hosted_engine_ha/lib/brokerlink.py +++ b/ovirt_hosted_engine_ha/lib/brokerlink.py @@ -21,6 +21,7 @@ import contextlib import logging import socket +import time from ..env import constants from ..lib.exceptions import DisconnectionError @@ -33,13 +34,19 @@ self._log = logging.getLogger("BrokerLink") self._socket = None - def connect(self): + def connect(self, retries=0): + """ + Connect to the HA Broker. Upon failure, reconnection attempts will + be made approximately once per second until the specified number of + retries have been made. An exception will be raised if a connection + cannot be established. + """ if self.is_connected(): return self._log.info("Connecting to ha-broker") + try: self._socket = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) - self._socket.connect(constants.BROKER_SOCKET_FILE) except socket.error as e: self._log.error("Failed to connect to broker: %s", str(e)) if self._socket: @@ -47,6 +54,24 @@ self._socket = None raise + attempt = 0 + while True: + try: + self._socket.connect(constants.BROKER_SOCKET_FILE) + except socket.error as e: + if attempt < retries: + self._log.info("Failed to connect to broker: %s", str(e)) + self._log.info("Retrying broker connection...") + time.sleep(1) + continue + else: + self._log.error("Failed to connect to broker: %s", str(e)) + self._socket.close() + self._socket = None + raise + self._log.debug("Successfully connected") + break + def is_connected(self): return self._socket is not None -- To view, visit http://gerrit.ovirt.org/20561 To unsubscribe, visit http://gerrit.ovirt.org/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I392ddf54503d7b9ef3448d22385b547222c0cc86 Gerrit-PatchSet: 1 Gerrit-Project: ovirt-hosted-engine-ha Gerrit-Branch: master Gerrit-Owner: Greg Padgett <[email protected]> _______________________________________________ Engine-patches mailing list [email protected] http://lists.ovirt.org/mailman/listinfo/engine-patches
