Martin Sivák has uploaded a new change for review. Change subject: Add clean shutdown flag and checksum to the published metadata ......................................................................
Add clean shutdown flag and checksum to the published metadata The shutdown flag is published only when the agent shuts down cleanly and the checksum (crc32) is used to protect the metadata section from corruption. Both features will then be used together with the sanlock lease file to make sure a host can be decommissioned. Change-Id: Idbcc3a345a08a03e95edcbdf9f3c6f7e6fa1a5fd Signed-off-by: Martin Sivak <[email protected]> --- M ovirt_hosted_engine_ha/agent/hosted_engine.py M ovirt_hosted_engine_ha/agent/states.py M ovirt_hosted_engine_ha/lib/metadata.py 3 files changed, 76 insertions(+), 15 deletions(-) git pull ssh://gerrit.ovirt.org:29418/ovirt-hosted-engine-ha refs/changes/76/38276/1 diff --git a/ovirt_hosted_engine_ha/agent/hosted_engine.py b/ovirt_hosted_engine_ha/agent/hosted_engine.py index 82c2b61..a96ceb0 100644 --- a/ovirt_hosted_engine_ha/agent/hosted_engine.py +++ b/ovirt_hosted_engine_ha/agent/hosted_engine.py @@ -26,6 +26,7 @@ import socket import subprocess import time +import binascii import sanlock @@ -40,6 +41,7 @@ from ..lib import vds_client as vdsc from ..lib.storage_backends import StorageBackendTypes, VdsmBackend from .state_machine import EngineStateMachine +from .states import AgentStopped class MetadataTooNewError(Exception): @@ -290,6 +292,11 @@ def host_id(self): return int(self._config.get(config.ENGINE, config.HOST_ID)) + def publish(self, state): + blocks = self._generate_local_blocks(state) + self._push_to_storage(blocks) + self.update_hosts_state(state) + def start_monitoring(self): error_count = 0 @@ -332,9 +339,7 @@ state.data.best_score_host["score"]) # publish the current state - blocks = self._generate_local_blocks(state) - self._push_to_storage(blocks) - self.update_hosts_state(state) + self.publish(state) except Exception as e: self._log.warning("Error while monitoring engine: %s", str(e)) if not (isinstance(e, ex.DisconnectionError) or @@ -357,6 +362,10 @@ self._log.log(log_level, "Sleeping %d seconds", delay) time.sleep(delay) 
+ + # Publish stopped status + stopped = AgentStopped(self.fsm.state.data) + self.publish(stopped) self._log.debug("Disconnecting from ha-broker") if self._broker and self._broker.is_connected(): @@ -677,17 +686,34 @@ score = state.score(self.fsm.logger) lm = state.data.stats.local md = state.metadata() - data = ("{md_parse_vers}|{md_feature_vers}|{ts_int}" - "|{host_id}|{score}|{engine_status}|{name}|{maintenance}" - .format(md_parse_vers=constants.METADATA_PARSE_VERSION, - md_feature_vers=constants.METADATA_FEATURE_VERSION, - # system timestamp - ts_int=state.data.stats.collect_start, - host_id=state.data.stats.host_id, - score=score, - engine_status=json.dumps(lm['engine-health']), - name=self._hostname, - maintenance=1 if md["maintenance"] else 0)) + + tokens = [] + # Metadata lowest compatible version + tokens.append(constants.METADATA_PARSE_VERSION) + # Metadata highest compatible version + tokens.append(constants.METADATA_FEATURE_VERSION) + # System timestamp + tokens.append(state.data.stats.collect_start) + # Host ID + tokens.append(state.data.stats.host_id) + # Host score + tokens.append(score) + # Engine status + tokens.append(json.dumps(lm['engine-health'])) + # System hostname + tokens.append(self._hostname) + # Local maintenance flag + tokens.append(1 if md["maintenance"] else 0) + # Agent stopped cleanly flag + tokens.append(1 if "stopped" in md and md["stopped"] else 0) + # CRC32 in hex (use 0 for computing the crc) + tokens.append(metadata.EMPTY_CRC32) + + data = "|".join(str(t) for t in tokens) + crc32 = metadata.CRC32_FORMAT % (binascii.crc32(data) & 0xffffffff) + tokens[9] = crc32 + data = "|".join(str(t) for t in tokens) + if len(data) > constants.METADATA_BLOCK_BYTES: raise Exception("Output metadata too long ({0} bytes)" .format(data)) diff --git a/ovirt_hosted_engine_ha/agent/states.py b/ovirt_hosted_engine_ha/agent/states.py index 90446a9..047c19f 100644 --- a/ovirt_hosted_engine_ha/agent/states.py +++ 
b/ovirt_hosted_engine_ha/agent/states.py @@ -188,10 +188,21 @@ def metadata(self): data = {"state": self.__class__.__name__, - "maintenance": False} + "maintenance": False, + "stopped": False} return data +class AgentStopped(EngineState): + def score(self, logger): + return 0 + + def metadata(self): + md = super(AgentStopped, self).metadata() + md["stopped"] = True + return md + + class LocalMaintenance(EngineState): """ This state is entered any time the host gets to local maintenance state. diff --git a/ovirt_hosted_engine_ha/lib/metadata.py b/ovirt_hosted_engine_ha/lib/metadata.py index 43fdff3..16a974e 100644 --- a/ovirt_hosted_engine_ha/lib/metadata.py +++ b/ovirt_hosted_engine_ha/lib/metadata.py @@ -18,12 +18,15 @@ # import re +import binascii from ..env import constants from ..lib import util from exceptions import FatalMetadataError from exceptions import MetadataError +EMPTY_CRC32 = '00000000' +CRC32_FORMAT = "%08x" def to_bool_rep(value): """ @@ -104,6 +107,11 @@ name - hostname of described host maintenance - 0 or 1 representing host is operational or in local maintenance + running - 0 or 1 representing the agent is running or + stopped (cleanly). + crc32 - eight hex characters representing the crc32 of the + whole 512B block (crc32 is computed with the field set + to '00000000'). 
- Next 3584 bytes (for a total of 4096): human-readable description of data to aid in debugging, including factors considered in the host score @@ -181,6 +189,22 @@ if len(tokens) >= 8: ret['maintenance'] = int(tokens[7]) > 0 + # support stopped cleanly flag if present, but ignore if it isn't + if len(tokens) >= 9: + ret['stopped'] = int(tokens[8]) > 0 + + # support crc32 field if present, but ignore if it isn't + if len(tokens) >= 10: + ret['crc32'] = tokens[9] + tokens[9] = EMPTY_CRC32 + data = "|".join(tokens) + crc32 = CRC32_FORMAT % (binascii.crc32(data) & 0xffffffff) + if ret['crc32'] != crc32: + raise MetadataError("Malformed metadata for host {0}:" + " provided checksum {1} does not match" + " the data {2}." + .format(host_id, ret[9], crc32)) + # Add human-readable summary from bytes 512+ extra = data[512:].rstrip('\0') if len(extra): -- To view, visit https://gerrit.ovirt.org/38276 To unsubscribe, visit https://gerrit.ovirt.org/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Idbcc3a345a08a03e95edcbdf9f3c6f7e6fa1a5fd Gerrit-PatchSet: 1 Gerrit-Project: ovirt-hosted-engine-ha Gerrit-Branch: master Gerrit-Owner: Martin Sivák <[email protected]> _______________________________________________ Engine-patches mailing list [email protected] http://lists.ovirt.org/mailman/listinfo/engine-patches
