Greg Padgett has uploaded a new change for review. Change subject: core: Fix errors for retry of failed DestroyImageCommand ......................................................................
core: Fix errors for retry of failed DestroyImageCommand Fix issues encountered while re-executing Live Merge after a failure of DestroyImageCommand: 1. Some images may have been marked illegal from a prior failure; this is an expected condition so allow live merge execution in this case. 2. The endSuccessfully and endWithFailure methods should always indicate their own success using setSucceeded(true) to avoid endAction retries. 3. If the DestroyImage SPM async task was started and converged before CoCo called doPolling() again on RemoveSnapshotSingleDiskLive, the child command list was not updated. Subsequently the end action methods were not able to call endAction on DestroyImageCommand and no commands in the Live Merge command tree would converge. This is fixed by updating the child command list before referencing it to ensure all children are present. Change-Id: Icbbcc30fa035de82de05c5c60ce224ed36aee831 Bug-Url: https://bugzilla.redhat.com/1213157 Signed-off-by: Greg Padgett <gpadg...@redhat.com> --- M backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/RemoveSnapshotCommand.java M backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/RemoveSnapshotSingleDiskLiveCommand.java 2 files changed, 38 insertions(+), 21 deletions(-) git pull ssh://gerrit.ovirt.org:29418/ovirt-engine refs/changes/53/40453/1 diff --git a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/RemoveSnapshotCommand.java b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/RemoveSnapshotCommand.java index 233927e..e63c473 100644 --- a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/RemoveSnapshotCommand.java +++ b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/RemoveSnapshotCommand.java @@ -394,7 +394,8 @@ DiskImagesValidator diskImagesValidator = new DiskImagesValidator(imagesToValidate); return validate(diskImagesValidator.diskImagesNotLocked()) && - validate(diskImagesValidator.diskImagesNotIllegal()); + (getVm().isQualifiedForLiveSnapshotMerge() + || validate(diskImagesValidator.diskImagesNotIllegal())); } protected boolean validateImageNotInTemplate() { diff --git a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/RemoveSnapshotSingleDiskLiveCommand.java b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/RemoveSnapshotSingleDiskLiveCommand.java index 04b1c43..1ad758f 100644 --- a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/RemoveSnapshotSingleDiskLiveCommand.java +++ b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/RemoveSnapshotSingleDiskLiveCommand.java @@ -66,18 +66,9 @@ getParameters().setChildCommands(new HashMap<RemoveSnapshotSingleDiskLiveStep, Guid>()); } - List<Guid> childCommandIds = CommandCoordinatorUtil.getChildCommandIds(getCommandId()); - if (childCommandIds.size() != getParameters().getChildCommands().size()) { - // Upon recovery or after invoking a new child command, our map may be missing an entry - for (Guid id : childCommandIds) { - if (!getParameters().getChildCommands().containsValue(id)) { - getParameters().getChildCommands().put(getParameters().getCommandStep(), id); - break; - } - } - } - Guid currentChildId = getParameters().getChildCommands().get( - getParameters().getCommandStep()); + // Upon recovery or after invoking a new child command, our map may be missing an entry + syncChildCommandList(); + Guid currentChildId = getCurrentChildId(); VdcReturnValueBase vdcReturnValue = null; if (currentChildId != null) { @@ -160,7 +151,30 @@ persistCommand(getParameters().getParentCommand(), true); if (nextCommand != null) { CommandCoordinatorUtil.executeAsyncCommand(nextCommand.getFirst(), nextCommand.getSecond(), cloneContextAndDetachFromParent()); + // Add the child, but wait, it's a race! child will start, task may spawn, get polled, and we won't have the child id } + } + + /** + * Updates (but does not persist) the parameters.childCommands list to ensure the current + * child command is present. This is necessary in various entry points called externally + * (e.g. by endAction()), which can be called after a child command is started but before + * the main proceedCommandExecution() loop has persisted the updated child list. + */ + private void syncChildCommandList() { + List<Guid> childCommandIds = CommandCoordinatorUtil.getChildCommandIds(getCommandId()); + if (childCommandIds.size() != getParameters().getChildCommands().size()) { + for (Guid id : childCommandIds) { + if (!getParameters().getChildCommands().containsValue(id)) { + getParameters().getChildCommands().put(getParameters().getCommandStep(), id); + break; + } + } + } + } + + private Guid getCurrentChildId() { + return getParameters().getChildCommands().get(getParameters().getCommandStep()); } private RemoveSnapshotSingleDiskLiveStep getInitialMergeStepForImage(Guid imageId) { @@ -378,9 +392,12 @@ @Override protected void endSuccessfully() { + // SPM tasks report their final status to the spawning command's parent (in this case, us) + // rather than the spawning command (ie DestroyImageCommand); therefore, we must redirect + // endAction so that upon SPM task completion the status is propagated to the proper command. if (getParameters().getCommandStep() == RemoveSnapshotSingleDiskLiveStep.DESTROY_IMAGE) { - Guid currentChildId = getParameters().getChildCommands().get( - getParameters().getCommandStep()); + syncChildCommandList(); + Guid currentChildId = getCurrentChildId(); if (!Guid.isNullOrEmpty(currentChildId)) { CommandBase<?> command = CommandCoordinatorUtil.retrieveCommand(currentChildId); if (command != null) { @@ -390,9 +407,8 @@ CommandCoordinatorUtil.getCommandEntity(currentChildId).setCallBackNotified(true); } } - } else { - setSucceeded(true); } + setSucceeded(true); } public void onFailed() { @@ -433,9 +449,10 @@ @Override protected void endWithFailure() { + // See comment in endSuccessfully() for an explanation of this redirection if (getParameters().getCommandStep() == RemoveSnapshotSingleDiskLiveStep.DESTROY_IMAGE) { - Guid currentChildId = getParameters().getChildCommands().get( - getParameters().getCommandStep()); + syncChildCommandList(); + Guid currentChildId = getCurrentChildId(); if (!Guid.isNullOrEmpty(currentChildId)) { CommandBase<?> command = CommandCoordinatorUtil.retrieveCommand(currentChildId); if (command != null) { @@ -445,9 +462,8 @@ cloneContextAndDetachFromParent()); } } - } else { - setSucceeded(true); } + setSucceeded(true); } @Override -- To view, visit https://gerrit.ovirt.org/40453 To unsubscribe, visit https://gerrit.ovirt.org/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Icbbcc30fa035de82de05c5c60ce224ed36aee831 Gerrit-PatchSet: 1 Gerrit-Project: ovirt-engine Gerrit-Branch: ovirt-engine-3.5 Gerrit-Owner: Greg Padgett <gpadg...@redhat.com> _______________________________________________ Engine-patches mailing list Engine-patches@ovirt.org http://lists.ovirt.org/mailman/listinfo/engine-patches