[
https://issues.apache.org/jira/browse/AURORA-1054?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14315172#comment-14315172
]
Bill Farner commented on AURORA-1054:
-------------------------------------
This test flaked the build twice yesterday (output captured below). [~wickman],
how do you think we should proceed?
{noformat}
self = <test_thermos_task_runner.TestThermosTaskRunnerIntegration object at
0x7f5299b93190>
def test_integration_stop(self):
with self.yield_sleepy(ThermosTaskRunner, sleep=1000, exit_code=0) as
task_runner:
task_runner.start()
task_runner.forked.wait()
assert task_runner.status is None
> task_runner.stop()
src/test/python/apache/aurora/executor/test_thermos_task_runner.py:173:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <apache.aurora.executor.thermos_task_runner.ThermosTaskRunner object at
0x7f5299b68850>
timeout = Amount(1, mins)
def stop(self, timeout=MAX_WAIT):
"""Stop the runner. If it's already completed, no-op. If it's still
running, issue a kill."""
log.info('ThermosTaskRunner is shutting down.')
if not self.forking.is_set():
raise TaskError('Failed to call TaskRunner.start.')
log.info('Invoking runner HTTP teardown.')
self._terminate_http()
log.info('Invoking runner.kill')
self.kill()
waited = Amount(0, Time.SECONDS)
while self.is_alive and waited < timeout:
self._clock.sleep(self.POLL_INTERVAL.as_(Time.SECONDS))
waited += self.POLL_INTERVAL
if not self.is_alive and self.task_state() != TaskState.ACTIVE:
return
log.info('Thermos task did not shut down cleanly, rebinding to kill.')
self.quitquitquit()
while not self._monitor.finished and waited < timeout:
self._clock.sleep(self.POLL_INTERVAL.as_(Time.SECONDS))
waited += self.POLL_INTERVAL
if not self._monitor.finished:
> raise TaskError('Task did not stop within deadline.')
E TaskError: Task did not stop within deadline.
/tmp/user/20000/tmpbtYWk7/apache/aurora/executor/thermos_task_runner.py:348:
TaskError
{noformat}
{noformat}
self = <test_thermos_task_runner.TestThermosTaskRunnerIntegration object at
0x7fc2350691d0>
def test_integration_stop(self):
with self.yield_sleepy(ThermosTaskRunner, sleep=1000, exit_code=0) as
task_runner:
task_runner.start()
task_runner.forked.wait()
assert task_runner.status is None
> task_runner.stop()
src/test/python/apache/aurora/executor/test_thermos_task_runner.py:173:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <apache.aurora.executor.thermos_task_runner.ThermosTaskRunner object at
0x7fc235040850>
timeout = Amount(1, mins)
def stop(self, timeout=MAX_WAIT):
"""Stop the runner. If it's already completed, no-op. If it's still
running, issue a kill."""
log.info('ThermosTaskRunner is shutting down.')
if not self.forking.is_set():
raise TaskError('Failed to call TaskRunner.start.')
log.info('Invoking runner HTTP teardown.')
self._terminate_http()
log.info('Invoking runner.kill')
self.kill()
waited = Amount(0, Time.SECONDS)
while self.is_alive and waited < timeout:
self._clock.sleep(self.POLL_INTERVAL.as_(Time.SECONDS))
waited += self.POLL_INTERVAL
if not self.is_alive and self.task_state() != TaskState.ACTIVE:
return
log.info('Thermos task did not shut down cleanly, rebinding to kill.')
self.quitquitquit()
while not self._monitor.finished and waited < timeout:
self._clock.sleep(self.POLL_INTERVAL.as_(Time.SECONDS))
waited += self.POLL_INTERVAL
if not self._monitor.finished:
> raise TaskError('Task did not stop within deadline.')
E TaskError: Task did not stop within deadline.
/tmp/user/20000/tmp3UBcyp/apache/aurora/executor/thermos_task_runner.py:348:
TaskError
{noformat}
> src.test.python.apache.aurora.executor.thermos_task_runner appears to be flaky
> ------------------------------------------------------------------------------
>
> Key: AURORA-1054
> URL: https://issues.apache.org/jira/browse/AURORA-1054
> Project: Aurora
> Issue Type: Story
> Components: Executor
> Reporter: Bill Farner
>
> I've seen this test fail on a few reviews recently, but it succeeds on a retry.
> {noformat}
> ==================== FAILURES ====================
> TestThermosTaskRunnerIntegration.test_integration_stop
>
> self =
> <test_thermos_task_runner.TestThermosTaskRunnerIntegration object at
> 0x7ff3e1dc6fd0>
>
> def test_integration_stop(self):
> with self.yield_sleepy(ThermosTaskRunner,
> sleep=1000, exit_code=0) as task_runner:
> task_runner.start()
> task_runner.forked.wait()
>
> assert task_runner.status is None
>
> task_runner.stop()
>
> > assert task_runner.status is not None
> E assert None is not None
> E + where None =
> <apache.aurora.executor.thermos_task_runner.ThermosTaskRunner object at
> 0x7ff3e3333490>.status
>
>
> src/test/python/apache/aurora/executor/test_thermos_task_runner.py:175:
> AssertionError
> -------------- Captured stderr call --------------
> Writing log files to disk in /tmp/user/2396/tmpqHF5bz
> ERROR] Could not quitquitquit runner: Cannot take
> control of a task in terminal state.
> generated xml file:
> /home/jenkins/jenkins-slave/workspace/AuroraBot/dist/test-results/src.test.python.apache.aurora.executor.thermos_task_runner.xml
>
> ====== 1 failed, 7 passed in 81.16 seconds =======
> src.test.python.apache.aurora.admin.admin
> ..... SUCCESS
> src.test.python.apache.aurora.admin.host_maintenance
> ..... SUCCESS
> src.test.python.apache.aurora.admin.maintenance
> ..... SUCCESS
> src.test.python.apache.aurora.client.api.api
> ..... SUCCESS
>
> src.test.python.apache.aurora.client.api.instance_watcher
> ..... SUCCESS
> src.test.python.apache.aurora.client.api.job_monitor
> ..... SUCCESS
> src.test.python.apache.aurora.client.api.mux
> ..... SUCCESS
> src.test.python.apache.aurora.client.api.quota_check
> ..... SUCCESS
> src.test.python.apache.aurora.client.api.restarter
> ..... SUCCESS
>
> src.test.python.apache.aurora.client.api.scheduler_client
> ..... SUCCESS
> src.test.python.apache.aurora.client.api.sla
> ..... SUCCESS
> src.test.python.apache.aurora.client.api.task_util
> ..... SUCCESS
> src.test.python.apache.aurora.client.api.updater
> ..... SUCCESS
> src.test.python.apache.aurora.client.api.updater_util
> ..... SUCCESS
> src.test.python.apache.aurora.client.base
> ..... SUCCESS
> src.test.python.apache.aurora.client.binding_helper
> ..... SUCCESS
> src.test.python.apache.aurora.client.cli.api
> ..... SUCCESS
> src.test.python.apache.aurora.client.cli.client
> ..... SUCCESS
> src.test.python.apache.aurora.client.cli.command_hooks
> ..... SUCCESS
> src.test.python.apache.aurora.client.cli.config
> ..... SUCCESS
> src.test.python.apache.aurora.client.cli.cron
> ..... SUCCESS
> src.test.python.apache.aurora.client.cli.inspect
> ..... SUCCESS
> src.test.python.apache.aurora.client.cli.job
> ..... SUCCESS
> src.test.python.apache.aurora.client.cli.plugins
> ..... SUCCESS
> src.test.python.apache.aurora.client.cli.quota
> ..... SUCCESS
> src.test.python.apache.aurora.client.cli.sla
> ..... SUCCESS
> src.test.python.apache.aurora.client.cli.supdate
> ..... SUCCESS
> src.test.python.apache.aurora.client.cli.task
> ..... SUCCESS
> src.test.python.apache.aurora.client.cli.update
> ..... SUCCESS
> src.test.python.apache.aurora.client.cli.version
> ..... SUCCESS
> src.test.python.apache.aurora.client.config
> ..... SUCCESS
> src.test.python.apache.aurora.client.hooks.hooked_api
> ..... SUCCESS
>
> src.test.python.apache.aurora.client.hooks.non_hooked_api
> ..... SUCCESS
> src.test.python.apache.aurora.common.test_aurora_job_key
> ..... SUCCESS
> src.test.python.apache.aurora.common.test_cluster
> ..... SUCCESS
> src.test.python.apache.aurora.common.test_cluster_option
> ..... SUCCESS
> src.test.python.apache.aurora.common.test_clusters
> ..... SUCCESS
> src.test.python.apache.aurora.common.test_http_signaler
> ..... SUCCESS
> src.test.python.apache.aurora.common.test_pex_version
> ..... SUCCESS
> src.test.python.apache.aurora.common.test_shellify
> ..... SUCCESS
> src.test.python.apache.aurora.common.test_transport
> ..... SUCCESS
> src.test.python.apache.aurora.config.test_base
> ..... SUCCESS
>
> src.test.python.apache.aurora.config.test_constraint_parsing
> ..... SUCCESS
> src.test.python.apache.aurora.config.test_loader
> ..... SUCCESS
> src.test.python.apache.aurora.config.test_thrift
> ..... SUCCESS
> src.test.python.apache.aurora.executor.common.task_info
> ..... SUCCESS
> src.test.python.apache.aurora.executor.executor_base
> ..... SUCCESS
> src.test.python.apache.aurora.executor.executor_detector
> ..... SUCCESS
> src.test.python.apache.aurora.executor.executor_vars
> ..... SUCCESS
> src.test.python.apache.aurora.executor.status_manager
> ..... SUCCESS
>
> src.test.python.apache.aurora.executor.thermos_task_runner
> ..... FAILURE
> src.test.python.apache.thermos.common.test_pathspec
> ..... SUCCESS
>
> src.test.python.apache.thermos.core.test_runner_integration
> ..... SUCCESS
> src.test.python.apache.thermos.monitoring.test_disk
> ..... SUCCESS
>
> FAILURE
> [31m
> {noformat}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)