On 28.12.2015 16:15, Michael Biebl wrote:
Am 28.12.2015 um 13:33 schrieb Yuriy M. Kaminskiy:
This bug is still present in jessie's systemd 215-17+deb8u1 (backtrace
is same).
If someone, who is able to reproduce the issue, is willing to backport
the necessary changes to v215, I'd be willing to merge this patch for
stable, assuming the change is not too invasive.
This issue was claimed to be "probably fixed" by commit
9c3349e23b14db27e7ba45f82cf647899c563ea9.
I've cherry-picked that commit to v215, fixed conflicts, stripped out
cosmetic changes (and removed one chunk that was later reverted
upstream), see attached.
Unfortunately, I have no idea how to reliable reproduce this issue (and
have no spare machine for tests), so it is completely untested.
Given it is not clear if this issue is completely fixed, I'd rather
replace "assert()" with "return"; maybe, n_running_jobs==0 will break
something somewhere else, but at least it should not outright kill
systemd process with SIGFPE (or assert), see second patch.
>From 82612551e5c5acf64eab66ec6d43d5380e84acf2 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lenn...@poettering.net>
Date: Mon, 5 Jan 2015 17:22:10 +0100
Subject: [PATCH] core: rework counting of running jobs
Let's unify the code that counts the running jobs a bit, in order to
make sure we are less likely to miss one.
This is related to this bug:
https://bugs.freedesktop.org/show_bug.cgi?id=87349
However, it probably won't fix it fully, and I cannot reproduce the issue.
The change also adds an explicit assert change when the counter is off.
(cherry picked from commit 9c3349e23b14db27e7ba45f82cf647899c563ea9)
(and squashed partial revert from commit d6483ba7834b9e63caee929c9d6373b796be1b21)
Conflicts:
src/core/job.c
src/core/manager.c
---
src/core/job.c | 61 +++++++++++++++++++++++++++++++++++++++---------------
src/core/manager.c | 8 ++-----
src/core/unit.c | 11 +++++-----
3 files changed, 52 insertions(+), 28 deletions(-)
diff --git a/src/core/job.c b/src/core/job.c
index dc4f441..cfc63cf 100644
--- a/src/core/job.c
+++ b/src/core/job.c
@@ -96,11 +96,39 @@ void job_free(Job *j) {
free(j);
}
+static void job_set_state(Job *j, JobState state) {
+ assert(j);
+ assert(state >= 0);
+ assert(state < _JOB_STATE_MAX);
+
+ if (j->state == state)
+ return;
+
+ j->state = state;
+
+ if (!j->installed)
+ return;
+
+ if (j->state == JOB_RUNNING)
+ j->unit->manager->n_running_jobs++;
+ else {
+ assert(j->state == JOB_WAITING);
+ assert(j->unit->manager->n_running_jobs > 0);
+
+ j->unit->manager->n_running_jobs--;
+
+ if (j->unit->manager->n_running_jobs <= 0)
+ j->unit->manager->jobs_in_progress_event_source = sd_event_source_unref(j->unit->manager->jobs_in_progress_event_source);
+ }
+}
+
void job_uninstall(Job *j) {
Job **pj;
assert(j->installed);
+ job_set_state(j, JOB_WAITING);
+
pj = (j->type == JOB_NOP) ? &j->unit->nop_job : &j->unit->job;
assert(*pj == j);
@@ -155,6 +183,7 @@ Job* job_install(Job *j) {
assert(!j->installed);
assert(j->type < _JOB_TYPE_MAX_IN_TRANSACTION);
+ assert(j->state == JOB_WAITING);
pj = (j->type == JOB_NOP) ? &j->unit->nop_job : &j->unit->job;
uj = *pj;
@@ -181,8 +210,8 @@ Job* job_install(Job *j) {
log_debug_unit(uj->unit->id,
"Merged into running job, re-running: %s/%s as %u",
uj->unit->id, job_type_to_string(uj->type), (unsigned) uj->id);
- uj->state = JOB_WAITING;
- uj->manager->n_running_jobs--;
+
+ job_set_state(uj, JOB_WAITING);
return uj;
}
}
@@ -209,5 +239,9 @@ int job_install_deserialized(Job *j) {
*pj = j;
j->installed = true;
+
+ if (j->state == JOB_RUNNING)
+ j->unit->manager->n_running_jobs++;
+
log_debug_unit(j->unit->id,
"Reinstalled deserialized job %s/%s as %u",
j->unit->id, job_type_to_string(j->type), (unsigned) j->id);
@@ -481,8 +513,7 @@ int job_run_and_invalidate(Job *j) {
if (!job_is_runnable(j))
return -EAGAIN;
- j->state = JOB_RUNNING;
- m->n_running_jobs++;
+ job_set_state(j, JOB_RUNNING);
job_add_to_dbus_queue(j);
/* While we execute this operation the job might go away (for
@@ -542,10 +573,9 @@ int job_run_and_invalidate(Job *j) {
r = job_finish_and_invalidate(j, JOB_SKIPPED, true);
else if (r == -ENOEXEC)
r = job_finish_and_invalidate(j, JOB_INVALID, true);
- else if (r == -EAGAIN) {
- j->state = JOB_WAITING;
- m->n_running_jobs--;
- } else if (r < 0)
+ else if (r == -EAGAIN)
+ job_set_state(j, JOB_WAITING);
+ else if (r < 0)
r = job_finish_and_invalidate(j, JOB_FAILED, true);
}
@@ -752,9 +782,6 @@ int job_finish_and_invalidate(Job *j, JobResult result, bool recursive) {
j->result = result;
- if (j->state == JOB_RUNNING)
- j->manager->n_running_jobs--;
-
log_debug_unit(u->id, "Job %s/%s finished, result=%s",
u->id, job_type_to_string(t), job_result_to_string(result));
@@ -767,7 +794,7 @@ int job_finish_and_invalidate(Job *j, JobResult result, bool recursive) {
if (result == JOB_DONE && t == JOB_RESTART) {
job_change_type(j, JOB_START);
- j->state = JOB_WAITING;
+ job_set_state(j, JOB_WAITING);
job_add_to_run_queue(j);
@@ -995,7 +1022,7 @@ int job_deserialize(Job *j, FILE *f, FDSet *fds) {
if (s < 0)
log_debug("Failed to parse job state %s", v);
else
- j->state = s;
+ job_set_state(j, s);
} else if (streq(l, "job-override")) {
int b;
diff --git a/src/core/manager.c b/src/core/manager.c
index 0cb2044..1b65212 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -164,6 +164,7 @@ static void manager_print_jobs_in_progress(Manager *m) {
uint64_t x;
assert(m);
+ assert(m->n_running_jobs > 0);
manager_flip_auto_status(m, true);
@@ -2454,9 +2453,6 @@ void manager_check_finished(Manager *m) {
assert(m);
- if (m->n_running_jobs == 0)
- m->jobs_in_progress_event_source = sd_event_source_unref(m->jobs_in_progress_event_source);
-
if (hashmap_size(m->jobs) > 0) {
if (m->jobs_in_progress_event_source) {
diff --git a/src/core/unit.c b/src/core/unit.c
index 0e4ebfd..73af112 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -2454,9 +2456,6 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
job_free(j);
return r;
}
-
- if (j->state == JOB_RUNNING)
- u->manager->n_running_jobs++;
} else {
/* legacy */
JobType type = job_type_from_string(v);
--
2.1.4
>From 917694cb0ed012f85b066950b3af2f941fbd93cd Mon Sep 17 00:00:00 2001
From: "Yuriy M. Kaminskiy" <yum...@gmail.com>
Date: Sat, 16 Jan 2016 14:37:09 +0300
Subject: [PATCH] Paranoia against SIGFPE
---
src/core/manager.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/core/manager.c b/src/core/manager.c
index 1b65212..04616d0 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -164,7 +164,7 @@ static void manager_print_jobs_in_progress(Manager *m) {
uint64_t x;
assert(m);
- assert(m->n_running_jobs > 0);
+ if(m->n_running_jobs < 1) return;
manager_flip_auto_status(m, true);
--
2.1.4