In my first mail I had the patch attached but I got the message "A non-text attachment was scrubbed..." so that's why afterwards I inserted the lines into the mail message. So now, I made the patch with git format-patch. I will still insert the patch in the message and also add it as attachment. Please excuse me for any inconvenience, but it's my first patch for the open-source community.
>From 4d0c7dc439430f592e5c6251d91e162d2191277d Mon Sep 17 00:00:00 2001 From: Cristian Patrascu <[email protected]> Date: Tue, 5 Jul 2011 11:11:34 +0300 Subject: [PATCH] systemd restart retries on failure --- src/dbus-service.c | 4 ++++ src/load-fragment.c | 1 + src/service.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++++- src/service.h | 3 +++ src/unit.c | 4 ++++ 5 files changed, 61 insertions(+), 1 deletions(-) diff --git a/src/dbus-service.c b/src/dbus-service.c index 3486623..40e1efe 100644 --- a/src/dbus-service.c +++ b/src/dbus-service.c @@ -42,6 +42,8 @@ " <property name=\"PIDFile\" type=\"s\" access=\"read\"/>\n" \ " <property name=\"NotifyAccess\" type=\"s\" access=\"read\"/>\n" \ " <property name=\"RestartUSec\" type=\"t\" access=\"read\"/>\n" \ + " <property name=\"MaxRestartRetries\" type=\"u\" access=\"read\"/>\n" \ + " <property name=\"RestartRetry\" type=\"u\" access=\"read\"/>\n" \ " <property name=\"TimeoutUSec\" type=\"t\" access=\"read\"/>\n" \ BUS_EXEC_COMMAND_INTERFACE("ExecStartPre") \ BUS_EXEC_COMMAND_INTERFACE("ExecStart") \ @@ -103,6 +105,8 @@ DBusHandlerResult bus_service_message_handler(Unit *u, DBusConnection *connectio { "org.freedesktop.systemd1.Service", "PIDFile", bus_property_append_string, "s", u->service.pid_file }, { "org.freedesktop.systemd1.Service", "NotifyAccess", bus_service_append_notify_access, "s", &u->service.notify_access }, { "org.freedesktop.systemd1.Service", "RestartUSec", bus_property_append_usec, "t", &u->service.restart_usec }, + { "org.freedesktop.systemd1.Service", "MaxRestartRetries", bus_property_append_unsigned, "u", &u->service.restart_retries }, + { "org.freedesktop.systemd1.Service", "RestartRetry", bus_property_append_unsigned, "u", &u->service.restart_current_retry }, { "org.freedesktop.systemd1.Service", "TimeoutUSec", bus_property_append_usec, "t", &u->service.timeout_usec }, BUS_EXEC_COMMAND_PROPERTY("org.freedesktop.systemd1.Service", u->service.exec_command[SERVICE_EXEC_START_PRE], "ExecStartPre"), BUS_EXEC_COMMAND_PROPERTY("org.freedesktop.systemd1.Service", u->service.exec_command[SERVICE_EXEC_START], "ExecStart"), diff --git a/src/load-fragment.c b/src/load-fragment.c index 56eaed9..7e9efaa 100644 --- a/src/load-fragment.c +++ b/src/load-fragment.c @@ -1951,6 +1951,7 @@ static int load_from_path(Unit *u, const char *path) { { "TimeoutSec", config_parse_usec, 0, &u->service.timeout_usec, "Service" }, { "Type", config_parse_service_type, 0, &u->service.type, "Service" }, { "Restart", config_parse_service_restart, 0, &u->service.restart, "Service" }, + { "MaxRestartRetries", config_parse_unsigned, 0, &u->service.restart_retries, "Service" }, { "PermissionsStartOnly", config_parse_bool, 0, &u->service.permissions_start_only, "Service" }, { "RootDirectoryStartOnly", config_parse_bool, 0, &u->service.root_directory_start_only, "Service" }, { "RemainAfterExit", config_parse_bool, 0, &u->service.remain_after_exit, "Service" }, diff --git a/src/service.c b/src/service.c index d59c4cb..b5159a6 100644 --- a/src/service.c +++ b/src/service.c @@ -1151,6 +1151,20 @@ static int service_load(Unit *u) { if (s->meta.default_dependencies) if ((r = service_add_default_dependencies(s)) < 0) return r; + + /* If the option "RestartRetries=" is set then the service will be "revived" */ + if (s->restart_retries > 0) { + /* If the option "Restart=" is not set, then for the service to be "revived" + we must set "Restart=" to "on-failure" */ + if (s->restart == SERVICE_RESTART_NO) + s->restart = SERVICE_RESTART_ON_FAILURE; + + /* If the option "Restart=" is already set, then for the service to be "revived" + and be restarted on failure we must set "Restart=" to "always" + only if it's "on-success" */ + else if (s->restart == SERVICE_RESTART_ON_SUCCESS) + s->restart = SERVICE_RESTART_ALWAYS; + } } return service_verify(s); @@ -1488,6 +1502,16 @@ static void service_set_state(Service *s, ServiceState state) { log_debug("%s changed %s -> %s", s->meta.id, service_state_to_string(old_state), service_state_to_string(state)); unit_notify(UNIT(s), state_translation_table[old_state], state_translation_table[state], !s->reload_failure); + + /* If service finished, reset restart-retry counter, if applicable */ + if (old_state != state && + (state == SERVICE_DEAD || state == SERVICE_FAILED) && + s->restart_current_retry > s->restart_retries) { + log_debug("Service %s entered dead or failed, restart retries at max, reset counter (%u -> 0)!", + s->meta.id, s->restart_current_retry); + s->restart_current_retry = 0; + } + s->reload_failure = false; } @@ -2291,6 +2315,7 @@ static int service_start(Unit *u) { /* Make sure we don't enter a busy loop of some kind. */ if (!ratelimit_test(&s->ratelimit)) { + s->restart_current_retry = 0; log_warning("%s start request repeated too quickly, refusing to start.", u->meta.id); return -ECANCELED; } @@ -2311,6 +2336,7 @@ static int service_stop(Unit *u) { /* This is a user request, so don't do restarts on this * shutdown. */ + s->restart_current_retry = 0; s->forbid_restart = true; /* Already on it */ @@ -2411,6 +2437,8 @@ static int service_serialize(Unit *u, FILE *f, FDSet *fds) { } } + unit_serialize_item_format(u, f, "restart-current-retry", "%u", s->restart_current_retry); + return 0; } @@ -2510,6 +2538,13 @@ static int service_deserialize_item(Unit *u, const char *key, const char *value, dual_timestamp_deserialize(value, &s->main_exec_status.start_timestamp); else if (streq(key, "main-exec-status-exit")) dual_timestamp_deserialize(value, &s->main_exec_status.exit_timestamp); + else if (streq(key, "restart-current-retry")) { + unsigned i; + if (safe_atou(value, &i) < 0) + log_debug("Failed to parse restart-current-retry %s", value); + else + s->restart_current_retry = i; + } else log_debug("Unknown serialization key '%s'", key); @@ -2604,6 +2639,11 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) { * gone. */ s->main_command = NULL; + if (success && s->restart_current_retry) { + log_debug("%s changed restart_crt_retry from %u to 0", s->meta.id, s->restart_current_retry); + s->restart_current_retry = 0; + } + switch (s->state) { case SERVICE_START_POST: @@ -2832,7 +2872,15 @@ static void service_timer_event(Unit *u, uint64_t elapsed, Watch* w) { break; case SERVICE_AUTO_RESTART: - log_info("%s holdoff time over, scheduling restart.", u->meta.id); + if (s->restart_retries > 0) { + s->restart_current_retry ++; + if (s->restart_current_retry > s->restart_retries) { + log_warning("%s maximum number of restarting retries reached. Stopping.", s->meta.id); + service_enter_dead(s, true, false); + break; + } + } + log_info("%s holdoff time over, scheduling restart (retry %u of %u).", u->meta.id, s->restart_current_retry, s->restart_retries); service_enter_restart(s); break; diff --git a/src/service.h b/src/service.h index 55b9513..1d6c42c 100644 --- a/src/service.h +++ b/src/service.h @@ -92,6 +92,9 @@ struct Service { ServiceType type; ServiceRestart restart; + unsigned restart_retries; + unsigned restart_current_retry; + /* If set we'll read the main daemon PID from this file */ char *pid_file; diff --git a/src/unit.c b/src/unit.c index 9bb4e56..2ae63d1 100644 --- a/src/unit.c +++ b/src/unit.c @@ -1114,6 +1114,10 @@ void unit_trigger_on_failure(Unit *u) { if (set_size(u->meta.dependencies[UNIT_ON_FAILURE]) <= 0) return; + if (u->meta.type == UNIT_SERVICE && + u->service.restart_current_retry <= u->service.restart_retries) + return; + log_info("Triggering OnFailure= dependencies of %s.", u->meta.id); SET_FOREACH(other, u->meta.dependencies[UNIT_ON_FAILURE], i) { -- 1.7.0.4 ________________________________________ From: Lennart Poettering [[email protected]] Sent: Monday, July 04, 2011 8:35 PM To: Patrascu, Cristian Cc: Zaharia, Adrian; [email protected] Subject: Re: [systemd-devel] [PATCH] systemd restart patch On Mon, 04.07.11 18:18, Cristian Patrascu ([email protected]) wrote: > > Thank you for your observations and interest! > > So I made some modifications: > - changed restart variables to unsigned > - changed to 0 as default (not -1, also dropped the define) > - moved counter reset from unit.c to service.c (after unit_notify call) > - because the "OnFailure" unit is triggered in unit.c, I still left > the checking before that. > - updated coding style > - added restart counter (de)serialization heya, Can you please post this patch properly formatted with git's format-patch command and not with line breaking added by your mailer? Otherwise I have trouble reviewing and applying your patch! Thanks, Lennart > > The patch is for the same branch > (3661ac04b4f2840d3345605aa35963bbde3c469d) as before: > > Index: systemd-29/src/dbus-service.c > =================================================================== > --- systemd-29.orig/src/dbus-service.c 2011-07-04 > 16:19:37.000000000 +0300 > +++ systemd-29/src/dbus-service.c 2011-07-04 17:07:09.971651549 +0300 > @@ -42,6 +42,8 @@ > " <property name=\"PIDFile\" type=\"s\" access=\"read\"/>\n" \ > " <property name=\"NotifyAccess\" type=\"s\" > access=\"read\"/>\n" \ > " <property name=\"RestartUSec\" type=\"t\" access=\"read\"/>\n" \ > + " <property name=\"MaxRestartRetries\" type=\"u\" > access=\"read\"/>\n" \ > + " <property name=\"RestartRetry\" type=\"u\" access=\"read\"/>\n" \ > " <property name=\"TimeoutUSec\" type=\"t\" access=\"read\"/>\n" \ > BUS_EXEC_COMMAND_INTERFACE("ExecStartPre") \ > BUS_EXEC_COMMAND_INTERFACE("ExecStart") \ > @@ -103,6 +105,8 @@ > { "org.freedesktop.systemd1.Service", "PIDFile", > bus_property_append_string, "s", u->service.pid_file > }, > { "org.freedesktop.systemd1.Service", > "NotifyAccess", bus_service_append_notify_access, "s", > &u->service.notify_access }, > { "org.freedesktop.systemd1.Service", > "RestartUSec", bus_property_append_usec, "t", > &u->service.restart_usec }, > + { "org.freedesktop.systemd1.Service", > "MaxRestartRetries", bus_property_append_unsigned, "u", > &u->service.restart_retries }, > + { "org.freedesktop.systemd1.Service", > "RestartRetry", bus_property_append_unsigned, "u", > &u->service.restart_current_retry }, > { "org.freedesktop.systemd1.Service", > "TimeoutUSec", bus_property_append_usec, "t", > &u->service.timeout_usec }, > BUS_EXEC_COMMAND_PROPERTY("org.freedesktop.systemd1.Service", > u->service.exec_command[SERVICE_EXEC_START_PRE], "ExecStartPre"), > BUS_EXEC_COMMAND_PROPERTY("org.freedesktop.systemd1.Service", > u->service.exec_command[SERVICE_EXEC_START], "ExecStart"), > Index: systemd-29/src/load-fragment.c > =================================================================== > --- systemd-29.orig/src/load-fragment.c 2011-07-04 > 16:19:37.000000000 +0300 > +++ systemd-29/src/load-fragment.c 2011-07-04 17:07:09.975651196 +0300 > @@ -1951,6 +1951,7 @@ > { "TimeoutSec", config_parse_usec, > 0, &u->service.timeout_usec, "Service" }, > { "Type", > config_parse_service_type, 0, &u->service.type, > "Service" }, > { "Restart", > config_parse_service_restart, 0, &u->service.restart, > "Service" }, > + { "MaxRestartRetries", config_parse_unsigned, > 0, &u->service.restart_retries, "Service" }, > { "PermissionsStartOnly", config_parse_bool, > 0, &u->service.permissions_start_only, "Service" }, > { "RootDirectoryStartOnly", config_parse_bool, > 0, &u->service.root_directory_start_only, "Service" }, > { "RemainAfterExit", config_parse_bool, > 0, &u->service.remain_after_exit, "Service" }, > Index: systemd-29/src/service.c > =================================================================== > --- systemd-29.orig/src/service.c 2011-07-04 16:19:37.000000000 +0300 > +++ systemd-29/src/service.c 2011-07-04 17:14:32.823465057 +0300 > @@ -1151,6 +1166,20 @@ > if (s->meta.default_dependencies) > if ((r = service_add_default_dependencies(s)) < 0) > return r; > + > + /* If the option "RestartRetries=" is set then the > service will be "revived" */ > + if (s->restart_retries > 0) { > + /* If the option "Restart=" is not set, > then for the service to be "revived" > + we must set "Restart=" to "on-failure" */ > + if (s->restart == SERVICE_RESTART_NO) > + s->restart = SERVICE_RESTART_ON_FAILURE; > + > + /* If the option "Restart=" is already set, > then for the service to be "revived" > + and be restarted on failure we must set > "Restart=" to "always" > + only if it's "on-success" */ > + else if (s->restart == SERVICE_RESTART_ON_SUCCESS) > + s->restart = SERVICE_RESTART_ALWAYS; > + } > } > > return service_verify(s); > @@ -1488,6 +1517,16 @@ > log_debug("%s changed %s -> %s", s->meta.id, > service_state_to_string(old_state), service_state_to_string(state)); > > unit_notify(UNIT(s), state_translation_table[old_state], > state_translation_table[state], !s->reload_failure); > + > + /* If service finished, reset restart-retry counter, if > applicable */ > + if (old_state != state && > + (state == SERVICE_DEAD || state == SERVICE_FAILED) && > + s->restart_current_retry > s->restart_retries) { > + log_debug("Service %s entered dead or failed, > restart retries at max, reset counter (%u -> 0)!", > + s->meta.id, s->restart_current_retry); > + s->restart_current_retry = 0; > + } > + > s->reload_failure = false; > } > > @@ -2291,6 +2330,7 @@ > > /* Make sure we don't enter a busy loop of some kind. */ > if (!ratelimit_test(&s->ratelimit)) { > + s->restart_current_retry = 0; > log_warning("%s start request repeated too quickly, > refusing to start.", u->meta.id); > return -ECANCELED; > } > @@ -2311,6 +2351,7 @@ > > /* This is a user request, so don't do restarts on this > * shutdown. */ > + s->restart_current_retry = 0; > s->forbid_restart = true; > > /* Already on it */ > @@ -2411,6 +2452,8 @@ > } > } > > + unit_serialize_item_format(u, f, "restart-current-retry", > "%u", s->restart_current_retry); > + > return 0; > } > > @@ -2510,6 +2553,13 @@ > dual_timestamp_deserialize(value, > &s->main_exec_status.start_timestamp); > else if (streq(key, "main-exec-status-exit")) > dual_timestamp_deserialize(value, > &s->main_exec_status.exit_timestamp); > + else if (streq(key, "restart-current-retry")) { > + unsigned i; > + if (safe_atou(value, &i) < 0) > + log_debug("Failed to parse > restart-current-retry %s", value); > + else > + s->restart_current_retry = i; > + } > else > log_debug("Unknown serialization key '%s'", key); > > @@ -2604,6 +2654,11 @@ > * gone. */ > s->main_command = NULL; > > + if (success && s->restart_current_retry) { > + log_debug("%s changed > restart_crt_retry from %u to 0", s->meta.id, > s->restart_current_retry); > + s->restart_current_retry = 0; > + } > + > switch (s->state) { > > case SERVICE_START_POST: > @@ -2832,7 +2887,15 @@ > break; > > case SERVICE_AUTO_RESTART: > - log_info("%s holdoff time over, scheduling > restart.", u->meta.id); > + if (s->restart_retries > 0) { > + s->restart_current_retry ++; > + if (s->restart_current_retry > > s->restart_retries) { > + log_warning("%s maximum number of > restarting retries reached. Stopping.", s->meta.id); > + service_enter_dead(s, true, false); > + break; > + } > + } > + log_info("%s holdoff time over, scheduling restart > (retry %u of %u).", u->meta.id, s->restart_current_retry, > s->restart_retries); > service_enter_restart(s); > break; > > Index: systemd-29/src/service.h > =================================================================== > --- systemd-29.orig/src/service.h 2011-07-04 16:19:37.000000000 +0300 > +++ systemd-29/src/service.h 2011-07-04 17:07:09.975651196 +0300 > @@ -92,6 +92,9 @@ > ServiceType type; > ServiceRestart restart; > > + unsigned restart_retries; > + unsigned restart_current_retry; > + > /* If set we'll read the main daemon PID from this file */ > char *pid_file; > > Index: systemd-29/src/unit.c > =================================================================== > --- systemd-29.orig/src/unit.c 2011-07-04 16:19:37.000000000 +0300 > +++ systemd-29/src/unit.c 2011-07-04 17:07:09.979652060 +0300 > @@ -1114,6 +1114,10 @@ > if (set_size(u->meta.dependencies[UNIT_ON_FAILURE]) <= 0) > return; > > + if (u->meta.type == UNIT_SERVICE && > + u->service.restart_current_retry <= > u->service.restart_retries) > + return; > + > log_info("Triggering OnFailure= dependencies of %s.", u->meta.id); > > SET_FOREACH(other, u->meta.dependencies[UNIT_ON_FAILURE], i) { > > > On 07/02/2011 03:21 AM, Lennart Poettering wrote: > >On Fri, 01.07.11 15:35, Cristian Patrascu ([email protected]) > >wrote: > > > >Heya! > > > >(BTW, got your first post of this patch too, just didn't find the time > >to review the patch, sorry. It was still in my mail queue however.) > > > >> After "x" restarts have happened, the "OnFailure" unit will start > >>(if specified) but only after all unsuccessfull restarts (after "x" > >>restarts reached). > >Patch looks pretty good. A few comments though. > >>This patch is made for systemd-29, on master branch with SHA1 ID: > >>3661ac04b4f2840d3345605aa35963bbde3c469d > >> > >>________systemd-restart-on-fail.patch : _______________________________ > >> > >>Index: systemd-29/src/dbus-service.c > >>=================================================================== > >>--- systemd-29.orig/src/dbus-service.c 2011-06-22 10:47:14.000000000 > >>+0300 > >>+++ systemd-29/src/dbus-service.c 2011-06-22 13:48:51.292321742 +0300 > >>@@ -42,6 +42,8 @@ > >> "<property name=\"PIDFile\" type=\"s\" access=\"read\"/>\n" \ > >> "<property name=\"NotifyAccess\" type=\"s\" access=\"read\"/>\n" \ > >> "<property name=\"RestartUSec\" type=\"t\" access=\"read\"/>\n" \ > >>+ "<property name=\"MaxRestartRetries\" type=\"i\" > >>access=\"read\"/>\n" \ > >>+ "<property name=\"RestartRetry\" type=\"i\" > >>access=\"read\"/>\n" \ > >I Think it would make sense to make both of these unsigned, in order to > >avoid confusion whether these actually ever can be negative. > > > >If I read your patch correctly right now MaxRestartRetries=-1 means that > >there is no limit on the number of retries. I'd use 0 for that instead > >(0 is not needed to indicate disabling, since we can indicate that with > >Restart=no already.) > > > >>+ s->restart_retries = DEFAULT_RESTART_RETRIES; > >>+ s->restart_crt_retry = 0; > >We generally try not to abbrievate variables unnecessarily, so I'd suggest > >to call this variable "restart_current_retry" or so. > > > >> #ifdef HAVE_SYSV_COMPAT > >> s->sysv_start_priority = -1; > >> s->sysv_start_priority_from_rcnd = -1; > >>@@ -1151,6 +1153,22 @@ > >> if (s->meta.default_dependencies) > >> if ((r = service_add_default_dependencies(s))< > >> 0) > >> return r; > >>+ > >>+ /* If the option "RestartRetries=" is set then the service > >>will be "revived" */ > >>+ if (s->restart_retries != DEFAULT_RESTART_RETRIES){ > >>+ /* If the option "Restart=" is not set, then for > >>the service to be "revived" > >>+ we must set "Restart=" to "on-failure" */ > >>+ if (s->restart == SERVICE_RESTART_NO){ > >>+ s->restart = SERVICE_RESTART_ON_FAILURE; > >>+ } > >Please follow coding style. We generally place no {} brackets around > >single line blocks. > > > >> case SERVICE_AUTO_RESTART: > >>- log_info("%s holdoff time over, scheduling restart.", > >>u->meta.id); > >>+ if (0<= s->restart_retries){ > >Please follow coding style, compare variables with constants not > >constants with variables. Also, please place spaces before the<= and > >the {. > > > >>+ if (u->meta.type == UNIT_SERVICE&& > >>+ u->service.restart_crt_retry<= u->service.restart_retries) > >>+ return; > >>+ > >Huhm, I wonder if we could fine a better place for this, not sure where > >though. > > > >> log_info("Triggering OnFailure= dependencies of %s.", u->meta.id); > >> > >> SET_FOREACH(other, u->meta.dependencies[UNIT_ON_FAILURE], i) { > >>@@ -1245,6 +1249,17 @@ > >> log_notice("Unit %s entered failed state.", > >> u->meta.id); > >> unit_trigger_on_failure(u); > >> } > >>+ > >>+ /* When a unit is of type service and has finished, > >>+ reset restart-retry counter, if applicable */ > >>+ if (ns != os&& > >>+ u->meta.type == UNIT_SERVICE&& > >>+ UNIT_IS_INACTIVE_OR_FAILED(ns)&& > >>+ u->service.restart_crt_retry> > >>u->service.restart_retries ){ > >>+ log_debug("Unit %s entered inactive or failed, > >>restart retries at max, reset counter (%d -> 0)!", > >>+ u->meta.id, > >>u->service.restart_crt_retry, u->service.restart_retries); > >>+ u->service.restart_crt_retry = 0; > >Hmm, I think it would be nicer to reset this counter in service.c only, not > >in the generic code. I'd like to avoid that we access too many > >service-private fields from generic unit code. > > > >> #include "execute.h" > >> #include "condition.h" > >> > >>+/* default = infinite retries (= systemd behaviour not patched) */ > >>+#define DEFAULT_RESTART_RETRIES -1 > >>+ > >Hee, if I commit this, then it won't be patched anymore, so the comment > >should not refer to it being unpatched ;-) > > > >Patch looks quite OK in general. > > > >Thanks for the work, > > > >Lennart Lennart -- Lennart Poettering - Red Hat, Inc.
From 4d0c7dc439430f592e5c6251d91e162d2191277d Mon Sep 17 00:00:00 2001 From: Cristian Patrascu <[email protected]> Date: Tue, 5 Jul 2011 11:11:34 +0300 Subject: [PATCH] systemd restart retries on failure --- src/dbus-service.c | 4 ++++ src/load-fragment.c | 1 + src/service.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++++- src/service.h | 3 +++ src/unit.c | 4 ++++ 5 files changed, 61 insertions(+), 1 deletions(-) diff --git a/src/dbus-service.c b/src/dbus-service.c index 3486623..40e1efe 100644 --- a/src/dbus-service.c +++ b/src/dbus-service.c @@ -42,6 +42,8 @@ " <property name=\"PIDFile\" type=\"s\" access=\"read\"/>\n" \ " <property name=\"NotifyAccess\" type=\"s\" access=\"read\"/>\n" \ " <property name=\"RestartUSec\" type=\"t\" access=\"read\"/>\n" \ + " <property name=\"MaxRestartRetries\" type=\"u\" access=\"read\"/>\n" \ + " <property name=\"RestartRetry\" type=\"u\" access=\"read\"/>\n" \ " <property name=\"TimeoutUSec\" type=\"t\" access=\"read\"/>\n" \ BUS_EXEC_COMMAND_INTERFACE("ExecStartPre") \ BUS_EXEC_COMMAND_INTERFACE("ExecStart") \ @@ -103,6 +105,8 @@ DBusHandlerResult bus_service_message_handler(Unit *u, DBusConnection *connectio { "org.freedesktop.systemd1.Service", "PIDFile", bus_property_append_string, "s", u->service.pid_file }, { "org.freedesktop.systemd1.Service", "NotifyAccess", bus_service_append_notify_access, "s", &u->service.notify_access }, { "org.freedesktop.systemd1.Service", "RestartUSec", bus_property_append_usec, "t", &u->service.restart_usec }, + { "org.freedesktop.systemd1.Service", "MaxRestartRetries", bus_property_append_unsigned, "u", &u->service.restart_retries }, + { "org.freedesktop.systemd1.Service", "RestartRetry", bus_property_append_unsigned, "u", &u->service.restart_current_retry }, { "org.freedesktop.systemd1.Service", "TimeoutUSec", bus_property_append_usec, "t", &u->service.timeout_usec }, BUS_EXEC_COMMAND_PROPERTY("org.freedesktop.systemd1.Service", u->service.exec_command[SERVICE_EXEC_START_PRE], "ExecStartPre"), BUS_EXEC_COMMAND_PROPERTY("org.freedesktop.systemd1.Service", u->service.exec_command[SERVICE_EXEC_START], "ExecStart"), diff --git a/src/load-fragment.c b/src/load-fragment.c index 56eaed9..7e9efaa 100644 --- a/src/load-fragment.c +++ b/src/load-fragment.c @@ -1951,6 +1951,7 @@ static int load_from_path(Unit *u, const char *path) { { "TimeoutSec", config_parse_usec, 0, &u->service.timeout_usec, "Service" }, { "Type", config_parse_service_type, 0, &u->service.type, "Service" }, { "Restart", config_parse_service_restart, 0, &u->service.restart, "Service" }, + { "MaxRestartRetries", config_parse_unsigned, 0, &u->service.restart_retries, "Service" }, { "PermissionsStartOnly", config_parse_bool, 0, &u->service.permissions_start_only, "Service" }, { "RootDirectoryStartOnly", config_parse_bool, 0, &u->service.root_directory_start_only, "Service" }, { "RemainAfterExit", config_parse_bool, 0, &u->service.remain_after_exit, "Service" }, diff --git a/src/service.c b/src/service.c index d59c4cb..b5159a6 100644 --- a/src/service.c +++ b/src/service.c @@ -1151,6 +1151,20 @@ static int service_load(Unit *u) { if (s->meta.default_dependencies) if ((r = service_add_default_dependencies(s)) < 0) return r; + + /* If the option "RestartRetries=" is set then the service will be "revived" */ + if (s->restart_retries > 0) { + /* If the option "Restart=" is not set, then for the service to be "revived" + we must set "Restart=" to "on-failure" */ + if (s->restart == SERVICE_RESTART_NO) + s->restart = SERVICE_RESTART_ON_FAILURE; + + /* If the option "Restart=" is already set, then for the service to be "revived" + and be restarted on failure we must set "Restart=" to "always" + only if it's "on-success" */ + else if (s->restart == SERVICE_RESTART_ON_SUCCESS) + s->restart = SERVICE_RESTART_ALWAYS; + } } return service_verify(s); @@ -1488,6 +1502,16 @@ static void service_set_state(Service *s, ServiceState state) { log_debug("%s changed %s -> %s", s->meta.id, service_state_to_string(old_state), service_state_to_string(state)); unit_notify(UNIT(s), state_translation_table[old_state], state_translation_table[state], !s->reload_failure); + + /* If service finished, reset restart-retry counter, if applicable */ + if (old_state != state && + (state == SERVICE_DEAD || state == SERVICE_FAILED) && + s->restart_current_retry > s->restart_retries) { + log_debug("Service %s entered dead or failed, restart retries at max, reset counter (%u -> 0)!", + s->meta.id, s->restart_current_retry); + s->restart_current_retry = 0; + } + s->reload_failure = false; } @@ -2291,6 +2315,7 @@ static int service_start(Unit *u) { /* Make sure we don't enter a busy loop of some kind. */ if (!ratelimit_test(&s->ratelimit)) { + s->restart_current_retry = 0; log_warning("%s start request repeated too quickly, refusing to start.", u->meta.id); return -ECANCELED; } @@ -2311,6 +2336,7 @@ static int service_stop(Unit *u) { /* This is a user request, so don't do restarts on this * shutdown. */ + s->restart_current_retry = 0; s->forbid_restart = true; /* Already on it */ @@ -2411,6 +2437,8 @@ static int service_serialize(Unit *u, FILE *f, FDSet *fds) { } } + unit_serialize_item_format(u, f, "restart-current-retry", "%u", s->restart_current_retry); + return 0; } @@ -2510,6 +2538,13 @@ static int service_deserialize_item(Unit *u, const char *key, const char *value, dual_timestamp_deserialize(value, &s->main_exec_status.start_timestamp); else if (streq(key, "main-exec-status-exit")) dual_timestamp_deserialize(value, &s->main_exec_status.exit_timestamp); + else if (streq(key, "restart-current-retry")) { + unsigned i; + if (safe_atou(value, &i) < 0) + log_debug("Failed to parse restart-current-retry %s", value); + else + s->restart_current_retry = i; + } else log_debug("Unknown serialization key '%s'", key); @@ -2604,6 +2639,11 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) { * gone. */ s->main_command = NULL; + if (success && s->restart_current_retry) { + log_debug("%s changed restart_crt_retry from %u to 0", s->meta.id, s->restart_current_retry); + s->restart_current_retry = 0; + } + switch (s->state) { case SERVICE_START_POST: @@ -2832,7 +2872,15 @@ static void service_timer_event(Unit *u, uint64_t elapsed, Watch* w) { break; case SERVICE_AUTO_RESTART: - log_info("%s holdoff time over, scheduling restart.", u->meta.id); + if (s->restart_retries > 0) { + s->restart_current_retry ++; + if (s->restart_current_retry > s->restart_retries) { + log_warning("%s maximum number of restarting retries reached. Stopping.", s->meta.id); + service_enter_dead(s, true, false); + break; + } + } + log_info("%s holdoff time over, scheduling restart (retry %u of %u).", u->meta.id, s->restart_current_retry, s->restart_retries); service_enter_restart(s); break; diff --git a/src/service.h b/src/service.h index 55b9513..1d6c42c 100644 --- a/src/service.h +++ b/src/service.h @@ -92,6 +92,9 @@ struct Service { ServiceType type; ServiceRestart restart; + unsigned restart_retries; + unsigned restart_current_retry; + /* If set we'll read the main daemon PID from this file */ char *pid_file; diff --git a/src/unit.c b/src/unit.c index 9bb4e56..2ae63d1 100644 --- a/src/unit.c +++ b/src/unit.c @@ -1114,6 +1114,10 @@ void unit_trigger_on_failure(Unit *u) { if (set_size(u->meta.dependencies[UNIT_ON_FAILURE]) <= 0) return; + if (u->meta.type == UNIT_SERVICE && + u->service.restart_current_retry <= u->service.restart_retries) + return; + log_info("Triggering OnFailure= dependencies of %s.", u->meta.id); SET_FOREACH(other, u->meta.dependencies[UNIT_ON_FAILURE], i) { -- 1.7.0.4
_______________________________________________ systemd-devel mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/systemd-devel
