This is an automated email from the ASF dual-hosted git repository. vatamane pushed a commit to branch improve-replicator-app-stopping in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit e68c84e9bc2d3ffb42552b4972135a734e7a4d35 Author: Nick Vatamaniuc <[email protected]> AuthorDate: Tue Jul 8 00:27:07 2025 -0400 Avoid making a mess in the logs when stopping replicator app When the replicator app is stopping or crashing some of the jobs may be left behind. When they stop they'll try to leave the `pg` process group and/or notify the replicator gen_event handler. If those gen_servers are already shut down those attempts will make a mess in the logs. To avoid that ignore them if they fail. While adding tests to the scheduler, take the opportunity to update its tests to use the usual `TDEF_FE` macro and remove the more verbose `_test(begin...end)` construct. --- .../src/couch_replicator_notifier.erl | 30 ++++++++++++++++------ src/couch_replicator/src/couch_replicator_pg.erl | 14 +++++++++- 2 files changed, 35 insertions(+), 9 deletions(-) diff --git a/src/couch_replicator/src/couch_replicator_notifier.erl b/src/couch_replicator/src/couch_replicator_notifier.erl index 21c6d5a25..fd3eb92b0 100644 --- a/src/couch_replicator/src/couch_replicator_notifier.erl +++ b/src/couch_replicator/src/couch_replicator_notifier.erl @@ -14,6 +14,8 @@ -behaviour(gen_event). +-define(NAME, couch_replication). + % public API -export([start_link/1, stop/1, notify/1]). @@ -21,17 +23,20 @@ -export([init/1]). -export([handle_event/2, handle_call/2, handle_info/2]). --include_lib("couch/include/couch_db.hrl"). - start_link(FunAcc) -> - couch_event_sup:start_link( - couch_replication, - {couch_replicator_notifier, make_ref()}, - FunAcc - ). + couch_event_sup:start_link(?NAME, {?MODULE, make_ref()}, FunAcc). notify(Event) -> - gen_event:notify(couch_replication, Event). + try + gen_event:notify(?NAME, Event) + catch + _:_ -> + % It's possible some jobs may remain around after the notification + % service had shut down or crashed. Avoid making a mess in the logs + % and just ignore that. At that point nobody will notice the + % notification anyway. + ok + end. 
stop(Pid) -> couch_event_sup:stop(Pid). @@ -51,3 +56,12 @@ handle_call(_Msg, State) -> handle_info(_Msg, State) -> {ok, State}. + +-ifdef(TEST). + +-include_lib("couch/include/couch_eunit.hrl"). + +couch_replicator_notify_when_stopped_test() -> + ?assertEqual(ok, notify({stopped, foo})). + +-endif. diff --git a/src/couch_replicator/src/couch_replicator_pg.erl b/src/couch_replicator/src/couch_replicator_pg.erl index 25937ec15..d313f67ae 100644 --- a/src/couch_replicator/src/couch_replicator_pg.erl +++ b/src/couch_replicator/src/couch_replicator_pg.erl @@ -47,7 +47,14 @@ join({_, _} = RepId, Pid) when is_pid(Pid) -> % quicker. % leave({_, _} = RepId, Pid) when is_pid(Pid) -> - pg:leave(?MODULE, id(RepId), Pid). + try + pg:leave(?MODULE, id(RepId), Pid) + catch + _:_ -> + ok + % If this is called during shutdown the pg gen_server might be + % gone. So we avoid blocking on it or making a mess in the logs + end. % Determine if a replication job should start on a particular node. If it % should, return `yes`, otherwise return `{no, OtherPid}`. `OtherPid` is @@ -150,4 +157,9 @@ t_should_run(_) -> ok = join(RepId, InitPid), ?assertEqual({no, InitPid}, should_run(RepId, Pid)). +couch_replicator_pg_test_leave_when_stopped_test() -> + RepId = {"a", "+b"}, + Pid = self(), + ?assertEqual(ok, leave(RepId, Pid)). + -endif.
