This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 363598bba04837fe3f16ced5c6002c9ba8ad56ec
Author: Nick Vatamaniuc <[email protected]>
AuthorDate: Tue Jul 8 00:27:07 2025 -0400

    Avoid making a mess in the logs when stopping replicator app
    
    When the replicator app is stopping or crashing some of the jobs may be left
    behind. When they stop they'll try to leave the `pg` process group and/or
    notify the replicator gen_event handler. If those gen_servers are already shut
    down those attempts will make a mess in the logs. To avoid that ignore them if
    they fail.
    
    While adding tests to the scheduler, take the opportunity to update its tests to
    use the usual `TDEF_FE` macro and remove the more verbose `_test(begin...end)`
    construct.
---
 .../src/couch_replicator_notifier.erl              | 30 ++++++++++++++++------
 src/couch_replicator/src/couch_replicator_pg.erl   | 14 +++++++++-
 2 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/src/couch_replicator/src/couch_replicator_notifier.erl b/src/couch_replicator/src/couch_replicator_notifier.erl
index 21c6d5a25..4b33e0d06 100644
--- a/src/couch_replicator/src/couch_replicator_notifier.erl
+++ b/src/couch_replicator/src/couch_replicator_notifier.erl
@@ -14,6 +14,8 @@
 
 -behaviour(gen_event).
 
+-define(NAME, couch_replication).
+
 % public API
 -export([start_link/1, stop/1, notify/1]).
 
@@ -21,17 +23,20 @@
 -export([init/1]).
 -export([handle_event/2, handle_call/2, handle_info/2]).
 
--include_lib("couch/include/couch_db.hrl").
-
 start_link(FunAcc) ->
-    couch_event_sup:start_link(
-        couch_replication,
-        {couch_replicator_notifier, make_ref()},
-        FunAcc
-    ).
+    couch_event_sup:start_link(?NAME, {?MODULE, make_ref()}, FunAcc).
 
 notify(Event) ->
-    gen_event:notify(couch_replication, Event).
+    try
+        gen_event:notify(?NAME, Event)
+    catch
+        _:_ ->
+            % It's possible some jobs may remain around after the notification
+            % service had shut down or crashed. Avoid making a mess in the logs
+            % and just ignore that. At that point nobody will notice the
+            % notification anyway.
+            ok
+    end.
 
 stop(Pid) ->
     couch_event_sup:stop(Pid).
@@ -51,3 +56,12 @@ handle_call(_Msg, State) ->
 
 handle_info(_Msg, State) ->
     {ok, State}.
+
+-ifdef(TEST).
+
+-include_lib("couch/include/couch_eunit.hrl").
+
+couch_replicator_notify_when_stopped_test() ->
+    ?assertEqual(ok, notify({stopped, foo})).
+
+-endif.
diff --git a/src/couch_replicator/src/couch_replicator_pg.erl b/src/couch_replicator/src/couch_replicator_pg.erl
index 25937ec15..5f4f3bba7 100644
--- a/src/couch_replicator/src/couch_replicator_pg.erl
+++ b/src/couch_replicator/src/couch_replicator_pg.erl
@@ -47,7 +47,14 @@ join({_, _} = RepId, Pid) when is_pid(Pid) ->
 % quicker.
 %
 leave({_, _} = RepId, Pid) when is_pid(Pid) ->
-    pg:leave(?MODULE, id(RepId), Pid).
+    try
+        pg:leave(?MODULE, id(RepId), Pid)
+    catch
+        _:_ ->
+            ok
+        % If this is called during shutdown the pg gen_server might be
+        % gone. So we avoid blocking on it or making a mess in the logs
+    end.
 
 % Determine if a replication job should start on a particular node. If it
 % should, return `yes`, otherwise return `{no, OtherPid}`. `OtherPid` is
@@ -150,4 +157,9 @@ t_should_run(_) ->
     ok = join(RepId, InitPid),
     ?assertEqual({no, InitPid}, should_run(RepId, Pid)).
 
+couch_replicator_pg_test_leave_when_stopped_test() ->
+    RepId = {"a", "+b"},
+    Pid = self(),
+    ?assertEqual(ok, leave(RepId, Pid)).
+
 -endif.

Reply via email to