This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/couchdb.git


The following commit(s) were added to refs/heads/main by this push:
     new 0bedebfff Avoid using function closures in mem3
0bedebfff is described below

commit 0bedebfff8ea66b19850893e725785755885bd41
Author: Nick Vatamaniuc <[email protected]>
AuthorDate: Thu Oct 23 16:11:18 2025 -0400

    Avoid using function closures in mem3
    
    Those are for debugging in remsh, but function closures are still fragile
    when sent between nodes, so avoid them and rely on the new erpc module with
    plain M, F, A args.
    
    While at it, improve the `dead_nodes/0,1` functions. Make them return the node
    from whose perspective we noticed the dead nodes. That makes it a bit clearer
    what the network partition might look like.
---
 src/mem3/src/mem3.erl                          | 31 +++++++++-----------------
 src/mem3/test/eunit/mem3_distribution_test.erl |  3 ++-
 2 files changed, 12 insertions(+), 22 deletions(-)

diff --git a/src/mem3/src/mem3.erl b/src/mem3/src/mem3.erl
index 2ce5c8885..f748ff7a0 100644
--- a/src/mem3/src/mem3.erl
+++ b/src/mem3/src/mem3.erl
@@ -500,7 +500,7 @@ do_ping(Node, Timeout) ->
             {Tag, Err}
     end.
 
--spec dead_nodes() -> [node() | Error :: term()].
+-spec dead_nodes() -> [{node(), [node()]}].
 
 %% @doc Returns a list of dead nodes from the cluster.
 %%
@@ -518,32 +518,21 @@ do_ping(Node, Timeout) ->
 dead_nodes() ->
     dead_nodes(?PING_TIMEOUT_IN_MS).
 
--spec dead_nodes(Timeout :: pos_integer()) -> [node() | Error :: term()].
+-spec dead_nodes(Timeout :: pos_integer()) -> [{node(), [node()]}].
 
 dead_nodes(Timeout) when is_integer(Timeout), Timeout > 0 ->
     % Here we are trying to detect overlapping partitions where not all the
     % nodes connect to each other. For example: n1 connects to n2 and n3, but
     % n2 and n3 are not connected.
-    DeadFun = fun() ->
-        Expected = ordsets:from_list(mem3:nodes()),
-        Live = ordsets:from_list(mem3_util:live_nodes()),
-        Dead = ordsets:subtract(Expected, Live),
-        ordsets:to_list(Dead)
+    Nodes = [node() | erlang:nodes()],
+    Expected = erpc:multicall(Nodes, mem3, nodes, [], Timeout),
+    Live = erpc:multicall(Nodes, mem3_util, live_nodes, [], Timeout),
+    ZipF = fun
+        (N, {ok, E}, {ok, L}) -> {N, E -- L};
+        (N, _, _) -> {N, Nodes}
     end,
-    {Responses, BadNodes} = multicall(DeadFun, Timeout),
-    AccF = lists:foldl(
-        fun
-            (Dead, Acc) when is_list(Dead) -> ordsets:union(Acc, Dead);
-            (Error, Acc) -> ordsets:union(Acc, [Error])
-        end,
-        ordsets:from_list(BadNodes),
-        Responses
-    ),
-    ordsets:to_list(AccF).
-
-multicall(Fun, Timeout) when is_integer(Timeout), Timeout > 0 ->
-    F = fun() -> catch Fun() end,
-    rpc:multicall(erlang, apply, [F, []], Timeout).
+    DeadPerNode = lists:zipwith3(ZipF, Nodes, Expected, Live),
+    lists:sort([{N, lists:sort(D)} || {N, D} <- DeadPerNode, D =/= []]).
 
 db_is_current(#shard{name = Name}) ->
     db_is_current(Name);
diff --git a/src/mem3/test/eunit/mem3_distribution_test.erl b/src/mem3/test/eunit/mem3_distribution_test.erl
index 22dcfd7fa..b04ff5567 100644
--- a/src/mem3/test/eunit/mem3_distribution_test.erl
+++ b/src/mem3/test/eunit/mem3_distribution_test.erl
@@ -143,4 +143,5 @@ ping_nodes_test(_) ->
 dead_nodes_test(_) ->
     meck:expect(mem3, nodes, 0, [n1, n2, n3]),
     meck:expect(mem3_util, live_nodes, 0, [n1, n2]),
-    ?assertEqual([n3], couch_debug:dead_nodes()).
+    Node = node(),
+    ?assertEqual([{Node, [n3]}], couch_debug:dead_nodes()).

Reply via email to