This is an automated email from the ASF dual-hosted git repository.
vatamane pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/couchdb.git
The following commit(s) were added to refs/heads/main by this push:
new 0bedebfff Avoid using function closures in mem3
0bedebfff is described below
commit 0bedebfff8ea66b19850893e725785755885bd41
Author: Nick Vatamaniuc <[email protected]>
AuthorDate: Thu Oct 23 16:11:18 2025 -0400
Avoid using function closures in mem3
Those are for debugging in remsh, but function closures are still fragile when
sent between nodes, so avoid them and rely on the new erpc module with plain
M, F, A args.
While at it, improve the `dead_nodes/0,1` functions. Make them return the node
from whose perspective we noticed the dead nodes. That makes it a bit clearer
what the network partition might look like.
---
src/mem3/src/mem3.erl | 31 +++++++++-----------------
src/mem3/test/eunit/mem3_distribution_test.erl | 3 ++-
2 files changed, 12 insertions(+), 22 deletions(-)
diff --git a/src/mem3/src/mem3.erl b/src/mem3/src/mem3.erl
index 2ce5c8885..f748ff7a0 100644
--- a/src/mem3/src/mem3.erl
+++ b/src/mem3/src/mem3.erl
@@ -500,7 +500,7 @@ do_ping(Node, Timeout) ->
{Tag, Err}
end.
--spec dead_nodes() -> [node() | Error :: term()].
+-spec dead_nodes() -> [{node(), [node()]}].
%% @doc Returns a list of dead nodes from the cluster.
%%
@@ -518,32 +518,21 @@ do_ping(Node, Timeout) ->
dead_nodes() ->
dead_nodes(?PING_TIMEOUT_IN_MS).
--spec dead_nodes(Timeout :: pos_integer()) -> [node() | Error :: term()].
+-spec dead_nodes(Timeout :: pos_integer()) -> [{node(), [node()]}].
dead_nodes(Timeout) when is_integer(Timeout), Timeout > 0 ->
% Here we are trying to detect overlapping partitions where not all the
% nodes connect to each other. For example: n1 connects to n2 and n3, but
% n2 and n3 are not connected.
- DeadFun = fun() ->
- Expected = ordsets:from_list(mem3:nodes()),
- Live = ordsets:from_list(mem3_util:live_nodes()),
- Dead = ordsets:subtract(Expected, Live),
- ordsets:to_list(Dead)
+ Nodes = [node() | erlang:nodes()],
+ Expected = erpc:multicall(Nodes, mem3, nodes, [], Timeout),
+ Live = erpc:multicall(Nodes, mem3_util, live_nodes, [], Timeout),
+ ZipF = fun
+ (N, {ok, E}, {ok, L}) -> {N, E -- L};
+ (N, _, _) -> {N, Nodes}
end,
- {Responses, BadNodes} = multicall(DeadFun, Timeout),
- AccF = lists:foldl(
- fun
- (Dead, Acc) when is_list(Dead) -> ordsets:union(Acc, Dead);
- (Error, Acc) -> ordsets:union(Acc, [Error])
- end,
- ordsets:from_list(BadNodes),
- Responses
- ),
- ordsets:to_list(AccF).
-
-multicall(Fun, Timeout) when is_integer(Timeout), Timeout > 0 ->
- F = fun() -> catch Fun() end,
- rpc:multicall(erlang, apply, [F, []], Timeout).
+ DeadPerNode = lists:zipwith3(ZipF, Nodes, Expected, Live),
+ lists:sort([{N, lists:sort(D)} || {N, D} <- DeadPerNode, D =/= []]).
db_is_current(#shard{name = Name}) ->
db_is_current(Name);
diff --git a/src/mem3/test/eunit/mem3_distribution_test.erl
b/src/mem3/test/eunit/mem3_distribution_test.erl
index 22dcfd7fa..b04ff5567 100644
--- a/src/mem3/test/eunit/mem3_distribution_test.erl
+++ b/src/mem3/test/eunit/mem3_distribution_test.erl
@@ -143,4 +143,5 @@ ping_nodes_test(_) ->
dead_nodes_test(_) ->
meck:expect(mem3, nodes, 0, [n1, n2, n3]),
meck:expect(mem3_util, live_nodes, 0, [n1, n2]),
- ?assertEqual([n3], couch_debug:dead_nodes()).
+ Node = node(),
+ ?assertEqual([{Node, [n3]}], couch_debug:dead_nodes()).