This is an automated email from the ASF dual-hosted git repository. vatamane pushed a commit to branch fix-reduce-limit-log in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit 86a346679689598a9fab866abb18e23b5698326d Author: Nick Vatamaniuc <[email protected]> AuthorDate: Mon Sep 8 18:48:20 2025 -0400 Fix reduce_limit = log feature In https://github.com/apache/couchdb/pull/3609 reduce_log was inadvertently disabled and instead of being a tri-state became a boolean setting only, with `log` become `true`. Bring back the `log` feature and add a test to ensure we'd notice if gets disabled again. Also, add some docs and comments about its existence. --- rel/overlay/etc/default.ini | 8 +- src/couch/src/couch_proc_manager.erl | 18 +++- src/couch/test/eunit/couch_query_servers_tests.erl | 120 +++++++++++---------- src/docs/src/config/query-servers.rst | 4 +- src/docs/src/ddocs/ddocs.rst | 3 + 5 files changed, 86 insertions(+), 67 deletions(-) diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index 6cffc12fa..7b89bc3ff 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -540,11 +540,13 @@ authentication_db = _users ; Erlang Query Server ;enable_erlang_query_server = false -; Changing reduce_limit to false will disable reduce_limit. -; If you think you're hitting reduce_limit with a "good" reduce function, -; please let us know on the mailing list so we can fine tune the heuristic. + [query_server_config] ;commit_freq = 5 +; Changing reduce_limit to false will disable reduce_limit. Setting the reduce +; limit to log will only log a warning instead of crashing the view. If you +; think you're hitting reduce_limit with a "good" reduce function, please let +; us know on the mailing list so we can fine tune the heuristic. ;reduce_limit = true ;os_process_limit = 100 ;os_process_idle_limit = 300 diff --git a/src/couch/src/couch_proc_manager.erl b/src/couch/src/couch_proc_manager.erl index d90463bf3..b6a1659a7 100644 --- a/src/couch/src/couch_proc_manager.erl +++ b/src/couch/src/couch_proc_manager.erl @@ -730,13 +730,23 @@ remove_waiting_client(#client{wait_key = Key}) -> ets:delete(?WAITERS, Key). get_proc_config() -> - Limit = config:get_boolean("query_server_config", "reduce_limit", true), - Timeout = get_os_process_timeout(), {[ - {<<"reduce_limit">>, Limit}, - {<<"timeout">>, Timeout} + {<<"reduce_limit">>, get_reduce_limit()}, + {<<"timeout">>, get_os_process_timeout()} ]}. +% Reduce limit is a tri-state value of true, false or log. The default value if +% is true. That's also the value if anything other than those 3 values are +% specified. +% +get_reduce_limit() -> + case config:get("query_server_config", "reduce_limit", "true") of + "false" -> false; + "log" -> log; + "true" -> true; + Other when is_list(Other) -> true + end. + get_hard_limit() -> config:get_integer("query_server_config", "os_process_limit", 100). diff --git a/src/couch/test/eunit/couch_query_servers_tests.erl b/src/couch/test/eunit/couch_query_servers_tests.erl index 1ade40b67..57e6dbd36 100644 --- a/src/couch/test/eunit/couch_query_servers_tests.erl +++ b/src/couch/test/eunit/couch_query_servers_tests.erl @@ -16,96 +16,90 @@ -include_lib("couch/include/couch_eunit.hrl"). setup() -> - meck:new([config, couch_log]). + meck:new(couch_log, [passthrough]), + Ctx = test_util:start_couch([ioq]), + config:set("query_server_config", "reduce_limit", "true", false), + config:set("log", "level", "info", false), + Ctx. -teardown(_) -> +teardown(Ctx) -> + config:delete("query_server_config", "reduce_limit", "true", false), + config:delete("log", "level", false), + test_util:stop_couch(Ctx), meck:unload(). -setup_oom() -> - test_util:start_couch([ioq]). - -teardown_oom(Ctx) -> - meck:unload(), - test_util:stop_couch(Ctx). - -sum_overflow_test_() -> +query_server_limits_test_() -> { - "Test overflow detection in the _sum reduce function", + "Test overflow detection and batch splitting in query server", { - setup, + foreach, fun setup/0, fun teardown/1, [ - fun should_return_error_on_overflow/0, - fun should_return_object_on_log/0, - fun should_return_object_on_false/0 - ] - } - }. - -filter_oom_test_() -> - { - "Test recovery from oom in filters", - { - setup, - fun setup_oom/0, - fun teardown_oom/1, - [ - fun should_split_large_batches/0 + ?TDEF_FE(builtin_should_return_error_on_overflow), + ?TDEF_FE(builtin_should_return_object_on_log), + ?TDEF_FE(builtin_should_return_object_on_false), + ?TDEF_FE(js_reduce_should_return_error_on_overflow), + ?TDEF_FE(js_reduce_should_return_object_on_log), + ?TDEF_FE(js_reduce_should_return_object_on_false), + ?TDEF_FE(should_split_large_batches) ] } }. -should_return_error_on_overflow() -> - meck:reset([config, couch_log]), - meck:expect( - config, - get, - ["query_server_config", "reduce_limit", "true"], - "true" - ), - meck:expect(couch_log, error, ['_', '_'], ok), +builtin_should_return_error_on_overflow(_) -> + config:set("query_server_config", "reduce_limit", "true", false), + meck:reset(couch_log), KVs = gen_sum_kvs(), {ok, [Result]} = couch_query_servers:reduce(<<"foo">>, [<<"_sum">>], KVs), ?assertMatch({[{<<"error">>, <<"builtin_reduce_error">>} | _]}, Result), - ?assert(meck:called(config, get, '_')), ?assert(meck:called(couch_log, error, '_')). -should_return_object_on_log() -> - meck:reset([config, couch_log]), - meck:expect( - config, - get, - ["query_server_config", "reduce_limit", "true"], - "log" - ), - meck:expect(couch_log, error, ['_', '_'], ok), +builtin_should_return_object_on_log(_) -> + config:set("query_server_config", "reduce_limit", "log", false), + meck:reset(couch_log), KVs = gen_sum_kvs(), {ok, [Result]} = couch_query_servers:reduce(<<"foo">>, [<<"_sum">>], KVs), ?assertMatch({[_ | _]}, Result), Keys = [K || {K, _} <- element(1, Result)], ?assert(not lists:member(<<"error">>, Keys)), - ?assert(meck:called(config, get, '_')), ?assert(meck:called(couch_log, error, '_')). -should_return_object_on_false() -> - meck:reset([config, couch_log]), - meck:expect( - config, - get, - ["query_server_config", "reduce_limit", "true"], - "false" - ), - meck:expect(couch_log, error, ['_', '_'], ok), +builtin_should_return_object_on_false(_) -> + config:set("query_server_config", "reduce_limit", "false", false), + meck:reset(couch_log), KVs = gen_sum_kvs(), {ok, [Result]} = couch_query_servers:reduce(<<"foo">>, [<<"_sum">>], KVs), ?assertMatch({[_ | _]}, Result), Keys = [K || {K, _} <- element(1, Result)], ?assert(not lists:member(<<"error">>, Keys)), - ?assert(meck:called(config, get, '_')), ?assertNot(meck:called(couch_log, error, '_')). -should_split_large_batches() -> +js_reduce_should_return_error_on_overflow(_) -> + config:set("query_server_config", "reduce_limit", "true", false), + meck:reset(couch_log), + KVs = gen_sum_kvs(), + {ok, [Result]} = couch_query_servers:reduce(<<"javascript">>, [sum_js()], KVs), + ?assertMatch({[{reduce_overflow_error, <<"Reduce output must shrink", _/binary>>}]}, Result), + ?assert(meck:called(couch_log, error, '_')). + +js_reduce_should_return_object_on_log(_) -> + config:set("query_server_config", "reduce_limit", "log", false), + meck:reset(couch_log), + KVs = gen_sum_kvs(), + {ok, [Result]} = couch_query_servers:reduce(<<"javascript">>, [sum_js()], KVs), + ?assertMatch([<<"result">>, [_ | _]], Result), + ?assert(meck:called(couch_log, info, '_')). + +js_reduce_should_return_object_on_false(_) -> + config:set("query_server_config", "reduce_limit", "false", false), + meck:reset(couch_log), + KVs = gen_sum_kvs(), + {ok, [Result]} = couch_query_servers:reduce(<<"javascript">>, [sum_js()], KVs), + ?assertMatch([<<"result">>, [_ | _]], Result), + ?assertNot(meck:called(couch_log, info, '_')). + +should_split_large_batches(_) -> Req = {json_req, {[]}}, Db = <<"somedb">>, DDoc = #doc{ @@ -152,3 +146,11 @@ gen_sum_kvs() -> end, lists:seq(1, 10) ). + +sum_js() -> + % Don't do this in real views + <<"\n" + " function(keys, vals, rereduce) {\n" + " return ['result', vals.concat(vals)]\n" + " }\n" + " ">>. diff --git a/src/docs/src/config/query-servers.rst b/src/docs/src/config/query-servers.rst index 285bf1280..ac6a05e0a 100644 --- a/src/docs/src/config/query-servers.rst +++ b/src/docs/src/config/query-servers.rst @@ -139,7 +139,9 @@ Query Servers Configuration .. config:option:: reduce_limit :: Reduce limit control Controls `Reduce overflow` error that raises when output of - :ref:`reduce functions <reducefun>` is too big:: + :ref:`reduce functions <reducefun>` is too big. The possible values are + ``true``, ``false`` or ``log``. The ``log`` value will log a warning + instead of crashing the view :: [query_server_config] reduce_limit = true diff --git a/src/docs/src/ddocs/ddocs.rst b/src/docs/src/ddocs/ddocs.rst index 4d3c06633..fcb741e5f 100644 --- a/src/docs/src/ddocs/ddocs.rst +++ b/src/docs/src/ddocs/ddocs.rst @@ -152,6 +152,9 @@ that the main task of reduce functions is to *reduce* the mapped result, not to make it bigger. Generally, your reduce function should converge rapidly to a single value - which could be an array or similar object. +Set ``reduce_limit`` to ``log`` so views which would crash if the setting were +``true`` would instead return the result and log an ``info`` level warning. + .. _reducefun/builtin: Built-in Reduce Functions
