This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch fix-reduce-limit-log
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 86a346679689598a9fab866abb18e23b5698326d
Author: Nick Vatamaniuc <[email protected]>
AuthorDate: Mon Sep 8 18:48:20 2025 -0400

    Fix reduce_limit = log feature
    
    In https://github.com/apache/couchdb/pull/3609 reduce_log was inadvertently
    disabled and instead of being a tri-state became a boolean setting only, 
with
    `log` become `true`.
    
    Bring back the `log` feature and add a test to ensure we'd notice if gets
    disabled again. Also, add some docs and comments about its existence.
---
 rel/overlay/etc/default.ini                        |   8 +-
 src/couch/src/couch_proc_manager.erl               |  18 +++-
 src/couch/test/eunit/couch_query_servers_tests.erl | 120 +++++++++++----------
 src/docs/src/config/query-servers.rst              |   4 +-
 src/docs/src/ddocs/ddocs.rst                       |   3 +
 5 files changed, 86 insertions(+), 67 deletions(-)

diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini
index 6cffc12fa..7b89bc3ff 100644
--- a/rel/overlay/etc/default.ini
+++ b/rel/overlay/etc/default.ini
@@ -540,11 +540,13 @@ authentication_db = _users
 ; Erlang Query Server
 ;enable_erlang_query_server = false
 
-; Changing reduce_limit to false will disable reduce_limit.
-; If you think you're hitting reduce_limit with a "good" reduce function,
-; please let us know on the mailing list so we can fine tune the heuristic.
+
 [query_server_config]
 ;commit_freq = 5
+; Changing reduce_limit to false will disable reduce_limit. Setting the reduce
+; limit to log will only log a warning instead of crashing the view. If you
+; think you're hitting reduce_limit with a "good" reduce function, please let
+; us know on the mailing list so we can fine tune the heuristic.
 ;reduce_limit = true
 ;os_process_limit = 100
 ;os_process_idle_limit = 300
diff --git a/src/couch/src/couch_proc_manager.erl 
b/src/couch/src/couch_proc_manager.erl
index d90463bf3..b6a1659a7 100644
--- a/src/couch/src/couch_proc_manager.erl
+++ b/src/couch/src/couch_proc_manager.erl
@@ -730,13 +730,23 @@ remove_waiting_client(#client{wait_key = Key}) ->
     ets:delete(?WAITERS, Key).
 
 get_proc_config() ->
-    Limit = config:get_boolean("query_server_config", "reduce_limit", true),
-    Timeout = get_os_process_timeout(),
     {[
-        {<<"reduce_limit">>, Limit},
-        {<<"timeout">>, Timeout}
+        {<<"reduce_limit">>, get_reduce_limit()},
+        {<<"timeout">>, get_os_process_timeout()}
     ]}.
 
+% Reduce limit is a tri-state value of true, false or log. The default value if
+% is true. That's also the value if anything other than those 3 values are
+% specified.
+%
+get_reduce_limit() ->
+    case config:get("query_server_config", "reduce_limit", "true") of
+        "false" -> false;
+        "log" -> log;
+        "true" -> true;
+        Other when is_list(Other) -> true
+    end.
+
 get_hard_limit() ->
     config:get_integer("query_server_config", "os_process_limit", 100).
 
diff --git a/src/couch/test/eunit/couch_query_servers_tests.erl 
b/src/couch/test/eunit/couch_query_servers_tests.erl
index 1ade40b67..57e6dbd36 100644
--- a/src/couch/test/eunit/couch_query_servers_tests.erl
+++ b/src/couch/test/eunit/couch_query_servers_tests.erl
@@ -16,96 +16,90 @@
 -include_lib("couch/include/couch_eunit.hrl").
 
 setup() ->
-    meck:new([config, couch_log]).
+    meck:new(couch_log, [passthrough]),
+    Ctx = test_util:start_couch([ioq]),
+    config:set("query_server_config", "reduce_limit", "true", false),
+    config:set("log", "level", "info", false),
+    Ctx.
 
-teardown(_) ->
+teardown(Ctx) ->
+    config:delete("query_server_config", "reduce_limit", "true", false),
+    config:delete("log", "level", false),
+    test_util:stop_couch(Ctx),
     meck:unload().
 
-setup_oom() ->
-    test_util:start_couch([ioq]).
-
-teardown_oom(Ctx) ->
-    meck:unload(),
-    test_util:stop_couch(Ctx).
-
-sum_overflow_test_() ->
+query_server_limits_test_() ->
     {
-        "Test overflow detection in the _sum reduce function",
+        "Test overflow detection and batch splitting in query server",
         {
-            setup,
+            foreach,
             fun setup/0,
             fun teardown/1,
             [
-                fun should_return_error_on_overflow/0,
-                fun should_return_object_on_log/0,
-                fun should_return_object_on_false/0
-            ]
-        }
-    }.
-
-filter_oom_test_() ->
-    {
-        "Test recovery from oom in filters",
-        {
-            setup,
-            fun setup_oom/0,
-            fun teardown_oom/1,
-            [
-                fun should_split_large_batches/0
+                ?TDEF_FE(builtin_should_return_error_on_overflow),
+                ?TDEF_FE(builtin_should_return_object_on_log),
+                ?TDEF_FE(builtin_should_return_object_on_false),
+                ?TDEF_FE(js_reduce_should_return_error_on_overflow),
+                ?TDEF_FE(js_reduce_should_return_object_on_log),
+                ?TDEF_FE(js_reduce_should_return_object_on_false),
+                ?TDEF_FE(should_split_large_batches)
             ]
         }
     }.
 
-should_return_error_on_overflow() ->
-    meck:reset([config, couch_log]),
-    meck:expect(
-        config,
-        get,
-        ["query_server_config", "reduce_limit", "true"],
-        "true"
-    ),
-    meck:expect(couch_log, error, ['_', '_'], ok),
+builtin_should_return_error_on_overflow(_) ->
+    config:set("query_server_config", "reduce_limit", "true", false),
+    meck:reset(couch_log),
     KVs = gen_sum_kvs(),
     {ok, [Result]} = couch_query_servers:reduce(<<"foo">>, [<<"_sum">>], KVs),
     ?assertMatch({[{<<"error">>, <<"builtin_reduce_error">>} | _]}, Result),
-    ?assert(meck:called(config, get, '_')),
     ?assert(meck:called(couch_log, error, '_')).
 
-should_return_object_on_log() ->
-    meck:reset([config, couch_log]),
-    meck:expect(
-        config,
-        get,
-        ["query_server_config", "reduce_limit", "true"],
-        "log"
-    ),
-    meck:expect(couch_log, error, ['_', '_'], ok),
+builtin_should_return_object_on_log(_) ->
+    config:set("query_server_config", "reduce_limit", "log", false),
+    meck:reset(couch_log),
     KVs = gen_sum_kvs(),
     {ok, [Result]} = couch_query_servers:reduce(<<"foo">>, [<<"_sum">>], KVs),
     ?assertMatch({[_ | _]}, Result),
     Keys = [K || {K, _} <- element(1, Result)],
     ?assert(not lists:member(<<"error">>, Keys)),
-    ?assert(meck:called(config, get, '_')),
     ?assert(meck:called(couch_log, error, '_')).
 
-should_return_object_on_false() ->
-    meck:reset([config, couch_log]),
-    meck:expect(
-        config,
-        get,
-        ["query_server_config", "reduce_limit", "true"],
-        "false"
-    ),
-    meck:expect(couch_log, error, ['_', '_'], ok),
+builtin_should_return_object_on_false(_) ->
+    config:set("query_server_config", "reduce_limit", "false", false),
+    meck:reset(couch_log),
     KVs = gen_sum_kvs(),
     {ok, [Result]} = couch_query_servers:reduce(<<"foo">>, [<<"_sum">>], KVs),
     ?assertMatch({[_ | _]}, Result),
     Keys = [K || {K, _} <- element(1, Result)],
     ?assert(not lists:member(<<"error">>, Keys)),
-    ?assert(meck:called(config, get, '_')),
     ?assertNot(meck:called(couch_log, error, '_')).
 
-should_split_large_batches() ->
+js_reduce_should_return_error_on_overflow(_) ->
+    config:set("query_server_config", "reduce_limit", "true", false),
+    meck:reset(couch_log),
+    KVs = gen_sum_kvs(),
+    {ok, [Result]} = couch_query_servers:reduce(<<"javascript">>, [sum_js()], 
KVs),
+    ?assertMatch({[{reduce_overflow_error, <<"Reduce output must shrink", 
_/binary>>}]}, Result),
+    ?assert(meck:called(couch_log, error, '_')).
+
+js_reduce_should_return_object_on_log(_) ->
+    config:set("query_server_config", "reduce_limit", "log", false),
+    meck:reset(couch_log),
+    KVs = gen_sum_kvs(),
+    {ok, [Result]} = couch_query_servers:reduce(<<"javascript">>, [sum_js()], 
KVs),
+    ?assertMatch([<<"result">>, [_ | _]], Result),
+    ?assert(meck:called(couch_log, info, '_')).
+
+js_reduce_should_return_object_on_false(_) ->
+    config:set("query_server_config", "reduce_limit", "false", false),
+    meck:reset(couch_log),
+    KVs = gen_sum_kvs(),
+    {ok, [Result]} = couch_query_servers:reduce(<<"javascript">>, [sum_js()], 
KVs),
+    ?assertMatch([<<"result">>, [_ | _]], Result),
+    ?assertNot(meck:called(couch_log, info, '_')).
+
+should_split_large_batches(_) ->
     Req = {json_req, {[]}},
     Db = <<"somedb">>,
     DDoc = #doc{
@@ -152,3 +146,11 @@ gen_sum_kvs() ->
         end,
         lists:seq(1, 10)
     ).
+
+sum_js() ->
+    % Don't do this in real views
+    <<"\n"
+    "     function(keys, vals, rereduce) {\n"
+    "         return ['result', vals.concat(vals)]\n"
+    "     }\n"
+    "    ">>.
diff --git a/src/docs/src/config/query-servers.rst 
b/src/docs/src/config/query-servers.rst
index 285bf1280..ac6a05e0a 100644
--- a/src/docs/src/config/query-servers.rst
+++ b/src/docs/src/config/query-servers.rst
@@ -139,7 +139,9 @@ Query Servers Configuration
     .. config:option:: reduce_limit :: Reduce limit control
 
         Controls `Reduce overflow` error that raises when output of
-        :ref:`reduce functions <reducefun>` is too big::
+        :ref:`reduce functions <reducefun>` is too big. The possible values are
+        ``true``, ``false`` or ``log``. The ``log`` value will log a warning
+        instead of crashing the view ::
 
             [query_server_config]
             reduce_limit = true
diff --git a/src/docs/src/ddocs/ddocs.rst b/src/docs/src/ddocs/ddocs.rst
index 4d3c06633..fcb741e5f 100644
--- a/src/docs/src/ddocs/ddocs.rst
+++ b/src/docs/src/ddocs/ddocs.rst
@@ -152,6 +152,9 @@ that the main task of reduce functions is to *reduce* the 
mapped result, not to
 make it bigger. Generally, your reduce function should converge rapidly to a
 single value - which could be an array or similar object.
 
+Set ``reduce_limit`` to ``log`` so views which would crash if the setting were
+``true`` would instead return the result and log an ``info`` level warning.
+
 .. _reducefun/builtin:
 
 Built-in Reduce Functions

Reply via email to