This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch merge-3.4.3
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 6d4119f5fdb2822be799639555ced5647b11137a
Author: Nick Vatamaniuc <[email protected]>
AuthorDate: Sat Jan 11 13:52:30 2025 -0500

    Remove pread_limit config from couch_file
    
    It's an almost 10 year old, undocumented, and never worked properly. We 
tried
    using it at Cloudant once then promptly disabled it and never touched it 
again.
    
    What usually reports corruption fairly well is the "read beyond eof" check. 
As
    such improve that error:
      * Show the total bytes requested
      * Show both the current record file position and the actual file size
    
    In some cases we relied on badmatch for error reporting. Instead, use the 
early
    throw gen_server trick to return a proper error with a file path and 
position
    if pread fail.
---
 src/couch/src/couch_file.erl              | 54 ++++++++++----------
 src/couch/test/eunit/couch_file_tests.erl | 82 +++++++++++++------------------
 2 files changed, 63 insertions(+), 73 deletions(-)

diff --git a/src/couch/src/couch_file.erl b/src/couch/src/couch_file.erl
index f15049155..248a26097 100644
--- a/src/couch/src/couch_file.erl
+++ b/src/couch/src/couch_file.erl
@@ -31,8 +31,7 @@
     fd,
     is_sys,
     eof = 0,
-    db_monitor,
-    pread_limit = 0
+    db_monitor
 }).
 
 % public API
@@ -409,7 +408,6 @@ last_read(Fd) when is_pid(Fd) ->
 
 init({Filepath, Options, ReturnPid, Ref}) ->
     OpenOptions = file_open_options(Options),
-    Limit = get_pread_limit(),
     IsSys = lists:member(sys_db, Options),
     update_read_timestamp(),
     case lists:member(create, Options) of
@@ -432,7 +430,7 @@ init({Filepath, Options, ReturnPid, Ref}) ->
                                     ok = fsync(Fd),
                                     maybe_track_open_os_files(Options),
                                     erlang:send_after(?INITIAL_WAIT, self(), 
maybe_close),
-                                    {ok, #file{fd = Fd, is_sys = IsSys, 
pread_limit = Limit}};
+                                    {ok, #file{fd = Fd, is_sys = IsSys}};
                                 false ->
                                     ok = file:close(Fd),
                                     init_status_error(ReturnPid, Ref, {error, 
eexist})
@@ -440,7 +438,7 @@ init({Filepath, Options, ReturnPid, Ref}) ->
                         false ->
                             maybe_track_open_os_files(Options),
                             erlang:send_after(?INITIAL_WAIT, self(), 
maybe_close),
-                            {ok, #file{fd = Fd, is_sys = IsSys, pread_limit = 
Limit}}
+                            {ok, #file{fd = Fd, is_sys = IsSys}}
                     end;
                 Error ->
                     init_status_error(ReturnPid, Ref, Error)
@@ -457,7 +455,7 @@ init({Filepath, Options, ReturnPid, Ref}) ->
                             maybe_track_open_os_files(Options),
                             {ok, Eof} = file:position(Fd, eof),
                             erlang:send_after(?INITIAL_WAIT, self(), 
maybe_close),
-                            {ok, #file{fd = Fd, eof = Eof, is_sys = IsSys, 
pread_limit = Limit}};
+                            {ok, #file{fd = Fd, eof = Eof, is_sys = IsSys}};
                         Error ->
                             init_status_error(ReturnPid, Ref, Error)
                     end;
@@ -688,35 +686,45 @@ find_newest_header(Fd, [{Location, Size} | 
LocationSizes]) ->
             find_newest_header(Fd, LocationSizes)
     end.
 
-read_multi_raw_iolists_int(#file{fd = Fd} = File, PosLens) ->
+read_multi_raw_iolists_int(#file{fd = Fd, eof = Eof} = File, PosLens) ->
     MapFun = fun({Pos, Len}) -> get_pread_locnum(File, Pos, Len) end,
     LocNums = lists:map(MapFun, PosLens),
-    {ok, Bins} = file:pread(Fd, LocNums),
     ZipFun = fun({Pos, TotalBytes}, Bin) ->
-        <<RawBin:TotalBytes/binary>> = Bin,
-        {remove_block_prefixes(Pos rem ?SIZE_BLOCK, RawBin), Pos + TotalBytes}
+        case is_binary(Bin) andalso byte_size(Bin) == TotalBytes of
+            true ->
+                {remove_block_prefixes(Pos rem ?SIZE_BLOCK, Bin), Pos + 
TotalBytes};
+            false ->
+                couch_stats:increment_counter([pread, exceed_eof]),
+                {ok, CurEof} = file:position(File#file.fd, eof),
+                {_Fd, Filepath} = get(couch_file_fd),
+                throw_stop({read_beyond_eof, Filepath, Pos, TotalBytes, Eof, 
CurEof}, File)
+        end
     end,
-    lists:zipwith(ZipFun, LocNums, Bins).
+    case file:pread(Fd, LocNums) of
+        {ok, Bins} ->
+            lists:zipwith(ZipFun, LocNums, Bins);
+        {error, Error} ->
+            {_Fd, Filepath} = get(couch_file_fd),
+            throw_stop({pread, Filepath, Error, hd(LocNums)}, File)
+    end.
 
-get_pread_locnum(#file{} = File, Pos, Len) ->
-    #file{eof = Eof, pread_limit = PreadLimit} = File,
+get_pread_locnum(#file{eof = Eof} = File, Pos, Len) ->
     BlockOffset = Pos rem ?SIZE_BLOCK,
     TotalBytes = calculate_total_read_len(BlockOffset, Len),
     case Pos + TotalBytes of
         Size when Size > Eof ->
             couch_stats:increment_counter([pread, exceed_eof]),
+            {ok, CurEof} = file:position(File#file.fd, eof),
             {_Fd, Filepath} = get(couch_file_fd),
-            ErrEof = {error, {read_beyond_eof, Filepath, Pos, Eof}},
-            throw({stop, ErrEof, ErrEof, File});
-        Size when Size > PreadLimit ->
-            couch_stats:increment_counter([pread, exceed_limit]),
-            {_Fd, Filepath} = get(couch_file_fd),
-            ErrPread = {error, {exceed_pread_limit, Filepath, PreadLimit}},
-            throw({stop, ErrPread, ErrPread, File});
+            throw_stop({read_beyond_eof, Filepath, Pos, TotalBytes, Eof, 
CurEof}, File);
         _ ->
             {Pos, TotalBytes}
     end.
 
+throw_stop(Error, #file{} = File) ->
+    % This follows the gen_server reply via a throw pattern
+    throw({stop, {error, Error}, {error, Error}, File}).
+
 -spec extract_checksum(iolist()) -> {iolist(), binary()}.
 extract_checksum(FullIoList) ->
     {ChecksumList, IoList} = split_iolist(FullIoList, 16, []),
@@ -858,12 +866,6 @@ process_info(Pid) ->
 update_read_timestamp() ->
     put(read_timestamp, os:timestamp()).
 
-get_pread_limit() ->
-    case config:get_integer("couchdb", "max_pread_size", 0) of
-        N when N > 0 -> N;
-        _ -> infinity
-    end.
-
 %% in event of a partially successful write.
 reset_eof(#file{} = File) ->
     {ok, Eof} = file:position(File#file.fd, eof),
diff --git a/src/couch/test/eunit/couch_file_tests.erl 
b/src/couch/test/eunit/couch_file_tests.erl
index 64d61fbac..36b2a61a1 100644
--- a/src/couch/test/eunit/couch_file_tests.erl
+++ b/src/couch/test/eunit/couch_file_tests.erl
@@ -20,9 +20,11 @@
 
 setup() ->
     {ok, Fd} = couch_file:open(?tempfile(), [create, overwrite]),
+    meck:new(file, [passthrough, unstick]),
     Fd.
 
 teardown(Fd) ->
+    meck:unload(file),
     case is_process_alive(Fd) of
         true -> ok = couch_file:close(Fd);
         false -> ok
@@ -70,7 +72,7 @@ read_write_test_() ->
         "Common file read/write tests",
         {
             setup,
-            fun() -> test_util:start_couch() end,
+            fun test_util:start_couch/0,
             fun test_util:stop_couch/1,
             {
                 foreach,
@@ -89,6 +91,8 @@ read_write_test_() ->
                     ?TDEF_FE(should_fsync_by_path),
                     ?TDEF_FE(should_update_fsync_stats),
                     ?TDEF_FE(should_not_read_beyond_eof),
+                    
?TDEF_FE(should_not_read_beyond_eof_when_externally_truncated),
+                    ?TDEF_FE(should_catch_pread_failure),
                     ?TDEF_FE(should_truncate),
                     ?TDEF_FE(should_set_db_pid),
                     ?TDEF_FE(should_update_last_read_time),
@@ -174,7 +178,35 @@ should_not_read_beyond_eof(_) ->
     ok = file:pwrite(Io, Pos, <<0:1/integer, DoubleBin:31/integer>>),
     file:close(Io),
     unlink(Fd),
-    ?assertMatch({error, {read_beyond_eof, Filepath, _, _}}, 
couch_file:pread_binary(Fd, Pos)),
+    ?assertMatch(
+        {error, {read_beyond_eof, Filepath, _, _, _, _}}, 
couch_file:pread_binary(Fd, Pos)
+    ),
+    catch file:close(Fd).
+
+should_not_read_beyond_eof_when_externally_truncated(_) ->
+    {ok, Fd} = couch_file:open(?tempfile(), [create, overwrite]),
+    Bin = list_to_binary(lists:duplicate(100, 0)),
+    {ok, Pos, _Size} = couch_file:append_binary(Fd, Bin),
+    {_, Filepath} = couch_file:process_info(Fd),
+    %% corrupt db file by truncating it
+    {ok, Io} = file:open(Filepath, [read, write, binary]),
+    % 4 (len) + 1 byte
+    {ok, _} = file:position(Io, Pos + 5),
+    ok = file:truncate(Io),
+    file:close(Io),
+    unlink(Fd),
+    ?assertMatch(
+        {error, {read_beyond_eof, Filepath, _, _, _, _}}, 
couch_file:pread_binary(Fd, Pos)
+    ),
+    catch file:close(Fd).
+
+should_catch_pread_failure(_) ->
+    {ok, Fd} = couch_file:open(?tempfile(), [create, overwrite]),
+    {ok, Pos, _Size} = couch_file:append_binary(Fd, <<"x">>),
+    {_, Filepath} = couch_file:process_info(Fd),
+    meck:expect(file, pread, 2, {error, einval}),
+    unlink(Fd),
+    ?assertMatch({error, {pread, Filepath, einval, {0, _}}}, 
couch_file:pread_terms(Fd, [Pos])),
     catch file:close(Fd).
 
 should_truncate(Fd) ->
@@ -238,7 +270,7 @@ should_apply_overwrite_create_option(Fd) ->
     ?assertEqual(ok, couch_file:close(Fd)),
     {ok, Fd1} = couch_file:open(Path, [create, overwrite]),
     unlink(Fd1),
-    ExpectError = {error, {read_beyond_eof, Path, Pos, 0}},
+    ExpectError = {error, {read_beyond_eof, Path, Pos, 5, 0, 0}},
     ?assertEqual(ExpectError, couch_file:pread_term(Fd1, Pos)).
 
 should_error_on_creation_if_exists(Fd) ->
@@ -280,50 +312,6 @@ should_crash_on_unexpected_cast(Fd) ->
             ?assertEqual({invalid_cast, potato}, Reason)
     end.
 
-pread_limit_test_() ->
-    {
-        "Read limit tests",
-        {
-            setup,
-            fun() ->
-                Ctx = test_util:start_couch([ioq]),
-                config:set("couchdb", "max_pread_size", "50000", _Persist = 
false),
-                Ctx
-            end,
-            fun(Ctx) ->
-                config:delete("couchdb", "max_pread_size", _Persist = false),
-                test_util:stop_couch(Ctx)
-            end,
-            {
-                foreach,
-                fun setup/0,
-                fun teardown/1,
-                [
-                    ?TDEF_FE(should_increase_file_size_on_write),
-                    ?TDEF_FE(should_return_current_file_size_on_write),
-                    ?TDEF_FE(should_write_and_read_term),
-                    ?TDEF_FE(should_write_and_read_binary),
-                    ?TDEF_FE(should_not_read_more_than_pread_limit)
-                ]
-            }
-        }
-    }.
-
-should_not_read_more_than_pread_limit(_) ->
-    {ok, Fd} = couch_file:open(?tempfile(), [create, overwrite]),
-    {_, Filepath} = couch_file:process_info(Fd),
-    BigBin = list_to_binary(lists:duplicate(100000, 0)),
-    {ok, Pos, _Size} = couch_file:append_binary(Fd, BigBin),
-    unlink(Fd),
-    Ref = monitor(process, Fd),
-    ExpectError = {error, {exceed_pread_limit, Filepath, 50000}},
-    ?assertEqual(ExpectError, couch_file:pread_binary(Fd, Pos)),
-    receive
-        {'DOWN', Ref, _, _, Reason} ->
-            ?assertEqual(ExpectError, Reason)
-    end,
-    catch file:close(Fd).
-
 header_test_() ->
     {
         "File header read/write tests",

Reply via email to