This is an automated email from the ASF dual-hosted git repository. vatamane pushed a commit to branch merge-3.4.3 in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit 6d4119f5fdb2822be799639555ced5647b11137a Author: Nick Vatamaniuc <[email protected]> AuthorDate: Sat Jan 11 13:52:30 2025 -0500 Remove pread_limit config from couch_file It's an almost 10 year old, undocumented, and never worked properly. We tried using it at Cloudant once then promptly disabled it and never touched it again. What usually reports corruption fairly well is the "read beyond eof" check. As such improve that error: * Show the total bytes requested * Show both the current record file position and the actual file size In some cases we relied on badmatch for error reporting. Instead, use the early throw gen_server trick to return a proper error with a file path and position if pread fail. --- src/couch/src/couch_file.erl | 54 ++++++++++---------- src/couch/test/eunit/couch_file_tests.erl | 82 +++++++++++++------------------ 2 files changed, 63 insertions(+), 73 deletions(-) diff --git a/src/couch/src/couch_file.erl b/src/couch/src/couch_file.erl index f15049155..248a26097 100644 --- a/src/couch/src/couch_file.erl +++ b/src/couch/src/couch_file.erl @@ -31,8 +31,7 @@ fd, is_sys, eof = 0, - db_monitor, - pread_limit = 0 + db_monitor }). % public API @@ -409,7 +408,6 @@ last_read(Fd) when is_pid(Fd) -> init({Filepath, Options, ReturnPid, Ref}) -> OpenOptions = file_open_options(Options), - Limit = get_pread_limit(), IsSys = lists:member(sys_db, Options), update_read_timestamp(), case lists:member(create, Options) of @@ -432,7 +430,7 @@ init({Filepath, Options, ReturnPid, Ref}) -> ok = fsync(Fd), maybe_track_open_os_files(Options), erlang:send_after(?INITIAL_WAIT, self(), maybe_close), - {ok, #file{fd = Fd, is_sys = IsSys, pread_limit = Limit}}; + {ok, #file{fd = Fd, is_sys = IsSys}}; false -> ok = file:close(Fd), init_status_error(ReturnPid, Ref, {error, eexist}) @@ -440,7 +438,7 @@ init({Filepath, Options, ReturnPid, Ref}) -> false -> maybe_track_open_os_files(Options), erlang:send_after(?INITIAL_WAIT, self(), maybe_close), - {ok, #file{fd = Fd, is_sys = IsSys, pread_limit = Limit}} + {ok, #file{fd = Fd, is_sys = IsSys}} end; Error -> init_status_error(ReturnPid, Ref, Error) @@ -457,7 +455,7 @@ init({Filepath, Options, ReturnPid, Ref}) -> maybe_track_open_os_files(Options), {ok, Eof} = file:position(Fd, eof), erlang:send_after(?INITIAL_WAIT, self(), maybe_close), - {ok, #file{fd = Fd, eof = Eof, is_sys = IsSys, pread_limit = Limit}}; + {ok, #file{fd = Fd, eof = Eof, is_sys = IsSys}}; Error -> init_status_error(ReturnPid, Ref, Error) end; @@ -688,35 +686,45 @@ find_newest_header(Fd, [{Location, Size} | LocationSizes]) -> find_newest_header(Fd, LocationSizes) end. -read_multi_raw_iolists_int(#file{fd = Fd} = File, PosLens) -> +read_multi_raw_iolists_int(#file{fd = Fd, eof = Eof} = File, PosLens) -> MapFun = fun({Pos, Len}) -> get_pread_locnum(File, Pos, Len) end, LocNums = lists:map(MapFun, PosLens), - {ok, Bins} = file:pread(Fd, LocNums), ZipFun = fun({Pos, TotalBytes}, Bin) -> - <<RawBin:TotalBytes/binary>> = Bin, - {remove_block_prefixes(Pos rem ?SIZE_BLOCK, RawBin), Pos + TotalBytes} + case is_binary(Bin) andalso byte_size(Bin) == TotalBytes of + true -> + {remove_block_prefixes(Pos rem ?SIZE_BLOCK, Bin), Pos + TotalBytes}; + false -> + couch_stats:increment_counter([pread, exceed_eof]), + {ok, CurEof} = file:position(File#file.fd, eof), + {_Fd, Filepath} = get(couch_file_fd), + throw_stop({read_beyond_eof, Filepath, Pos, TotalBytes, Eof, CurEof}, File) + end end, - lists:zipwith(ZipFun, LocNums, Bins). + case file:pread(Fd, LocNums) of + {ok, Bins} -> + lists:zipwith(ZipFun, LocNums, Bins); + {error, Error} -> + {_Fd, Filepath} = get(couch_file_fd), + throw_stop({pread, Filepath, Error, hd(LocNums)}, File) + end. -get_pread_locnum(#file{} = File, Pos, Len) -> - #file{eof = Eof, pread_limit = PreadLimit} = File, +get_pread_locnum(#file{eof = Eof} = File, Pos, Len) -> BlockOffset = Pos rem ?SIZE_BLOCK, TotalBytes = calculate_total_read_len(BlockOffset, Len), case Pos + TotalBytes of Size when Size > Eof -> couch_stats:increment_counter([pread, exceed_eof]), + {ok, CurEof} = file:position(File#file.fd, eof), {_Fd, Filepath} = get(couch_file_fd), - ErrEof = {error, {read_beyond_eof, Filepath, Pos, Eof}}, - throw({stop, ErrEof, ErrEof, File}); - Size when Size > PreadLimit -> - couch_stats:increment_counter([pread, exceed_limit]), - {_Fd, Filepath} = get(couch_file_fd), - ErrPread = {error, {exceed_pread_limit, Filepath, PreadLimit}}, - throw({stop, ErrPread, ErrPread, File}); + throw_stop({read_beyond_eof, Filepath, Pos, TotalBytes, Eof, CurEof}, File); _ -> {Pos, TotalBytes} end. +throw_stop(Error, #file{} = File) -> + % This follows the gen_server reply via a throw pattern + throw({stop, {error, Error}, {error, Error}, File}). + -spec extract_checksum(iolist()) -> {iolist(), binary()}. extract_checksum(FullIoList) -> {ChecksumList, IoList} = split_iolist(FullIoList, 16, []), @@ -858,12 +866,6 @@ process_info(Pid) -> update_read_timestamp() -> put(read_timestamp, os:timestamp()). -get_pread_limit() -> - case config:get_integer("couchdb", "max_pread_size", 0) of - N when N > 0 -> N; - _ -> infinity - end. - %% in event of a partially successful write. reset_eof(#file{} = File) -> {ok, Eof} = file:position(File#file.fd, eof), diff --git a/src/couch/test/eunit/couch_file_tests.erl b/src/couch/test/eunit/couch_file_tests.erl index 64d61fbac..36b2a61a1 100644 --- a/src/couch/test/eunit/couch_file_tests.erl +++ b/src/couch/test/eunit/couch_file_tests.erl @@ -20,9 +20,11 @@ setup() -> {ok, Fd} = couch_file:open(?tempfile(), [create, overwrite]), + meck:new(file, [passthrough, unstick]), Fd. teardown(Fd) -> + meck:unload(file), case is_process_alive(Fd) of true -> ok = couch_file:close(Fd); false -> ok @@ -70,7 +72,7 @@ read_write_test_() -> "Common file read/write tests", { setup, - fun() -> test_util:start_couch() end, + fun test_util:start_couch/0, fun test_util:stop_couch/1, { foreach, @@ -89,6 +91,8 @@ read_write_test_() -> ?TDEF_FE(should_fsync_by_path), ?TDEF_FE(should_update_fsync_stats), ?TDEF_FE(should_not_read_beyond_eof), + ?TDEF_FE(should_not_read_beyond_eof_when_externally_truncated), + ?TDEF_FE(should_catch_pread_failure), ?TDEF_FE(should_truncate), ?TDEF_FE(should_set_db_pid), ?TDEF_FE(should_update_last_read_time), @@ -174,7 +178,35 @@ should_not_read_beyond_eof(_) -> ok = file:pwrite(Io, Pos, <<0:1/integer, DoubleBin:31/integer>>), file:close(Io), unlink(Fd), - ?assertMatch({error, {read_beyond_eof, Filepath, _, _}}, couch_file:pread_binary(Fd, Pos)), + ?assertMatch( + {error, {read_beyond_eof, Filepath, _, _, _, _}}, couch_file:pread_binary(Fd, Pos) + ), + catch file:close(Fd). + +should_not_read_beyond_eof_when_externally_truncated(_) -> + {ok, Fd} = couch_file:open(?tempfile(), [create, overwrite]), + Bin = list_to_binary(lists:duplicate(100, 0)), + {ok, Pos, _Size} = couch_file:append_binary(Fd, Bin), + {_, Filepath} = couch_file:process_info(Fd), + %% corrupt db file by truncating it + {ok, Io} = file:open(Filepath, [read, write, binary]), + % 4 (len) + 1 byte + {ok, _} = file:position(Io, Pos + 5), + ok = file:truncate(Io), + file:close(Io), + unlink(Fd), + ?assertMatch( + {error, {read_beyond_eof, Filepath, _, _, _, _}}, couch_file:pread_binary(Fd, Pos) + ), + catch file:close(Fd). + +should_catch_pread_failure(_) -> + {ok, Fd} = couch_file:open(?tempfile(), [create, overwrite]), + {ok, Pos, _Size} = couch_file:append_binary(Fd, <<"x">>), + {_, Filepath} = couch_file:process_info(Fd), + meck:expect(file, pread, 2, {error, einval}), + unlink(Fd), + ?assertMatch({error, {pread, Filepath, einval, {0, _}}}, couch_file:pread_terms(Fd, [Pos])), catch file:close(Fd). should_truncate(Fd) -> @@ -238,7 +270,7 @@ should_apply_overwrite_create_option(Fd) -> ?assertEqual(ok, couch_file:close(Fd)), {ok, Fd1} = couch_file:open(Path, [create, overwrite]), unlink(Fd1), - ExpectError = {error, {read_beyond_eof, Path, Pos, 0}}, + ExpectError = {error, {read_beyond_eof, Path, Pos, 5, 0, 0}}, ?assertEqual(ExpectError, couch_file:pread_term(Fd1, Pos)). should_error_on_creation_if_exists(Fd) -> @@ -280,50 +312,6 @@ should_crash_on_unexpected_cast(Fd) -> ?assertEqual({invalid_cast, potato}, Reason) end. -pread_limit_test_() -> - { - "Read limit tests", - { - setup, - fun() -> - Ctx = test_util:start_couch([ioq]), - config:set("couchdb", "max_pread_size", "50000", _Persist = false), - Ctx - end, - fun(Ctx) -> - config:delete("couchdb", "max_pread_size", _Persist = false), - test_util:stop_couch(Ctx) - end, - { - foreach, - fun setup/0, - fun teardown/1, - [ - ?TDEF_FE(should_increase_file_size_on_write), - ?TDEF_FE(should_return_current_file_size_on_write), - ?TDEF_FE(should_write_and_read_term), - ?TDEF_FE(should_write_and_read_binary), - ?TDEF_FE(should_not_read_more_than_pread_limit) - ] - } - } - }. - -should_not_read_more_than_pread_limit(_) -> - {ok, Fd} = couch_file:open(?tempfile(), [create, overwrite]), - {_, Filepath} = couch_file:process_info(Fd), - BigBin = list_to_binary(lists:duplicate(100000, 0)), - {ok, Pos, _Size} = couch_file:append_binary(Fd, BigBin), - unlink(Fd), - Ref = monitor(process, Fd), - ExpectError = {error, {exceed_pread_limit, Filepath, 50000}}, - ?assertEqual(ExpectError, couch_file:pread_binary(Fd, Pos)), - receive - {'DOWN', Ref, _, _, Reason} -> - ?assertEqual(ExpectError, Reason) - end, - catch file:close(Fd). - header_test_() -> { "File header read/write tests",
