This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch fix-local-rev-parsing
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 2080aee65e2a347b43cc32e5ee3c18f5a99989b5
Author: Nick Vatamaniuc <[email protected]>
AuthorDate: Fri Nov 21 15:02:50 2025 -0500

    Fix local doc rev parsing
    
    Previously, we applied the same "magic" hex decoding to revision ids for 
local
    docs as we did for regular docs. The "magic" is to detect the case of 
binaries
    with length 32 and assume they are hex-encoded, and then hex-decode them. 
When
    encoding them we did the opposite -- checked if they are 16 bytes long, and 
if,
    so we hex-encoded them. That's a nice optimization to half the storage 
needed
    for revs internally.
    
    However, the trick above doesn't really apply to local docs. For local docs
    revision ids have the format <<"0-$N">>. Where N is a decimal number. It 
starts
    at 1 with the first update, then gets bumped on every update. The `0` prefix
    though never changes, it's always `0`.
    
    Since regular (non-local) docs cannot have a revision depth of 0, it's only 
the
    case for local docs we can detect the case of encoding local doc revision 
and
    skip the magic hex encoding and decoding to remove the strange corner case.
---
 src/couch/src/couch_doc.erl                 | 22 ++++++++++----------
 src/couch/test/eunit/couch_db_doc_tests.erl | 32 +++++++++++++++++++++++++++++
 2 files changed, 43 insertions(+), 11 deletions(-)

diff --git a/src/couch/src/couch_doc.erl b/src/couch/src/couch_doc.erl
index 7ad0f76b6..0993b85dc 100644
--- a/src/couch/src/couch_doc.erl
+++ b/src/couch/src/couch_doc.erl
@@ -68,20 +68,20 @@ to_json_revisions(Options, Start, RevIds0) ->
                 {<<"_revisions">>,
                     {[
                         {<<"start">>, Start},
-                        {<<"ids">>, [revid_to_str(R) || R <- RevIds]}
+                        {<<"ids">>, [revid_to_str(Start, R) || R <- RevIds]}
                     ]}}
             ]
     end.
 
-revid_to_str(RevId) when size(RevId) =:= 16 ->
+revid_to_str(Start, RevId) when Start =/= 0, byte_size(RevId) =:= 16 ->
     couch_util:to_hex_bin(RevId);
-revid_to_str(RevId) when is_binary(RevId) ->
+revid_to_str(Start, RevId) when is_integer(Start), Start >= 0, 
is_binary(RevId) ->
     RevId;
-revid_to_str(RevId) when is_list(RevId) ->
+revid_to_str(Start, RevId) when is_integer(Start), Start >= 0, is_list(RevId) 
->
     list_to_binary(RevId).
 
 rev_to_str({Pos, RevId}) ->
-    <<(integer_to_binary(Pos))/binary, $-, (revid_to_str(RevId))/binary>>.
+    <<(integer_to_binary(Pos))/binary, $-, (revid_to_str(Pos, RevId))/binary>>.
 
 revs_to_strs([]) ->
     [];
@@ -188,13 +188,13 @@ from_json_obj({Props}, DbName) ->
 from_json_obj(_Other, _) ->
     throw({bad_request, "Document must be a JSON object"}).
 
-parse_revid(RevId) when is_binary(RevId), size(RevId) =:= 32 ->
+parse_revid(Start, RevId) when Start =/= 0, is_binary(RevId), byte_size(RevId) 
=:= 32 ->
     binary:decode_hex(RevId);
-parse_revid(RevId) when is_list(RevId), length(RevId) =:= 32 ->
+parse_revid(Start, RevId) when Start =/= 0, is_list(RevId), length(RevId) =:= 
32 ->
     binary:decode_hex(list_to_binary(RevId));
-parse_revid(RevId) when is_binary(RevId) ->
+parse_revid(Start, RevId) when is_integer(Start), Start >= 0, is_binary(RevId) 
->
     RevId;
-parse_revid(RevId) when is_list(RevId) ->
+parse_revid(Start, RevId) when is_integer(Start), Start >= 0, is_list(RevId) ->
     ?l2b(RevId).
 
 parse_rev(Rev) when is_binary(Rev) ->
@@ -211,7 +211,7 @@ parse_rev(Rev) when is_list(Rev) ->
         {Pos, [$- | RevId]} ->
             try
                 IntPos = list_to_integer(Pos),
-                {IntPos, parse_revid(RevId)}
+                {IntPos, parse_revid(IntPos, RevId)}
             catch
                 error:badarg -> throw({bad_request, <<"Invalid rev format">>})
             end;
@@ -313,7 +313,7 @@ transfer_fields([{<<"_revisions">>, {Props}} | Rest], Doc, 
DbName) ->
     RevIds2 = lists:map(
         fun(RevId) ->
             try
-                parse_revid(RevId)
+                parse_revid(Start, RevId)
             catch
                 error:function_clause ->
                     throw({doc_validation, "RevId isn't a string"});
diff --git a/src/couch/test/eunit/couch_db_doc_tests.erl 
b/src/couch/test/eunit/couch_db_doc_tests.erl
index dc1ac79e6..46a42fe2d 100644
--- a/src/couch/test/eunit/couch_db_doc_tests.erl
+++ b/src/couch/test/eunit/couch_db_doc_tests.erl
@@ -115,3 +115,35 @@ add_revisions(Db, DocId, Rev, N) ->
         Rev,
         lists:seq(1, N)
     ).
+
+rev_to_str_test() ->
+    ?assertEqual(<<"0-0">>, couch_doc:rev_to_str({0, "0"})),
+    ?assertEqual(<<"0-0">>, couch_doc:rev_to_str({0, <<"0">>})),
+    ?assertEqual(<<"1-a">>, couch_doc:rev_to_str({1, <<"a">>})),
+    Bytes = <<<<48>> || _ <- lists:seq(1, 16)>>,
+    ?assertEqual(<<"0-0000000000000000">>, couch_doc:rev_to_str({0, Bytes})),
+    ?assertEqual(<<"1-30303030303030303030303030303030">>, 
couch_doc:rev_to_str({1, Bytes})),
+    ?assertError(badarg, couch_doc:rev_to_str({a, b})),
+    ?assertError(function_clause, couch_doc:rev_to_str({-1, <<"x">>})),
+    ?assertError(function_clause, couch_doc:rev_to_str(foo)).
+
+parse_rev_test() ->
+    ?assertEqual({1, <<"x">>}, couch_doc:parse_rev("1-x")),
+    ?assertEqual({1, <<"x">>}, couch_doc:parse_rev(<<"1-x">>)),
+    ?assertEqual({0, <<"y">>}, couch_doc:parse_rev("0-y")),
+    ?assertEqual({1234567890, <<"abc">>}, 
couch_doc:parse_rev("1234567890-abc")),
+    ?assertEqual(
+        {0, <<"00000000000000000000000000000000">>},
+        couch_doc:parse_rev("0-00000000000000000000000000000000")
+    ),
+    ?assertEqual(
+        {1, <<0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0>>},
+        couch_doc:parse_rev("1-00000000000000000000000000000000")
+    ),
+    ?assertThrow({bad_request, _}, couch_doc:parse_rev(foo)),
+    ?assertThrow({bad_request, _}, couch_doc:parse_rev("x-y")),
+    ?assertThrow({bad_request, _}, couch_doc:parse_rev("x")),
+    ?assertThrow({bad_request, _}, couch_doc:parse_rev("0")),
+    ?assertThrow({bad_request, _}, couch_doc:parse_rev("1")),
+    ?assertThrow({bad_request, _}, couch_doc:parse_rev("-")),
+    ?assertThrow({bad_request, _}, couch_doc:parse_rev("-0-1")).

Reply via email to