This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/couchdb.git


The following commit(s) were added to refs/heads/main by this push:
     new a5103556f Fix local doc rev parsing
a5103556f is described below

commit a5103556f1e72d521cda6c9f529f14f01378eca2
Author: Nick Vatamaniuc <[email protected]>
AuthorDate: Fri Nov 21 15:02:50 2025 -0500

    Fix local doc rev parsing
    
    Previously, we applied the same "magic" hex decoding to revision ids for 
local
    docs as we did for regular docs. The "magic" is to detect the case of 
binaries
    with length 32 and assume they are hex-encoded, and then hex-decode them. 
When
    encoding them we did the opposite -- checked if they are 16 bytes long, and 
if,
    so we hex-encoded them. That's a nice optimization to half the storage 
needed
    for revs internally.
    
    However, the trick above doesn't really apply to local docs. For local docs
    revision ids have the format <<"0-$N">>. Where N is a decimal number. It 
starts
    at 1 with the first update, then gets bumped on every update. The `0` prefix
    though never changes, it's always `0`.
    
    Since regular (non-local) docs cannot have a revision depth of 0 (it's only 
the
    case for local docs) then can detect the case of encoding local doc revision
    and skip the magic hex encoding and decoding to remove the strange corner 
case.
---
 src/couch/src/couch_doc.erl                   | 22 +++++++++---------
 src/couch/test/eunit/couch_db_doc_tests.erl   | 32 +++++++++++++++++++++++++++
 src/couch/test/eunit/couch_doc_json_tests.erl | 12 +++++++++-
 3 files changed, 54 insertions(+), 12 deletions(-)

diff --git a/src/couch/src/couch_doc.erl b/src/couch/src/couch_doc.erl
index 7ad0f76b6..0993b85dc 100644
--- a/src/couch/src/couch_doc.erl
+++ b/src/couch/src/couch_doc.erl
@@ -68,20 +68,20 @@ to_json_revisions(Options, Start, RevIds0) ->
                 {<<"_revisions">>,
                     {[
                         {<<"start">>, Start},
-                        {<<"ids">>, [revid_to_str(R) || R <- RevIds]}
+                        {<<"ids">>, [revid_to_str(Start, R) || R <- RevIds]}
                     ]}}
             ]
     end.
 
-revid_to_str(RevId) when size(RevId) =:= 16 ->
+revid_to_str(Start, RevId) when Start =/= 0, byte_size(RevId) =:= 16 ->
     couch_util:to_hex_bin(RevId);
-revid_to_str(RevId) when is_binary(RevId) ->
+revid_to_str(Start, RevId) when is_integer(Start), Start >= 0, 
is_binary(RevId) ->
     RevId;
-revid_to_str(RevId) when is_list(RevId) ->
+revid_to_str(Start, RevId) when is_integer(Start), Start >= 0, is_list(RevId) 
->
     list_to_binary(RevId).
 
 rev_to_str({Pos, RevId}) ->
-    <<(integer_to_binary(Pos))/binary, $-, (revid_to_str(RevId))/binary>>.
+    <<(integer_to_binary(Pos))/binary, $-, (revid_to_str(Pos, RevId))/binary>>.
 
 revs_to_strs([]) ->
     [];
@@ -188,13 +188,13 @@ from_json_obj({Props}, DbName) ->
 from_json_obj(_Other, _) ->
     throw({bad_request, "Document must be a JSON object"}).
 
-parse_revid(RevId) when is_binary(RevId), size(RevId) =:= 32 ->
+parse_revid(Start, RevId) when Start =/= 0, is_binary(RevId), byte_size(RevId) 
=:= 32 ->
     binary:decode_hex(RevId);
-parse_revid(RevId) when is_list(RevId), length(RevId) =:= 32 ->
+parse_revid(Start, RevId) when Start =/= 0, is_list(RevId), length(RevId) =:= 
32 ->
     binary:decode_hex(list_to_binary(RevId));
-parse_revid(RevId) when is_binary(RevId) ->
+parse_revid(Start, RevId) when is_integer(Start), Start >= 0, is_binary(RevId) 
->
     RevId;
-parse_revid(RevId) when is_list(RevId) ->
+parse_revid(Start, RevId) when is_integer(Start), Start >= 0, is_list(RevId) ->
     ?l2b(RevId).
 
 parse_rev(Rev) when is_binary(Rev) ->
@@ -211,7 +211,7 @@ parse_rev(Rev) when is_list(Rev) ->
         {Pos, [$- | RevId]} ->
             try
                 IntPos = list_to_integer(Pos),
-                {IntPos, parse_revid(RevId)}
+                {IntPos, parse_revid(IntPos, RevId)}
             catch
                 error:badarg -> throw({bad_request, <<"Invalid rev format">>})
             end;
@@ -313,7 +313,7 @@ transfer_fields([{<<"_revisions">>, {Props}} | Rest], Doc, 
DbName) ->
     RevIds2 = lists:map(
         fun(RevId) ->
             try
-                parse_revid(RevId)
+                parse_revid(Start, RevId)
             catch
                 error:function_clause ->
                     throw({doc_validation, "RevId isn't a string"});
diff --git a/src/couch/test/eunit/couch_db_doc_tests.erl 
b/src/couch/test/eunit/couch_db_doc_tests.erl
index dc1ac79e6..46a42fe2d 100644
--- a/src/couch/test/eunit/couch_db_doc_tests.erl
+++ b/src/couch/test/eunit/couch_db_doc_tests.erl
@@ -115,3 +115,35 @@ add_revisions(Db, DocId, Rev, N) ->
         Rev,
         lists:seq(1, N)
     ).
+
+rev_to_str_test() ->
+    ?assertEqual(<<"0-0">>, couch_doc:rev_to_str({0, "0"})),
+    ?assertEqual(<<"0-0">>, couch_doc:rev_to_str({0, <<"0">>})),
+    ?assertEqual(<<"1-a">>, couch_doc:rev_to_str({1, <<"a">>})),
+    Bytes = <<<<48>> || _ <- lists:seq(1, 16)>>,
+    ?assertEqual(<<"0-0000000000000000">>, couch_doc:rev_to_str({0, Bytes})),
+    ?assertEqual(<<"1-30303030303030303030303030303030">>, 
couch_doc:rev_to_str({1, Bytes})),
+    ?assertError(badarg, couch_doc:rev_to_str({a, b})),
+    ?assertError(function_clause, couch_doc:rev_to_str({-1, <<"x">>})),
+    ?assertError(function_clause, couch_doc:rev_to_str(foo)).
+
+parse_rev_test() ->
+    ?assertEqual({1, <<"x">>}, couch_doc:parse_rev("1-x")),
+    ?assertEqual({1, <<"x">>}, couch_doc:parse_rev(<<"1-x">>)),
+    ?assertEqual({0, <<"y">>}, couch_doc:parse_rev("0-y")),
+    ?assertEqual({1234567890, <<"abc">>}, 
couch_doc:parse_rev("1234567890-abc")),
+    ?assertEqual(
+        {0, <<"00000000000000000000000000000000">>},
+        couch_doc:parse_rev("0-00000000000000000000000000000000")
+    ),
+    ?assertEqual(
+        {1, <<0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0>>},
+        couch_doc:parse_rev("1-00000000000000000000000000000000")
+    ),
+    ?assertThrow({bad_request, _}, couch_doc:parse_rev(foo)),
+    ?assertThrow({bad_request, _}, couch_doc:parse_rev("x-y")),
+    ?assertThrow({bad_request, _}, couch_doc:parse_rev("x")),
+    ?assertThrow({bad_request, _}, couch_doc:parse_rev("0")),
+    ?assertThrow({bad_request, _}, couch_doc:parse_rev("1")),
+    ?assertThrow({bad_request, _}, couch_doc:parse_rev("-")),
+    ?assertThrow({bad_request, _}, couch_doc:parse_rev("-0-1")).
diff --git a/src/couch/test/eunit/couch_doc_json_tests.erl 
b/src/couch/test/eunit/couch_doc_json_tests.erl
index a004ed8fd..ae661f5d6 100644
--- a/src/couch/test/eunit/couch_doc_json_tests.erl
+++ b/src/couch/test/eunit/couch_doc_json_tests.erl
@@ -93,6 +93,16 @@ from_json_success_cases() ->
             #doc{revs = {4, [<<"230234">>]}},
             "_rev stored in revs."
         },
+        {
+            {[{<<"_rev">>, <<"0-11111111111111111111111111111111">>}]},
+            #doc{revs = {0, [<<"11111111111111111111111111111111">>]}},
+            "_rev with start 0 stored in revs (local docs case)."
+        },
+        {
+            {[{<<"_rev">>, <<"2-11111111111111111111111111111111">>}]},
+            #doc{revs = {2, [<<17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 
17, 17, 17, 17>>]}},
+            "_rev with start 1 and size 32 stored in revs."
+        },
         {
             {[{<<"soap">>, 35}]},
             #doc{body = {[{<<"soap">>, 35}]}},
@@ -287,7 +297,7 @@ from_json_error_cases() ->
             {[
                 {<<"_revisions">>,
                     {[
-                        {<<"start">>, 0},
+                        {<<"start">>, 1},
                         {<<"ids">>, [<<"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx">>]}
                     ]}}
             ]},

Reply via email to