(couchdb) 01/01: Improve b64url benchmarking

vatamane Tue, 16 Dec 2025 00:49:24 -0800

This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch improve-b64-benchmark
in repository https://gitbox.apache.org/repos/asf/couchdb.git


commit f3b1b8468de0a12db6c3df8ff1112e7ff718ac17
Author: Nick Vatamaniuc <[email protected]>
AuthorDate: Tue Dec 16 03:35:06 2025 -0500

    Improve b64url benchmarking
    
    Use erlperf as recommended in https://www.erlang.org/doc/system/benchmarking.html
    
     - Generate data in a more deterministic way and outside the main benchmarking loop
     - Benchmark encoding and decoding separately
     - Benchmark a wide range of sizes
    
    Issue: https://github.com/apache/couchdb/issues/5801
---
 src/b64url/README.md              |  51 ++++++++----
 src/b64url/benchmark.sh           |  22 +++++
 src/b64url/test/benchmark.escript | 165 --------------------------------------
 3 files changed, 58 insertions(+), 180 deletions(-)

diff --git a/src/b64url/README.md b/src/b64url/README.md
index c3a3497f2..f93b31a8d 100644
--- a/src/b64url/README.md
+++ b/src/b64url/README.md
@@ -14,24 +14,45 @@ decoding Base64 URL values:
 
 ## Performance
 
-This implementation is significantly faster than the Erlang version it replaced
-in CouchDB. The `benchmark.escript` file contains the original implementation
-(using regular expressions to replace unsafe characters in the output of the
-`base64` module) and can be used to compare the two for strings of various
-lengths. For example:
+This implementation is faster than the Erlang version in OTP 26-28,
+especially for larger binaries (1000+ bytes). To benchmark, clone the
+erlperf repo and run the `./benchmark.sh` script. In the future, it's
+plausible Erlang/OTP's base64 module may become faster than the NIF
+due to improvements in the JIT capabilities, but it's not there yet.
 
 ```
-ERL_LIBS=_build/default/lib/b64url/ ./test/benchmark.escript 4 10 100 30
-erl :       75491270 bytes /  30 seconds =     2516375.67 bps
-nif :      672299342 bytes /  30 seconds =    22409978.07 bps
-```
+./benchmark.sh
+
+[...]
+
+--- bytes: 100 -----
+Code                   ||        QPS       Time   Rel
+encode_otp_100          1    1613 Ki     620 ns  100%
+encode_nif_100          1    1391 Ki     719 ns   86%
+Code                   ||        QPS       Time   Rel
+decode_nif_100          1    1453 Ki     688 ns  100%
+decode_otp_100          1    1395 Ki     716 ns   96%
+
+[...]
 
-This test invocation spawns four workers that generate random strings between 10
-and 100 bytes in length and then perform an encode/decode on them in a tight
-loop for 30 seconds, and then reports the aggregate encoded data volume. Note
-that the generator overhead (`crypto:strong_rand_bytes/1`) is included in these
-results, so the relative difference in encoder throughput is rather larger than
-what's reported here.
+--- bytes: 1000 -----
+Code                    ||        QPS       Time   Rel
+encode_nif_1000          1     369 Ki    2711 ns  100%
+encode_otp_1000          1     204 Ki    4904 ns   55%
+Code                    ||        QPS       Time   Rel
+decode_nif_1000          1     455 Ki    2196 ns  100%
+decode_otp_1000          1     178 Ki    5612 ns   39%
+
+[...]
+
+--- bytes: 10000000 -----
+Code                        ||        QPS       Time   Rel
+encode_nif_10000000          1         45   22388 us  100%
+encode_otp_10000000          1         19   51724 us   43%
+Code                        ||        QPS       Time   Rel
+decode_nif_10000000          1         55   18078 us  100%
+decode_otp_10000000          1         17   60020 us   30%
+```
 
 ## Timeslice Consumption
 
diff --git a/src/b64url/benchmark.sh b/src/b64url/benchmark.sh
new file mode 100755
index 000000000..d7760ed2b
--- /dev/null
+++ b/src/b64url/benchmark.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+# Expects erlperf to be installed
+#
+# $ git clone https://github.com/max-au/erlperf.git
+# $ cd erlperf
+# $ rebar3 as prod escriptize
+# $ cd ..
+
+for i in 50 100 150 200 500 1000 5000 10000 50000 1000000 10000000; do
+ echo ""
+ echo "--- bytes: ${i} -----"
+ ERL_LIBS="." erlperf/erlperf -w 2 \
+  'runner(Bin) -> b64url:encode(Bin).' --label "encode_nif_${i}" \
+  'runner(Bin) -> base64:encode(Bin, #{mode => urlsafe, padding => false}).' 
--label "encode_otp_${i}" \
+   --init_runner_all "rand:seed(default,{1,2,3}), rand:bytes(${i})."
+
+ ERL_LIBS="." erlperf/erlperf -w 2 \
+  'runner(Enc) -> b64url:decode(Enc).' --label "decode_nif_${i}" \
+  'runner(Enc) -> base64:decode(Enc, #{mode => urlsafe, padding => false}).' 
--label "decode_otp_${i}" \
+   --init_runner_all "rand:seed(default,{1,2,3}), 
b64url:encode(rand:bytes(round(${i} * (3/4))))."
+done
diff --git a/src/b64url/test/benchmark.escript 
b/src/b64url/test/benchmark.escript
deleted file mode 100755
index 00a6f0dda..000000000
--- a/src/b64url/test/benchmark.escript
+++ /dev/null
@@ -1,165 +0,0 @@
-#!/usr/bin/env escript
-
--mode(compile).
-
-
--export([
-    encode/1,
-    decode/1,
-    run_worker/1
-]).
-
-
--record(st, {
-    parent,
-    module,
-    workers,
-    minsize,
-    maxsize,
-    duration,
-    total_bytes
-}).
-
-
-main([Workers0, MinSize0, MaxSize0, Duration0]) ->
-    code:add_path("./ebin"),
-    code:add_path("../ebin"),
-    Workers = to_int(Workers0),
-    MinSize = to_int(MinSize0),
-    MaxSize = to_int(MaxSize0),
-    Duration = to_int(Duration0),
-    if Workers > 0 -> ok; true ->
-        die("Worker count must be positive~n")
-    end,
-    if MinSize > 0 -> ok; true ->
-        die("Minimum size must be positive.~n")
-    end,
-    if MaxSize > 0 -> ok; true ->
-        die("Maximum size must be positive.~n")
-    end,
-    if MinSize < MaxSize -> ok; true ->
-        die("Minimum size must be less than maximum size.~n")
-    end,
-    if Duration > 0 -> ok; true ->
-        die("Duration must be positive.~n")
-    end,
-    St = #st{
-        parent = self(),
-        workers = Workers,
-        minsize = MinSize,
-        maxsize = MaxSize,
-        duration = Duration
-    },
-    lists:foreach(fun(M) ->
-        run_test(St#st{module=M})
-    end, randomize([b64url, ?MODULE]));
-
-main(_) ->
-    Args = [escript:script_name()],
-    die("usage: ~s num_workers min_size max_size time_per_test~n", Args).
-
-
-run_test(St) ->
-    Workers = spawn_workers(St#st.workers, St),
-    start_workers(Workers),
-    Results = wait_for_workers(Workers),
-    report(St#st.module, St#st.duration, Results).
-
-
-start_workers(Pids) ->
-    lists:foreach(fun(P) ->
-        P ! start
-    end, Pids).
-
-
-wait_for_workers(Pids) ->
-    lists:map(fun(P) ->
-        receive
-            {P, TotalBytes} -> TotalBytes
-        end
-    end, Pids).
-
-
-report(Module, Duration, TotalByteList) ->
-    ModDesc = case Module of
-        ?MODULE -> "erl";
-        b64url -> "nif"
-    end,
-    TotalBytes = lists:sum(TotalByteList),
-    io:format("~s : ~14b bytes / ~3b seconds = ~14.2f bps~n", [
-        ModDesc, TotalBytes, Duration, TotalBytes / Duration]).
-
-
-spawn_workers(NumWorkers, St) ->
-    lists:map(fun(_) ->
-        spawn_link(?MODULE, run_worker, [St])
-    end, lists:seq(1, NumWorkers)).
-
-
-run_worker(St) ->
-    receive
-        start -> ok
-    end,
-    run_worker(St#st{total_bytes=0}, os:timestamp()).
-
-
-run_worker(St, Started) ->
-    HasRun = timer:now_diff(os:timestamp(), Started),
-    case HasRun div 1000000 > St#st.duration of
-        true ->
-            St#st.parent ! {self(), St#st.total_bytes};
-        false ->
-            NewSt = do_round_trip(St),
-            run_worker(NewSt, Started)
-    end.
-
-
-do_round_trip(St) ->
-    Size = St#st.minsize + rand:uniform(St#st.maxsize - St#st.minsize),
-    Data = crypto:strong_rand_bytes(Size),
-    Encoded = (St#st.module):encode(Data),
-    Data = (St#st.module):decode(Encoded),
-    St#st{total_bytes=St#st.total_bytes+Size}.
-
-
-encode(Url) ->
-    Url1 = iolist_to_binary(re:replace(base64:encode(Url), "=+$", "")),
-    Url2 = iolist_to_binary(re:replace(Url1, "/", "_", [global])),
-    iolist_to_binary(re:replace(Url2, "\\+", "-", [global])).
-
-
-decode(Url64) ->
-    Url1 = re:replace(iolist_to_binary(Url64), "-", "+", [global]),
-    Url2 = iolist_to_binary(
-        re:replace(iolist_to_binary(Url1), "_", "/", [global])
-    ),
-    Padding = list_to_binary(lists:duplicate((4 - size(Url2) rem 4) rem 4, 
$=)),
-    base64:decode(<<Url2/binary, Padding/binary>>).
-
-randomize(List) ->
-    List0 = [{rand:uniform(), L} || L <- List],
-    List1 = lists:sort(List0),
-    [L || {_, L} <- List1].
-
-
-to_int(Val) when is_integer(Val) ->
-    Val;
-to_int(Val) when is_binary(Val) ->
-    to_int(binary_to_list(Val));
-to_int(Val) when is_list(Val) ->
-    try
-        list_to_integer(Val)
-    catch _:_ ->
-        die("Invalid integer: ~w~n", [Val])
-    end;
-to_int(Val) ->
-    die("Invalid integer: ~w~n", [Val]).
-
-
-die(Message) ->
-    die(Message, []).
-
-die(Format, Args) ->
-    io:format(Format, Args),
-    init:stop().
-

(couchdb) 01/01: Improve b64url benchmarking

Reply via email to