This is an automated email from the ASF dual-hosted git repository. vatamane pushed a commit to branch improve-b64-benchmark in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit f3b1b8468de0a12db6c3df8ff1112e7ff718ac17 Author: Nick Vatamaniuc <[email protected]> AuthorDate: Tue Dec 16 03:35:06 2025 -0500 Improve b64url benchmarking Use erlperf as recommended in https://www.erlang.org/doc/system/benchmarking.html - Generate data in a more deterministic way and outside the main benchmarking loop - Benchmark encoding and decoding separately - Benchmark a wide range of sizes Issue: https://github.com/apache/couchdb/issues/5801 --- src/b64url/README.md | 51 ++++++++---- src/b64url/benchmark.sh | 22 +++++ src/b64url/test/benchmark.escript | 165 -------------------------------------- 3 files changed, 58 insertions(+), 180 deletions(-) diff --git a/src/b64url/README.md b/src/b64url/README.md index c3a3497f2..f93b31a8d 100644 --- a/src/b64url/README.md +++ b/src/b64url/README.md @@ -14,24 +14,45 @@ decoding Base64 URL values: ## Performance -This implementation is significantly faster than the Erlang version it replaced -in CouchDB. The `benchmark.escript` file contains the original implementation -(using regular expressions to replace unsafe characters in the output of the -`base64` module) and can be used to compare the two for strings of various -lengths. For example: +This implementation is faster than the Erlang version in OTP 26-28, +especially for larger binaries (1000+ bytes). To benchmark clone +erlperf repo and run `./benchmark.sh` script. In the future, it's +plausible Erlang OTP's base64 module may become faster than the NIF, +due to improvements in the JIT capabilities but it's not there yet. ``` -ERL_LIBS=_build/default/lib/b64url/ ./test/benchmark.escript 4 10 100 30 -erl : 75491270 bytes / 30 seconds = 2516375.67 bps -nif : 672299342 bytes / 30 seconds = 22409978.07 bps -``` +./benchmark.sh + +[...] + +--- bytes: 100 ----- +Code || QPS Time Rel +encode_otp_100 1 1613 Ki 620 ns 100% +encode_nif_100 1 1391 Ki 719 ns 86% +Code || QPS Time Rel +decode_nif_100 1 1453 Ki 688 ns 100% +decode_otp_100 1 1395 Ki 716 ns 96% + +[...] 
-This test invocation spawns four workers that generate random strings between 10 -and 100 bytes in length and then perform an encode/decode on them in a tight -loop for 30 seconds, and then reports the aggregate encoded data volume. Note -that the generator overhead (`crypto:strong_rand_bytes/1`) is included in these -results, so the relative difference in encoder throughput is rather larger than -what's reported here. +--- bytes: 1000 ----- +Code || QPS Time Rel +encode_nif_1000 1 369 Ki 2711 ns 100% +encode_otp_1000 1 204 Ki 4904 ns 55% +Code || QPS Time Rel +decode_nif_1000 1 455 Ki 2196 ns 100% +decode_otp_1000 1 178 Ki 5612 ns 39% + +[...] + +--- bytes: 10000000 ----- +Code || QPS Time Rel +encode_nif_10000000 1 45 22388 us 100% +encode_otp_10000000 1 19 51724 us 43% +Code || QPS Time Rel +decode_nif_10000000 1 55 18078 us 100% +decode_otp_10000000 1 17 60020 us 30% +``` ## Timeslice Consumption diff --git a/src/b64url/benchmark.sh b/src/b64url/benchmark.sh new file mode 100755 index 000000000..d7760ed2b --- /dev/null +++ b/src/b64url/benchmark.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +# Expects erlperf to be installed +# +# $ git clone https://github.com/max-au/erlperf.git +# $ cd erlperf +# $ rebar3 as prod escriptize +# $ cd .. + +for i in 50 100 150 200 500 1000 5000 10000 50000 1000000 10000000; do + echo "" + echo "--- bytes: ${i} -----" + ERL_LIBS="." erlperf/erlperf -w 2 \ + 'runner(Bin) -> b64url:encode(Bin).' --label "encode_nif_${i}" \ + 'runner(Bin) -> base64:encode(Bin, #{mode => urlsafe, padding => false}).' --label "encode_otp_${i}" \ + --init_runner_all "rand:seed(default,{1,2,3}), rand:bytes(${i})." + + ERL_LIBS="." erlperf/erlperf -w 2 \ + 'runner(Enc) -> b64url:decode(Enc).' --label "decode_nif_${i}" \ + 'runner(Enc) -> base64:decode(Enc, #{mode => urlsafe, padding => false}).' --label "decode_otp_${i}" \ + --init_runner_all "rand:seed(default,{1,2,3}), b64url:encode(rand:bytes(round(${i} * (3/4))))." 
+done diff --git a/src/b64url/test/benchmark.escript b/src/b64url/test/benchmark.escript deleted file mode 100755 index 00a6f0dda..000000000 --- a/src/b64url/test/benchmark.escript +++ /dev/null @@ -1,165 +0,0 @@ -#!/usr/bin/env escript - --mode(compile). - - --export([ - encode/1, - decode/1, - run_worker/1 -]). - - --record(st, { - parent, - module, - workers, - minsize, - maxsize, - duration, - total_bytes -}). - - -main([Workers0, MinSize0, MaxSize0, Duration0]) -> - code:add_path("./ebin"), - code:add_path("../ebin"), - Workers = to_int(Workers0), - MinSize = to_int(MinSize0), - MaxSize = to_int(MaxSize0), - Duration = to_int(Duration0), - if Workers > 0 -> ok; true -> - die("Worker count must be positive~n") - end, - if MinSize > 0 -> ok; true -> - die("Minimum size must be positive.~n") - end, - if MaxSize > 0 -> ok; true -> - die("Maximum size must be positive.~n") - end, - if MinSize < MaxSize -> ok; true -> - die("Minimum size must be less than maximum size.~n") - end, - if Duration > 0 -> ok; true -> - die("Duration must be positive.~n") - end, - St = #st{ - parent = self(), - workers = Workers, - minsize = MinSize, - maxsize = MaxSize, - duration = Duration - }, - lists:foreach(fun(M) -> - run_test(St#st{module=M}) - end, randomize([b64url, ?MODULE])); - -main(_) -> - Args = [escript:script_name()], - die("usage: ~s num_workers min_size max_size time_per_test~n", Args). - - -run_test(St) -> - Workers = spawn_workers(St#st.workers, St), - start_workers(Workers), - Results = wait_for_workers(Workers), - report(St#st.module, St#st.duration, Results). - - -start_workers(Pids) -> - lists:foreach(fun(P) -> - P ! start - end, Pids). - - -wait_for_workers(Pids) -> - lists:map(fun(P) -> - receive - {P, TotalBytes} -> TotalBytes - end - end, Pids). 
- - -report(Module, Duration, TotalByteList) -> - ModDesc = case Module of - ?MODULE -> "erl"; - b64url -> "nif" - end, - TotalBytes = lists:sum(TotalByteList), - io:format("~s : ~14b bytes / ~3b seconds = ~14.2f bps~n", [ - ModDesc, TotalBytes, Duration, TotalBytes / Duration]). - - -spawn_workers(NumWorkers, St) -> - lists:map(fun(_) -> - spawn_link(?MODULE, run_worker, [St]) - end, lists:seq(1, NumWorkers)). - - -run_worker(St) -> - receive - start -> ok - end, - run_worker(St#st{total_bytes=0}, os:timestamp()). - - -run_worker(St, Started) -> - HasRun = timer:now_diff(os:timestamp(), Started), - case HasRun div 1000000 > St#st.duration of - true -> - St#st.parent ! {self(), St#st.total_bytes}; - false -> - NewSt = do_round_trip(St), - run_worker(NewSt, Started) - end. - - -do_round_trip(St) -> - Size = St#st.minsize + rand:uniform(St#st.maxsize - St#st.minsize), - Data = crypto:strong_rand_bytes(Size), - Encoded = (St#st.module):encode(Data), - Data = (St#st.module):decode(Encoded), - St#st{total_bytes=St#st.total_bytes+Size}. - - -encode(Url) -> - Url1 = iolist_to_binary(re:replace(base64:encode(Url), "=+$", "")), - Url2 = iolist_to_binary(re:replace(Url1, "/", "_", [global])), - iolist_to_binary(re:replace(Url2, "\\+", "-", [global])). - - -decode(Url64) -> - Url1 = re:replace(iolist_to_binary(Url64), "-", "+", [global]), - Url2 = iolist_to_binary( - re:replace(iolist_to_binary(Url1), "_", "/", [global]) - ), - Padding = list_to_binary(lists:duplicate((4 - size(Url2) rem 4) rem 4, $=)), - base64:decode(<<Url2/binary, Padding/binary>>). - -randomize(List) -> - List0 = [{rand:uniform(), L} || L <- List], - List1 = lists:sort(List0), - [L || {_, L} <- List1]. - - -to_int(Val) when is_integer(Val) -> - Val; -to_int(Val) when is_binary(Val) -> - to_int(binary_to_list(Val)); -to_int(Val) when is_list(Val) -> - try - list_to_integer(Val) - catch _:_ -> - die("Invalid integer: ~w~n", [Val]) - end; -to_int(Val) -> - die("Invalid integer: ~w~n", [Val]). 
- - -die(Message) -> - die(Message, []). - -die(Format, Args) -> - io:format(Format, Args), - init:stop(). -
