commit:     866af9c1fe47529bdefa02cd0e3eccfea2bebd2b
Author:     Kerin Millar <kfm <AT> plushkava <DOT> net>
AuthorDate: Sat Aug 10 08:34:25 2024 +0000
Commit:     Sam James <sam <AT> gentoo <DOT> org>
CommitDate: Sun Aug 11 10:11:05 2024 +0000
URL:        
https://gitweb.gentoo.org/proj/gentoo-functions.git/commit/?id=866af9c1

Render the non-bash srandom() implementation faster

Presently, there are three implementations of srandom(), one of which is
the preferred implementation for shells other than bash. It is a little
on the slow side as it has to fork and execute both od(1) and tr(1)
every time, just to read 4 bytes. Accelerate it by having the shell
maintain its own entropy pool of up to 512 hex digits in size. Consider
the following benchmark.

i=0; while [ $((i += 1)) -le 30000 ]; do srandom; done >/dev/null

As conducted with dash on a system with a 2nd generation Intel Xeon, I
obtained the following figures.

BEFORE

real    0m49.878s
use     1m1.985s
sys     0m17.035s

AFTER

real    0m12.866s
user    0m12.559s
sys     0m0.962s

It should be noted that the optimised routine will only be utilised in
cases where the kernel is Linux and the shell has not forked itself.

$ uname
Linux
$ srandom                       # uses the fast path
$ number=$(srandom)             # subshell; probably uses the slow path
$ srandom | { read -r number; } # ditto

Still, there are conceivable use cases for which this optimisation may
prove useful. Below is an example in which it is known in advance that
up to 100 random numbers are required, and where writing them to
temporary storage is not considered to be a risk.

i=0
tmpfile=${TMPDIR:-/tmp}/random-numbers.$$.$(srandom)
while [ $((i += 1)) -le 100 ]; do
        srandom
done > "$tmpfile"
while read -r number; do
        do_something_with "$number"
done < "$tmpfile"

Signed-off-by: Kerin Millar <kfm <AT> plushkava.net>
Signed-off-by: Sam James <sam <AT> gentoo.org>

 functions.sh   | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 test-functions | 44 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 109 insertions(+), 4 deletions(-)

diff --git a/functions.sh b/functions.sh
index 38eef62..733e4e9 100644
--- a/functions.sh
+++ b/functions.sh
@@ -18,7 +18,7 @@
 
 # BASH             : whether bash-specific features may be employed
 # BASH_VERSINFO    : whether bash-specific features may be employed
-# BASHPID          : may be used by _update_columns() to detect subshells
+# BASHPID          : may be used by _update_columns() and _update_pid()
 # COLUMNS          : may be used by _update_columns() to get the column count
 # EPOCHREALTIME    : potentially used by _update_time() to get the time
 # GENFUN_MODULES   : which of the optional function collections must be sourced
@@ -587,12 +587,35 @@ srandom()
                        printf '%d\n' "0x${hex}"
                }
        elif [ -c /dev/urandom ]; then
+               unset -v genfun_entropy
+
                srandom()
                {
                        local hex
 
-                       hex=$(LC_ALL=C od -vAn -N4 -tx1 /dev/urandom | tr -d 
'[:space:]')
-                       [ "${hex}" ] && printf '%d\n' "$(( 0x${hex} >> 1 ))"
+                       # If the shell has forked itself, collect 4 bytes worth
+                       # of entropy.
+                       if ! _update_pid || [ "$$" != "${genfun_pid}" ]; then
+                               hex=$(LC_ALL=C od -vAn -N4 -tx1 /dev/urandom | 
tr -d '[:space:]')
+                               test "${#hex}" -eq 8 && printf '%d\n' "$(( 
0x${hex} >> 1 ))"
+                               return
+                       fi
+
+                       # Otherwise, employ a faster method whereby the shell
+                       # maintains an entropy pool of up to 512 hex digits in
+                       # size.
+                       if [ "${#genfun_entropy}" -lt 8 ]; then
+                               genfun_entropy=$(
+                                       LC_ALL=C od -vAn -N256 -tx1 
/dev/urandom | tr -d '[:space:]'
+                               )
+                       fi
+                       if [ "${#genfun_entropy}" -lt 8 ]; then
+                               false
+                       else
+                               hex=${genfun_entropy}
+                               genfun_entropy=${genfun_entropy%????????}
+                               printf '%d\n' "$(( 0x${hex#"$genfun_entropy"} 
>> 1 ))"
+                       fi
                }
        else
                warn "srandom: /dev/urandom doesn't exist as a character device"
@@ -848,6 +871,46 @@ _update_columns()
        _update_columns
 }
 
+#
+# Determines the PID of the current shell process. Upon success, the PID shall
+# be assigned to genfun_pid. Otherwise, the return value shall be greater than
+# 0. The obtained PID value will differ from the value of $$ under certain
+# circumstances, such as where a shell forks itself to create a subshell.
+#
+_update_pid()
+{
+       if [ "${BASH}" ]; then
+               _update_pid()
+               {
+                       # shellcheck disable=3028
+                       genfun_pid=${BASHPID}
+               }
+       elif [ -d /proc/self/task ]; then
+               # This method relies on the proc_pid_task(5) interface of Linux.
+               _update_pid()
+               {
+                       local dir tid
+
+                       for dir in /proc/self/task/*/; do
+                               if [ "${tid}" ] || [ ! -e "${dir}" ]; then
+                                       return 1
+                               else
+                                       dir=${dir%/}
+                                       tid=${dir##*/}
+                               fi
+                       done
+                       genfun_pid=${tid}
+               }
+       else
+               _update_pid()
+               {
+                       false
+               }
+       fi
+
+       _update_pid
+}
+
 #
 # Determines either the number of centiseconds elapsed since the unix epoch or
 # the number of centiseconds that the operating system has been online,

diff --git a/test-functions b/test-functions
index fe26905..51f3adf 100755
--- a/test-functions
+++ b/test-functions
@@ -454,6 +454,44 @@ test_srandom() {
        iterate_tests 2 "$@"
 }
 
+test_srandom_forked()
+{
+       set -- \
+               eq  0  unforked  \
+               eq  0  forking
+
+       callback() {
+               local mode number
+
+               shift
+               mode=$1
+               set --
+               test_description="srandom equality where $mode"
+               if [ "${mode}" = "forking" ]; then
+                       srandom
+                       ( srandom )
+                       srandom
+                       ( srandom )
+               else
+                       srandom
+                       srandom
+                       srandom
+                       srandom
+               fi > random_numbers || return
+               while read -r number; do
+                       set -- "$@" "${number}"
+               done < random_numbers
+               test_description="srandom equality where $mode ($*)"
+               if [ "${mode}" = "forking" ]; then
+                       test "$#" -eq 4 && test "$1" -ne "$2" && test "$3" -ne 
"$4"
+               else
+                       test "$#" -eq 4 && ! trueof_all test "$1" -eq -- "$@"
+               fi
+       }
+
+       iterate_tests 3 "$@"
+}
+
 test_newest() {
        set -- \
                ge  1  non-existent  non-existent  \
@@ -1212,7 +1250,11 @@ else
        test_yesno || rc=1
        test_die || rc=1
        test_edo || rc=1
-       test_srandom || rc=1
+       if ! test_srandom; then
+               rc=1
+       else
+               test_srandom_forked || rc=1
+       fi
        test_newest || rc=1
        test_oldest || rc=1
        test_trim || rc=1

Reply via email to