---
.gitignore | 1 +
Makefile | 1 +
README | 1 +
shuf.1 | 43 +++++++++++++++++++++
shuf.c | 110 +++++++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 156 insertions(+)
create mode 100644 shuf.1
create mode 100644 shuf.c
diff --git a/.gitignore b/.gitignore
index 001b055..e789e24 100644
--- a/.gitignore
+++ b/.gitignore
@@ -71,6 +71,7 @@
/sha512sum
/sha512-224sum
/sha512-256sum
+/shuf
/sleep
/sort
/split
diff --git a/Makefile b/Makefile
index 7bd9626..2807e6b 100644
--- a/Makefile
+++ b/Makefile
@@ -160,6 +160,7 @@ BIN =\
sha512sum\
sha512-224sum\
sha512-256sum\
+ shuf\
sleep\
sort\
split\
diff --git a/README b/README
index 5555f8b..9b287d9 100644
--- a/README
+++ b/README
@@ -112,6 +112,7 @@ The following tools are implemented:
0=*|x sha512sum .
0=* x sha512-224sum .
0=* x sha512-256sum .
+0=* x shuf (-o, -z, -e, -i)
0=*|o sleep .
0#*|o sort .
0=*|o split .
diff --git a/shuf.1 b/shuf.1
new file mode 100644
index 0000000..223da4e
--- /dev/null
+++ b/shuf.1
@@ -0,0 +1,43 @@
+.Dd 2024-02-25
+.Dt SHUF 1
+.Os
+.Sh NAME
+.Nm shuf
+.Nd randomly permute input lines
+.Sh SYNOPSIS
+.Nm
+.Op Fl n Ar count
+.Op Ar file ...
+.Sh DESCRIPTION
+.Nm
+is a utility that outputs a random permutation of its input lines.
+By default,
+.Nm
+reads from stdin and outputs to stdout.
+.Sh OPTIONS
+.Bl -tag -width Ds
+.It Fl r
+Do not permute.
+Instead, choose lines at random, with replacement.
+By default, it repeats indefinitely.
+.El
+.Sh NOTES
+This
+.Nm
+misses a few options.
+.Pp
+.Fl n
+can be reproduced using
+.Xr head 1 .
+.Pp
+.Fl i
+can be reproduced using
+.Xr seq 1 .
+.Pp
+.Fl e
+can be reproduced using
+.Xr printf 1 .
+.Sh SEE ALSO
+.Xr sort 1 ,
+.Xr seq 1 ,
+.Xr head 1
diff --git a/shuf.c b/shuf.c
new file mode 100644
index 0000000..309e469
--- /dev/null
+++ b/shuf.c
@@ -0,0 +1,110 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include "text.h"
+#include "util.h"
+
+/*
+ * Uniformity is achieved by generating new random numbers until the one
+ * returned is outside the range [0, 2**32 % upper_bound). This
+ * guarantees the selected random number will be inside
+ * [2**32 % upper_bound, 2**32) which maps back to [0, upper_bound)
+ * after reduction modulo upper_bound.
+ *
+ * Copied off OpenBSD (original is arc4random_uniform)
+ */
+static uint32_t
+random_uniform(uint32_t upper_bound)
+{
+ uint32_t r, min;
+
+ if (upper_bound < 2)
+ return 0;
+
+ /* 2**32 % x == (2**32 - x) % x */
+ min = -upper_bound % upper_bound;
+
+ /*
+ * This could theoretically loop forever but each retry has
+ * p > 0.5 (worst case, usually far better) of selecting a
+ * number inside the range we need, so it should rarely need
+ * to re-roll.
+ */
+ for (;;) {
+ r = random(); /* arc4random() is better, but we don't have it */
+ if (r >= min)
+ break;
+ }
+
+ return r % upper_bound;
+}
+
+static void
+usage(void)
+{
+ eprintf("usage: %s [-r] [file ...]\n", argv0);
+}
+
+int
+main(int argc, char *argv[])
+{
+ FILE *fp;
+ int i, j, rflag = 0, ret = 0;
+ struct linebuf buf = EMPTY_LINEBUF;
+ struct line line;
+
+ ARGBEGIN {
+ case 'r':
+ rflag = 1;
+ break;
+ default:
+ usage();
+ } ARGEND
+
+ if (!argc) {
+ getlines(stdin, &buf);
+ } else {
+ for (; *argv; argc--, argv++) {
+ if (!strcmp(*argv, "-")) {
+ *argv = "<stdin>";
+ fp = stdin;
+ } else if (!(fp = fopen(*argv, "r"))) {
+ weprintf("fopen %s:", *argv);
+ ret = 1;
+ continue;
+ }
+ getlines(fp, &buf);
+ if (fp != stdin && fshut(fp, *argv))
+ ret = 1;
+ }
+ }
+
+ srandom((intptr_t)buf.lines | time(NULL));
+
+ if (!rflag) {
+ for (i = buf.nlines - 1; i > 0; i--) {
+ j = random_uniform(i+1);
+ line = buf.lines[j];
+ buf.lines[j] = buf.lines[i];
+ buf.lines[i] = line;
+ }
+
+ for (i = 0; i < buf.nlines; i++) {
+ line = buf.lines[i];
+ fwrite(line.data, 1, line.len, stdout);
+ }
+ } else {
+ for (;;) {
+ j = random_uniform(buf.nlines);
+ line = buf.lines[j];
+ fwrite(line.data, 1, line.len, stdout);
+ }
+ }
+
+ ret |= fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>");
+
+ return ret;
+}
--
2.43.0