Hi,
a new contributor here. I was browsing through the sources of sbase and decided to tackle the missing unexpand.

Patch attached. It implements unexpand, adds a man page for it, and adds a mention of unexpand to the man page of expand. It also updates TODO, README, LICENSE and Makefile accordingly.

 - Tuukka
diff --git a/LICENSE b/LICENSE
index d46b353..ac9bee2 100644
--- a/LICENSE
+++ b/LICENSE
@@ -26,6 +26,7 @@ MIT/X Consortium License
 © 2014 Silvan Jegen <s.je...@gmail.com>
 © 2014 Laslo Hunhold <d...@frign.de>
 © 2014 Daniel Bainton <d...@driftaway.org>
+© 2014 Tuukka Kataja <st...@xor.fi>
 
 Permission is hereby granted, free of charge, to any person obtaining a
 copy of this software and associated documentation files (the "Software"),
diff --git a/Makefile b/Makefile
index 27a6693..5919c8d 100644
--- a/Makefile
+++ b/Makefile
@@ -89,6 +89,7 @@ SRC = \
 	uudecode.c \
 	uuencode.c \
 	uname.c    \
+	unexpand.c \
 	uniq.c     \
 	unlink.c   \
 	seq.c      \
diff --git a/README b/README
index 39a6857..1a7f74d 100644
--- a/README
+++ b/README
@@ -11,8 +11,8 @@ The following programs are currently implemented:
     kill ln  ls mc md5sum mkdir  mkfifo mktemp mv nice  nl nohup paste
     printenv  pwd readlink  renice rm  rmdir sleep  setsid sort  split
     sponge strings sync  tail tar tee test touch tr  true tty uudecode
-    uuencode  uname uniq  unlink  seq sha1sum  sha256sum sha512sum  wc
-    xargs yes
+    uuencode  uname   unexpand  uniq  unlink  seq   sha1sum  sha256sum
+    sha512sum wc xargs yes
 
 sbase is mostly following POSIX but we deviate wherever we think it is
 appropriate.
diff --git a/TODO b/TODO
index 59bbd24..288f158 100644
--- a/TODO
+++ b/TODO
@@ -11,8 +11,6 @@ test [expression...]
 tr:
 	support for character classes [:alnum:]
 
-unexpand [-a] [-t N] [file...]
-
 od/hd
 
 uuencode, uudecode:
diff --git a/expand.1 b/expand.1
index 82be286..98c66ab 100644
--- a/expand.1
+++ b/expand.1
@@ -18,4 +18,5 @@ Expand tabs to
 .I n
 spaces.  We currently support only a single numerical argument.
 .SH SEE ALSO
+.IR unexpand (1),
 .IR fold (1)
diff --git a/unexpand.1 b/unexpand.1
new file mode 100644
index 0000000..62dd957
--- /dev/null
+++ b/unexpand.1
@@ -0,0 +1,25 @@
+.TH UNEXPAND 1 sbase\-VERSION
+.SH NAME
+unexpand \- convert blanks to tabs
+.SH SYNOPSIS
+.B unexpand
+.RB [ \-a ]
+.RB [ \-t
+.IR n ]
+.RI [ file ...]
+.SH DESCRIPTION
+unexpand processes the named files or the standard input, writing the
+standard output with consecutive blanks (spaces and tabs) converted
+into tabs. Backspace characters are preserved in the output and
+decrement the column count for tab calculations.
+.SH OPTIONS
+.TP
+.BI \-a
+convert blanks to tabs everywhere, not just at the start of lines
+.TP
+.BI \-t " n"
+set tab size to
+.I n
+spaces (default: 8); implies \-a
+.SH SEE ALSO
+.IR expand (1)
diff --git a/unexpand.c b/unexpand.c
new file mode 100644
index 0000000..b0ef660
--- /dev/null
+++ b/unexpand.c
@@ -0,0 +1,140 @@
+/* See LICENSE file for copyright and license details. */
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <wchar.h>
+#include "util.h"
+
+typedef struct {
+	FILE *fp;	/* stream that in() reads wide characters from */
+	const char *name;	/* label printed in read-error messages */
+} Fdescr;
+
+static void unexpand(Fdescr *dsc);
+
+static bool aflag = false;	/* set by -a and -t: also convert blanks after the first non-blank */
+static int tabsize = 8;	/* tab stop interval in columns; overridden by -t */
+
+static void
+usage(void)	/* report the command synopsis through eprintf() */
+{
+	eprintf("usage: %s [-a] [-t n] [file ...]\n", argv0);
+}
+
+int
+main(int argc, char *argv[])	/* unexpand each named file, or stdin when none is given */
+{
+	Fdescr dsc;
+	int ret = EXIT_SUCCESS;	/* becomes EXIT_FAILURE once any file cannot be opened */
+
+	ARGBEGIN {
+	case 't':
+		tabsize = estrtol(EARGF(usage()), 0);
+		if(tabsize <= 0)
+			eprintf("unexpand: invalid tabsize\n");
+		/* Fallthrough: -t implies -a */
+	case 'a':
+		aflag = true;
+		break;
+	default:
+		usage();
+	} ARGEND;
+
+	if (argc == 0) {
+		dsc.name = "<stdin>";
+		dsc.fp = stdin;
+		unexpand(&dsc);
+	} else {
+		for (; argc > 0; argc--, argv++) {
+			if (!(dsc.fp = fopen(*argv, "r"))) {
+				weprintf("fopen %s:", *argv);
+				ret = EXIT_FAILURE;	/* keep processing, but report the failure on exit */
+				continue;
+			}
+			dsc.name = *argv;
+			unexpand(&dsc);
+			fclose(dsc.fp);
+		}
+	}
+
+	return ret;
+}
+
+static wint_t
+in(Fdescr *f)	/* read one wide character from f, reporting stream errors */
+{
+	wint_t wc;
+
+	if ((wc = fgetwc(f->fp)) == WEOF && ferror(f->fp))
+		eprintf("'%s' read error:", f->name);
+
+	return wc;
+}
+
+static void
+out(wint_t c)	/* write one wide character to stdout, reporting stream errors */
+{
+	putwc(c, stdout);
+	if (!ferror(stdout))
+		return;
+	eprintf("write error:");
+}
+
+static void
+unexpandspan(unsigned int n, unsigned int col)	/* emit a run of n blank columns that ends at column col */
+{
+	unsigned int lead = (col - n) % tabsize;	/* offset of the run's first column within its tab cell */
+
+	if (n > 1 && n + lead >= tabsize)
+		n += lead;	/* count from the cell start so the first tab absorbs the partial cell */
+
+	for (; n >= tabsize; n -= tabsize)
+		out('\t');
+	for (; n > 0; n--)
+		out(' ');
+}
+
+static void
+unexpand(Fdescr *dsc)	/* copy dsc to stdout, turning eligible blank runs into tabs */
+{
+	unsigned int n = 0, col = 0;	/* n: width of the pending blank run; col: current column */
+	bool bol = true;	/* true while only blanks have been seen on the current line */
+	wint_t c;
+
+	while ((c = in(dsc)) != WEOF) {	/* fgetwc() signals end of input with WEOF, not EOF */
+		switch (c) {
+		case ' ':
+			if (bol || aflag)
+				n++;
+			col++;
+			break;
+		case '\t':
+			if (bol || aflag)
+				n += tabsize - col % tabsize;	/* distance to the next tab stop */
+			col += tabsize - col % tabsize;
+			break;
+		case '\b':
+			if (bol || aflag)
+				unexpandspan(n, col);
+			col -= (col > 0);	/* step back one column, never below zero */
+			n = 0;
+			bol = false;
+			break;
+		case '\n':
+			if (bol || aflag)
+				unexpandspan(n, col);
+			n = col = 0;
+			bol = true;
+			break;
+		default:
+			if (bol || aflag)
+				unexpandspan(n, col);
+			n = 0;
+			col++;
+			bol = false;
+		}
+		if ((c != ' ' && c != '\t') || (!aflag && !bol))	/* held-back blanks are emitted by unexpandspan() */
+			out(c);
+	}
+	if (n > 0 && (bol || aflag))
+		unexpandspan(n, col);	/* flush blanks still pending at end of input */
+}

Reply via email to