On Mon, Jul 18, 2011, Alexander Bluhm wrote:
> On Sun, Jul 17, 2011 at 11:43:03AM -0400, Ted Unangst wrote:
>> I recently learned that our grep does not support the \<\> syntax for
>> word boundaries, only the somewhat more difficult to use [[:<:]] format.
>> It's fairly easy to convert one to the other however.
> 
> \< and \> are regex extensions for vi and ex.

It's not documented there, though; there's just a reference to re_format.

> sed, awk, egrep, more don't support it either.
> I see no need to change grep.

Well, egrep is fixed by the same patch.  sed and more (and grep) can
instead be fixed by the straightforward patch to libc regex below,
if that's what you're requesting. :)

awk uses its own regex code, not the libc version (though the awk man
page refers the reader to re_format), and its behavior is subtly
different.  It doesn't support [[:<:]], for instance.

The whole situation is a mess.  There are three RE languages accepted
(vi, grep/sed/more/libc, and awk), and that's ignoring BRE vs ERE, but
all three claim to implement the same re_format.  I don't think these
historical accidents need to be cast in stone.


Index: regcomp.c
===================================================================
RCS file: /home/tedu/cvs/src/lib/libc/regex/regcomp.c,v
retrieving revision 1.20
diff -u -p -r1.20 regcomp.c
--- regcomp.c   21 Nov 2010 00:02:30 -0000      1.20
+++ regcomp.c   17 Jul 2011 23:55:49 -0000
@@ -81,6 +81,7 @@ static char p_b_coll_elem(struct parse *
 static char othercase(int);
 static void bothcases(struct parse *, int);
 static void ordinary(struct parse *, int);
+static void backslash(struct parse *, int);
 static void nonnewline(struct parse *);
 static void repeat(struct parse *, sopno, int, int);
 static int seterr(struct parse *, int);
@@ -350,7 +351,7 @@ p_ere_exp(struct parse *p)
        case '\\':
                REQUIRE(MORE(), REG_EESCAPE);
                c = GETNEXT();
-               ordinary(p, c);
+               backslash(p, c);
                break;
        case '{':               /* okay as ordinary except if digit follows */
                REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT);
@@ -502,6 +503,12 @@ p_simp_re(struct parse *p,
        case '[':
                p_bracket(p);
                break;
+       case BACKSL|'<':
+               EMIT(OBOW, 0);
+               break;
+       case BACKSL|'>':
+               EMIT(OEOW, 0);
+               break;
        case BACKSL|'{':
                SETERROR(REG_BADRPT);
                break;
@@ -893,6 +900,25 @@ ordinary(struct parse *p, int ch)
                EMIT(OCHAR, (uch)ch);
                if (cap[ch] == 0)
                        cap[ch] = p->g->ncategories++;
+       }
+}
+
+/*
+ * do something magic with this character, but only if it's extra magic
+ */
+static void
+backslash(struct parse *p, int ch)
+{
+       switch (ch) {
+       case '<':
+               EMIT(OBOW, 0);
+               break;
+       case '>':
+               EMIT(OEOW, 0);
+               break;
+       default:
+               ordinary(p, ch);
+               break;
        }
 }

Reply via email to