On 12/15/19 10:43 AM, Arnold Robbins wrote:
> To reproduce:
> 
> 1. Checkout the gawk repo
> 2. Copy gnulib/lib/dfa.[ch] into gawk/support/.
> 3. Apply the minimal patch below

I looked into that, and the problem was not in Gnulib; it was that your minimal
patch's dfasyntax didn't clear its destination properly. (Gawk master dfa.c
diverged from Gnulib in having its dfaalloc use xzalloc rather than xmalloc, and
the minimal patch didn't capture that divergence.)

Also, it strikes me that dfacopysyntax should set to->canychar to -1, not to
from->canychar, since the canychar calculated from the old syntax shouldn't
affect the new one.

Continuing to maintain a separate copy of dfa.c for Gawk seems error-prone, so I
suggest we unify the two copies. I attempted to do that by
installing the attached patches into Gnulib. You should now be able to use
Gnulib dfa.c as follows:

1. Check out the gawk repo.
2. Copy gnulib/lib/dfa.[ch] and gnulib/lib/localeinfo.[ch] into gawk/support/.

Then build as usual. This works for me on GNU/Linux.
>From b079c7df25a5418554cacf0023f3ff8da763e3c7 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Mon, 16 Dec 2019 17:02:31 -0800
Subject: [PATCH 1/2] dfa: port _GL_ATTRIBUTE_MALLOC to Gawk

Gawk does not use Gnulib, and does not define _GL_ATTRIBUTE_MALLOC.
* lib/dfa.h (_GL_ATTRIBUTE_MALLOC): Define to empty
if not already defined.
---
 ChangeLog | 5 +++++
 lib/dfa.h | 5 +++++
 2 files changed, 10 insertions(+)

diff --git a/ChangeLog b/ChangeLog
index 561b0ba0a..dca623c5f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,10 @@
 2019-12-16  Paul Eggert  <egg...@cs.ucla.edu>
 
+	dfa: port _GL_ATTRIBUTE_MALLOC to Gawk
+	Gawk does not use Gnulib, and does not define _GL_ATTRIBUTE_MALLOC.
+	* lib/dfa.h (_GL_ATTRIBUTE_MALLOC): Define to empty
+	if not already defined.
+
 	dfa: remove one dependency on MB_CUR_MAX
 	* lib/dfa.c (dfamust): No need to refer to MB_CUR_MAX here.
 
diff --git a/lib/dfa.h b/lib/dfa.h
index 96c3bf1f1..09e7991bd 100644
--- a/lib/dfa.h
+++ b/lib/dfa.h
@@ -37,6 +37,11 @@ struct dfamust
 /* The dfa structure. It is completely opaque. */
 struct dfa;
 
+/* Needed when Gnulib is not used.  */
+#ifndef _GL_ATTRIBUTE_MALLOC
+# define  _GL_ATTRIBUTE_MALLOC
+#endif
+
 /* Entry points. */
 
 /* Allocate a struct dfa.  The struct dfa is completely opaque.
-- 
2.17.1

>From cbd1d57def1beaca5886d749d2f3bcb45af599ae Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Tue, 17 Dec 2019 00:20:53 -0800
Subject: [PATCH 2/2] dfa: new function dfacopysyntax

* lib/dfa.c (struct dfa): Move syntax member later so
that dfacopysyntax can easily clear earlier members.
(dfacopysyntax): New function, used by Gawk.
---
 ChangeLog |  7 +++++++
 lib/dfa.c | 19 ++++++++++++++++---
 lib/dfa.h |  7 +++++--
 3 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index dca623c5f..9806040bb 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2019-12-17  Paul Eggert  <egg...@cs.ucla.edu>
+
+	dfa: new function dfacopysyntax
+	* lib/dfa.c (struct dfa): Move syntax member later so
+	that dfacopysyntax can easily clear earlier members.
+	(dfacopysyntax): New function, used by Gawk.
+
 2019-12-16  Paul Eggert  <egg...@cs.ucla.edu>
 
 	dfa: port _GL_ATTRIBUTE_MALLOC to Gawk
diff --git a/lib/dfa.c b/lib/dfa.c
index 8e59df0c7..00347f901 100644
--- a/lib/dfa.c
+++ b/lib/dfa.c
@@ -444,9 +444,6 @@ struct parser_state
 /* A compiled regular expression.  */
 struct dfa
 {
-  /* Syntax configuration */
-  struct regex_syntax syntax;
-
   /* Fields filled by the scanner.  */
   charclass *charclasses;       /* Array of character sets for CSET tokens.  */
   idx_t cindex;			/* Index for adding new charclasses.  */
@@ -562,6 +559,10 @@ struct dfa
   state_num mb_trcount;         /* Number of transition tables for states with
                                    ANYCHAR that have actually been built.  */
 
+  /* Syntax configuration.  This is near the end so that dfacopysyntax
+     can memset up to here.  */
+  struct regex_syntax syntax;
+
   /* Information derived from the locale.  This is at the end so that
      a quick memset need not clear it specially.  */
 
@@ -4303,4 +4304,16 @@ dfasyntax (struct dfa *dfa, struct localeinfo const *linfo,
     }
 }
 
+/* Initialize TO by copying FROM's syntax settings.  */
+void
+dfacopysyntax (struct dfa *to, struct dfa const *from)
+{
+  memset (to, 0, offsetof (struct dfa, syntax));
+  to->canychar = -1;
+  to->fast = from->fast;
+  to->syntax = from->syntax;
+  to->dfaexec = from->dfaexec;
+  to->localeinfo = from->localeinfo;
+}
+
 /* vim:set shiftwidth=2: */
diff --git a/lib/dfa.h b/lib/dfa.h
index 09e7991bd..0da597fc9 100644
--- a/lib/dfa.h
+++ b/lib/dfa.h
@@ -45,6 +45,7 @@ struct dfa;
 /* Entry points. */
 
 /* Allocate a struct dfa.  The struct dfa is completely opaque.
+   It should be initialized via dfasyntax or dfacopysyntax before other use.
    The returned pointer should be passed directly to free() after
    calling dfafree() on it. */
 extern struct dfa *dfaalloc (void) _GL_ATTRIBUTE_MALLOC;
@@ -61,8 +62,7 @@ enum
     DFA_EOL_NUL = 1 << 1
   };
 
-/* Initialize or reinitialize a DFA.  This must be called before
-   any of the routines below.  The arguments are:
+/* Initialize or reinitialize a DFA.  The arguments are:
    1. The DFA to operate on.
    2. Information about the current locale.
    3. Syntax bits described in regex.h.
@@ -70,6 +70,9 @@ enum
 extern void dfasyntax (struct dfa *, struct localeinfo const *,
                        reg_syntax_t, int);
 
+/* Initialize or reinitialize a DFA from an already-initialized DFA.  */
+extern void dfacopysyntax (struct dfa *, struct dfa const *);
+
 /* Parse the given string of given length into the given struct dfa.  */
 extern void dfaparse (char const *, ptrdiff_t, struct dfa *);
 
-- 
2.17.1

Reply via email to