On 8/26/21 4:58 PM, Bruno Haible wrote:
Oops, my suggestion was not right. Instead, what needs to be done
in order to preserve good diagnostics with GCC 10 is to keep the
__access__ attribute, even with the VLA.

Yes, it appears I was too ambitious in my patch. I have reverted much of it and the result is that yesterday's regex patches (counting the typo fix), when combined, amount to the attached.

To answer some of your earlier questions, here are some examples:

  void f (int n, int *a) { }
  void g (int n, int a[n]) { }
  void h (int n, int a[static n]) { }
  __attribute__ ((access (read_write, 2, 1))) void i (int n, int *a) { }

f and g are the same, except that (1) a conforming C11 compiler can reject g if __STDC_NO_VLA__ is defined, (2) if you use g's style in a function declaration you should also use g's style in the function's definition (otherwise there's a type-compatibility problem), recent versions of GCC can warn if you pass an array to g whose size is too small for n.

h differs from g in that the C standard requires that every call to h must pass a null pointer to an array of at least n elements. (As you say, the "at least" is redundant but the standard is intentionally redundant here, to remove possibilities for confusion.) h is intended for optimization, so that the compiler can generate code to access the first n elements of a at function entry, regardless of whether the C code actually accesses those elements. Such an optimization is not allowed for f or g.

The syntax used for 'i' is more flexible and powerful than the syntax used for h, which is why glibc is using i-like syntax. Among other things, you can use i-like syntax only in a function declaration; you need not repeat it in the function's definition.

Unfortunately, for regexec there is no i-like syntax that is appropriate, since any of the access modes 'read', 'write', 'read_write', 'none' are incorrect for some valid and reasonable regexec calls. Also, an h-like syntax is not appropriate since it's reasonable to pass regexec a null pointer in some cases. Because of this, for regexec we can use only f-like or g-like syntax. g-like syntax is better when supported, as it lets GCC warn if you pass an array of inappropriate size to regexec (this warning will be incorrect for valid but screwy regexec calls, but that's OK).

My previous patch attempted to use i-like syntax for internal regex functions. However, it's not clear it's worth doing that, and anyway it's not necessary to solve the problem Martin Sebor originally raised in <https://sourceware.org/pipermail/libc-alpha/2021-August/130137.html>. So my current patch limits itself to Martin's issue.

I'll CC this to Martin to let him know that my suggested patches have hit Gnulib. The intent is to sync Gnulib to Glibc.
diff --git a/ChangeLog b/ChangeLog
index 785fbc44b..902c5262c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,19 @@
+2021-08-26  Paul Eggert  <egg...@cs.ucla.edu>
+
+	regex: use __attr_access and C99-style array arg
+	This should help with some static checking.
+	Derived from a suggestion by Martin Sebor in:
+	https://sourceware.org/pipermail/libc-alpha/2021-August/130336.html
+	* lib/regex.c: Ignore -Wvla for the whole file.
+	* lib/regex.h (_REGEX_NELTS, _Attr_access_): New macros.
+	Ignore -Wvla when declaring regexec.
+	(re_compile_pattern, re_search, re_search2, re_match, re_match_2)
+	(regerror): Mark with _Attr_access_, as glibc does.
+	* lib/regex.h (regexec):
+	* lib/regexec.c (regexec, __compat_regexec):
+	Use _REGEX_NELTS for each array parameter whose size is another arg
+	but with an access pattern that cannot be captured with __attr_access.
+
 2021-08-25  Bruno Haible  <br...@clisp.org>
 
 	execute tests: Fix test failure when libtool is in use.
diff --git a/lib/regex.c b/lib/regex.c
index 7296be0f0..d32863972 100644
--- a/lib/regex.c
+++ b/lib/regex.c
@@ -24,6 +24,7 @@
 
 # if __GNUC_PREREQ (4, 6)
 #  pragma GCC diagnostic ignored "-Wsuggest-attribute=pure"
+#  pragma GCC diagnostic ignored "-Wvla"
 # endif
 # if __GNUC_PREREQ (4, 3)
 #  pragma GCC diagnostic ignored "-Wold-style-definition"
diff --git a/lib/regex.h b/lib/regex.h
index 8e4ef4557..adb69768e 100644
--- a/lib/regex.h
+++ b/lib/regex.h
@@ -522,6 +522,30 @@ typedef struct
 
 /* Declarations for routines.  */
 
+#ifndef _REGEX_NELTS
+# if (defined __STDC_VERSION__ && 199901L <= __STDC_VERSION__ \
+	&& !defined __STDC_NO_VLA__)
+#  define _REGEX_NELTS(n) n
+# else
+#  define _REGEX_NELTS(n)
+# endif
+#endif
+
+#if defined __GNUC__ && 4 < __GNUC__ + (6 <= __GNUC_MINOR__)
+# pragma GCC diagnostic push
+# pragma GCC diagnostic ignored "-Wvla"
+#endif
+
+#ifndef _Attr_access_
+# ifdef __attr_access
+#  define _Attr_access_(arg) __attr_access (arg)
+# elif defined __GNUC__ && 10 <= __GNUC__
+#  define _Attr_access_(x) __attribute__ ((__access__ x))
+# else
+#  define _Attr_access_(x)
+# endif
+#endif
+
 #ifdef __USE_GNU
 /* Sets the current default syntax to SYNTAX, and return the old syntax.
    You can also simply assign to the 're_syntax_options' variable.  */
@@ -536,7 +560,8 @@ extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax);
    'regcomp', with a malloc'ed value, or set to NULL before calling
    'regfree'.  */
 extern const char *re_compile_pattern (const char *__pattern, size_t __length,
-				       struct re_pattern_buffer *__buffer);
+				       struct re_pattern_buffer *__buffer)
+    _Attr_access_ ((__read_only__, 1, 2));
 
 
 /* Compile a fastmap for the compiled pattern in BUFFER; used to
@@ -553,7 +578,8 @@ extern int re_compile_fastmap (struct re_pattern_buffer *__buffer);
 extern regoff_t re_search (struct re_pattern_buffer *__buffer,
 			   const char *__String, regoff_t __length,
 			   regoff_t __start, regoff_t __range,
-			   struct re_registers *__regs);
+			   struct re_registers *__regs)
+    _Attr_access_ ((__read_only__, 2, 3));
 
 
 /* Like 're_search', but search in the concatenation of STRING1 and
@@ -563,14 +589,17 @@ extern regoff_t re_search_2 (struct re_pattern_buffer *__buffer,
 			     const char *__string2, regoff_t __length2,
 			     regoff_t __start, regoff_t __range,
 			     struct re_registers *__regs,
-			     regoff_t __stop);
+			     regoff_t __stop)
+    _Attr_access_ ((__read_only__, 2, 3))
+    _Attr_access_ ((__read_only__, 4, 5));
 
 
 /* Like 're_search', but return how many characters in STRING the regexp
    in BUFFER matched, starting at position START.  */
 extern regoff_t re_match (struct re_pattern_buffer *__buffer,
 			  const char *__String, regoff_t __length,
-			  regoff_t __start, struct re_registers *__regs);
+			  regoff_t __start, struct re_registers *__regs)
+    _Attr_access_ ((__read_only__, 2, 3));
 
 
 /* Relates to 're_match' as 're_search_2' relates to 're_search'.  */
@@ -578,7 +607,9 @@ extern regoff_t re_match_2 (struct re_pattern_buffer *__buffer,
 			    const char *__string1, regoff_t __length1,
 			    const char *__string2, regoff_t __length2,
 			    regoff_t __start, struct re_registers *__regs,
-			    regoff_t __stop);
+			    regoff_t __stop)
+    _Attr_access_ ((__read_only__, 2, 3))
+    _Attr_access_ ((__read_only__, 4, 5));
 
 
 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
@@ -647,14 +678,19 @@ extern int regcomp (regex_t *_Restrict_ __preg,
 
 extern int regexec (const regex_t *_Restrict_ __preg,
 		    const char *_Restrict_ __String, size_t __nmatch,
-		    regmatch_t __pmatch[_Restrict_arr_],
+		    regmatch_t __pmatch[_Restrict_arr_
+					_REGEX_NELTS (__nmatch)],
 		    int __eflags);
 
 extern size_t regerror (int __errcode, const regex_t *_Restrict_ __preg,
-			char *_Restrict_ __errbuf, size_t __errbuf_size);
+			char *_Restrict_ __errbuf, size_t __errbuf_size)
+    _Attr_access_ ((__write_only__, 3, 4));
 
 extern void regfree (regex_t *__preg);
 
+#if defined __GNUC__ && 4 < __GNUC__ + (6 <= __GNUC_MINOR__)
+# pragma GCC diagnostic pop
+#endif
 
 #ifdef __cplusplus
 }
diff --git a/lib/regexec.c b/lib/regexec.c
index 5e4eb497a..83e9aaf8c 100644
--- a/lib/regexec.c
+++ b/lib/regexec.c
@@ -191,7 +191,7 @@ static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len);
 
 int
 regexec (const regex_t *__restrict preg, const char *__restrict string,
-	 size_t nmatch, regmatch_t pmatch[], int eflags)
+	 size_t nmatch, regmatch_t pmatch[_REGEX_NELTS (nmatch)], int eflags)
 {
   reg_errcode_t err;
   Idx start, length;
@@ -235,7 +235,7 @@ int
 attribute_compat_text_section
 __compat_regexec (const regex_t *__restrict preg,
 		  const char *__restrict string, size_t nmatch,
-		  regmatch_t pmatch[], int eflags)
+		  regmatch_t pmatch[_REGEX_NELTS (nmatch)], int eflags)
 {
   return regexec (preg, string, nmatch, pmatch,
 		  eflags & (REG_NOTBOL | REG_NOTEOL));

Reply via email to