Thanks for reporting that. I installed the attached patch, which fixes the
leaks in a different way that doesn't repeat the calls to re_node_set_free.

From 20183123b0d4e0105ea92ac69fa4a8b4f1c69273 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Thu, 5 Jun 2025 18:52:33 -0700
Subject: [PATCH] regex: fix some leaks when pattern compilation fails

Problem reported by Alec Brown in:
https://lists.gnu.org/r/bug-gnulib/2025-06/msg00040.html
* lib/regcomp.c (create_initial_state, calc_eclosure_iter):
Fix some memory leaks when these functions fail.
---
ChangeLog | 8 ++++++++
lib/regcomp.c | 49 +++++++++++++++++++++++++++----------------------
2 files changed, 35 insertions(+), 22 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 2fdc7fb4c0..e811cb496d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2025-06-05 Paul Eggert <egg...@cs.ucla.edu>
+
+ regex: fix some leaks when pattern compilation fails
+ Problem reported by Alec Brown in:
+ https://lists.gnu.org/r/bug-gnulib/2025-06/msg00040.html
+ * lib/regcomp.c (create_initial_state, calc_eclosure_iter):
+ Fix some memory leaks when these functions fail.
+
2025-06-05 Bruno Haible <br...@clisp.org>
unictype/base, uninorm/base, etc.: Support shared libraries on MSVC.
diff --git a/lib/regcomp.c b/lib/regcomp.c
index a23f289d7a..41157e5c3a 100644
--- a/lib/regcomp.c
+++ b/lib/regcomp.c
@@ -1001,21 +1001,25 @@ create_initial_state (re_dfa_t *dfa)
Idx dest_idx = dfa->edests[node_idx].elems[0];
if (!re_node_set_contains (&init_nodes, dest_idx))
{
- reg_errcode_t merge_err
+ err
= re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx);
- if (merge_err != REG_NOERROR)
- return merge_err;
+ if (err != REG_NOERROR)
+ break;
i = 0;
}
}
}
/* It must be the first time to invoke acquire_state. */
- dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0);
- /* We don't check ERR here, since the initial state must not be NULL. */
+ dfa->init_state
+ = (err == REG_NOERROR
+ ? re_acquire_state_context (&err, dfa, &init_nodes, 0)
+ : NULL);
if (__glibc_unlikely (dfa->init_state == NULL))
- return err;
- if (dfa->init_state->has_constraint)
+ {
+ /* Don't check ERR here, as the initial state must not be null. */
+ }
+ else if (dfa->init_state->has_constraint)
{
dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes,
CONTEXT_WORD);
@@ -1025,17 +1029,13 @@ create_initial_state (re_dfa_t *dfa)
&init_nodes,
CONTEXT_NEWLINE
| CONTEXT_BEGBUF);
- if (__glibc_unlikely (dfa->init_state_word == NULL
- || dfa->init_state_nl == NULL
- || dfa->init_state_begbuf == NULL))
- return err;
}
else
dfa->init_state_word = dfa->init_state_nl
= dfa->init_state_begbuf = dfa->init_state;
re_node_set_free (&init_nodes);
- return REG_NOERROR;
+ return err;
}
/* If it is possible to do searching in single byte encoding instead of UTF-8
@@ -1677,12 +1677,11 @@ calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root)
{
err = duplicate_node_closure (dfa, node, node, node,
dfa->nodes[node].constraint);
- if (__glibc_unlikely (err != REG_NOERROR))
- return err;
}
/* Expand each epsilon destination nodes. */
- if (IS_EPSILON_NODE(dfa->nodes[node].type))
+ if (__glibc_likely (err == REG_NOERROR)
+ && IS_EPSILON_NODE (dfa->nodes[node].type))
for (i = 0; i < dfa->edests[node].nelem; ++i)
{
re_node_set eclosure_elem;
@@ -1700,14 +1699,14 @@ calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root)
{
err = calc_eclosure_iter (&eclosure_elem, dfa, edest, false);
if (__glibc_unlikely (err != REG_NOERROR))
- return err;
+ break;
}
else
eclosure_elem = dfa->eclosures[edest];
/* Merge the epsilon closure of 'edest'. */
err = re_node_set_merge (&eclosure, &eclosure_elem);
if (__glibc_unlikely (err != REG_NOERROR))
- return err;
+ break;
/* If the epsilon closure of 'edest' is incomplete,
the epsilon closure of this node is also incomplete. */
if (dfa->eclosures[edest].nelem == 0)
@@ -1717,12 +1716,18 @@ calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root)
}
}
- if (incomplete && !root)
- dfa->eclosures[node].nelem = 0;
+ if (err != REG_NOERROR)
+ re_node_set_free (&eclosure);
else
- dfa->eclosures[node] = eclosure;
- *new_set = eclosure;
- return REG_NOERROR;
+ {
+ if (incomplete && !root)
+ dfa->eclosures[node].nelem = 0;
+ else
+ dfa->eclosures[node] = eclosure;
+ *new_set = eclosure;
+ }
+
+ return err;
}
/* Functions for token which are used in the parser. */
--
2.49.0