This is an automated email from the ASF dual-hosted git repository.
bnolsen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/trafficserver.git
The following commit(s) were added to refs/heads/master by this push:
new 891d348dff regex_remap: convert from pcre to Regex (#12575)
891d348dff is described below
commit 891d348dff9c9e804900d9aaeef5d4c78f9e3f69
Author: Brian Olsen <[email protected]>
AuthorDate: Thu Nov 6 11:52:11 2025 -0700
regex_remap: convert from pcre to Regex (#12575)
* regex_remap: convert from pcre to Regex
* get RegexMatchContext helper class working
* cleanup
* remove pcre2 match context features not in centos
* add back in comments from the pcre plugin
* refactor Regex::captureCount(), add unit tests
* change Regex api function convention, add to unit test
* better handling of regex exec call failure
* use errmsg in diags.log, restore older match limit test
---
include/tsutil/Regex.h | 52 ++++++-
plugins/experimental/cookie_remap/cookie_remap.cc | 2 +-
plugins/regex_remap/CMakeLists.txt | 2 +-
plugins/regex_remap/regex_remap.cc | 151 ++++++++++-----------
src/proxy/http/remap/RemapConfig.cc | 2 +-
src/tsutil/Regex.cc | 127 +++++++++++++++--
src/tsutil/unit_tests/test_Regex.cc | 54 +++++++-
.../pluginTest/regex_remap/regex_remap.test.py | 18 ++-
.../regex_remap/replay/yts-2819.replay.json | 74 ++++++++++
9 files changed, 380 insertions(+), 102 deletions(-)
diff --git a/include/tsutil/Regex.h b/include/tsutil/Regex.h
index cd8d7c1cb4..cc5260c24f 100644
--- a/include/tsutil/Regex.h
+++ b/include/tsutil/Regex.h
@@ -94,6 +94,41 @@ private:
_MatchDataPtr _match_data;
};
+/// @brief Wrapper for PCRE2 match context
+///
+/// @internal This instance is not tied to any Regex and can be used with one
+/// of the Regex::exec overloads.
+class RegexMatchContext
+{
+  friend class Regex;
+
+public:
+  /** Construct a new RegexMatchContext object.
+   */
+  RegexMatchContext();
+  ~RegexMatchContext();
+
+  /// uses pcre2_match_context_copy for a deep copy.
+  RegexMatchContext(RegexMatchContext const &orig);
+  RegexMatchContext &operator=(RegexMatchContext const &orig);
+
+  // Move operations are intentionally not declared. A defaulted move would
+  // copy the raw pointer held in _match_context member-wise, leaving two
+  // objects that both free the same context in their destructors. With the
+  // copy operations user-declared and no move operations declared, rvalues
+  // bind to the deep-copying operations above, which is always safe.
+
+  /** Limits the amount of backtracking that can take place.
+   * An exec call that exceeds the limit fails with PCRE2_ERROR_MATCHLIMIT(-47).
+   */
+  void set_match_limit(uint32_t limit);
+
+private:
+  /// @internal This wraps a void* so to avoid requiring a pcre2 include.
+  struct _MatchContext;
+  struct _MatchContextPtr {
+    void *_ptr = nullptr;
+  };
+
+  /// Opaque handle to the underlying pcre2 match context (may be null).
+  _MatchContextPtr _match_context;
+};
+
/// @brief Wrapper for PCRE2 regular expression.
class Regex
{
@@ -179,6 +214,7 @@ public:
* @param subject String to match against.
* @param matches Place to store the capture groups.
* @param flags Match flags (e.g., RE_NOTEMPTY).
+ * @param matchContext Optional match context (e.g. to set matching limits); if nullptr, the context from RegexContext::get_instance() is used.
* @return @c The number of capture groups. < 0 if an error occurred. 0 if
the number of Matches is too small.
*
* It is safe to call this method concurrently on the same instance of @a
this.
@@ -186,10 +222,20 @@ public:
* Each capture group takes 3 elements of @a ovector, therefore @a ovecsize
must
* be a multiple of 3 and at least three times the number of desired capture
groups.
*/
- int exec(std::string_view subject, RegexMatches &matches, uint32_t flags)
const;
+ int exec(std::string_view subject, RegexMatches &matches, uint32_t flags,
+ RegexMatchContext const *const matchContext = nullptr) const;
+
+ /** Error string for exec failure.
+ *
+ * @param rc Return code from an exec call.
+ */
+ static std::string get_error_string(int rc);
+
+ /// @return The number of capture groups in the compiled pattern, -1 for
fail.
+ int32_t get_capture_count() const;
- /// @return The number of capture groups in the compiled pattern.
- int get_capture_count();
+ /// @return number of highest back references, -1 for fail.
+ int32_t get_backref_max() const;
/// @return Is the compiled pattern empty?
bool empty() const;
diff --git a/plugins/experimental/cookie_remap/cookie_remap.cc
b/plugins/experimental/cookie_remap/cookie_remap.cc
index ce7f5caeb4..f29033f4c6 100644
--- a/plugins/experimental/cookie_remap/cookie_remap.cc
+++ b/plugins/experimental/cookie_remap/cookie_remap.cc
@@ -445,7 +445,7 @@ private:
Regex *regex = nullptr;
std::string regex_string;
- int regex_ccount = 0;
+ int32_t regex_ccount = 0;
std::string bucket;
unsigned int how_many = 0;
diff --git a/plugins/regex_remap/CMakeLists.txt
b/plugins/regex_remap/CMakeLists.txt
index a69af3590d..2a233eedc3 100644
--- a/plugins/regex_remap/CMakeLists.txt
+++ b/plugins/regex_remap/CMakeLists.txt
@@ -17,6 +17,6 @@
add_atsplugin(regex_remap regex_remap.cc)
-target_link_libraries(regex_remap PRIVATE PCRE::PCRE libswoc::libswoc)
+target_link_libraries(regex_remap PRIVATE libswoc::libswoc)
verify_remap_plugin(regex_remap)
diff --git a/plugins/regex_remap/regex_remap.cc
b/plugins/regex_remap/regex_remap.cc
index de38ba2378..18838291e2 100644
--- a/plugins/regex_remap/regex_remap.cc
+++ b/plugins/regex_remap/regex_remap.cc
@@ -45,17 +45,14 @@
#include "tscore/ink_time.h"
#include "tscore/ink_inet.h"
-#ifdef HAVE_PCRE_PCRE_H
-#include <pcre/pcre.h>
-#else
-#include <pcre.h>
-#endif
+#include "tsutil/Regex.h"
static const char *PLUGIN_NAME = "regex_remap";
// Constants
-static const int OVECCOUNT = 30; // We support $0 - $9 x2 ints, and this needs
to be 1.5x that
-static const int MAX_SUBS = 32; // No more than 32 substitution variables in
the subst string
+static const int MATCHCOUNT = 15; // We support $0 - $9 x2 ints,
and this needs to be 1.5x that
+static const int MAX_SUBS = 32; // No more than 32 substitution
variables in the subst string
+static const int32_t REGEX_MATCH_LIMIT = 1750; // POOMA - also dependent on
actual stack size. Crashes with previous value of 2047
// Substitutions other than regex matches
enum ExtraSubstitutions {
@@ -117,13 +114,6 @@ public:
Dbg(dbg_ctl, "Calling destructor");
TSfree(_rex_string);
TSfree(_subst);
-
- if (_rex) {
- pcre_free(_rex);
- }
- if (_extra) {
- pcre_free(_extra);
- }
}
bool initialize(const std::string ®, const std::string &sub, const
std::string &opt);
@@ -140,25 +130,25 @@ public:
fprintf(stderr, "[%s]: Regex %d ( %s ): %.2f%%\n", now, ix,
_rex_string, 100.0 * _hits / max);
}
- int compile(const char *&error, int &erroffset);
+ // Returns '0' on success
+ int compile(std::string &error, int &erroffset);
- // Perform the regular expression matching against a string.
+ // number of matches, or negative if failed
int
- match(const char *str, int len, int ovector[])
+ match(std::string_view const str, RegexMatches &matches) const
{
- return pcre_exec(_rex, // the compiled pattern
- _extra, // Extra data from study (maybe)
- str, // the subject string
- len, // the length of the subject
- 0, // start at offset 0 in the subject
- 0, // default options
- ovector, // output vector for substring information
- OVECCOUNT); // number of elements in the output vector
+ TSAssert(nullptr != _match_context);
+ int const stat = _rex.exec(str, matches, 0, _match_context);
+ if (0 <= stat) {
+ Dbg(dbg_ctl, "Regex match (%d): %.*s", stat, (int)str.length(),
str.data());
+ return matches.size();
+ }
+ return stat;
}
// Substitutions
- int get_lengths(const int ovector[], int lengths[], TSRemapRequestInfo *rri,
UrlComponents *req_url);
- int substitute(char dest[], const char *src, const int ovector[], const int
lengths[], TSHttpTxn txnp, TSRemapRequestInfo *rri,
+ int get_lengths(RegexMatches const &matches, int lengths[],
TSRemapRequestInfo *rri, UrlComponents *req_url);
+ int substitute(char dest[], RegexMatches const &matches, const int
lengths[], TSHttpTxn txnp, TSRemapRequestInfo *rri,
UrlComponents *req_url, bool lowercase_substitutions);
// setter / getters for members the linked list.
@@ -173,6 +163,12 @@ public:
return _next;
}
+ // Stores the match context pointer that match() hands to Regex::exec.
+ // Only the pointer is kept; the pointee is owned by the RemapInstance
+ // (see the _match_context member), so it must outlive this RemapRegex.
+ inline void
+ set_match_context(RegexMatchContext const *const ctx)
+ {
+ _match_context = ctx;
+ }
+
// setter / getters for order number within the linked list
inline void
set_order(int order)
@@ -263,10 +259,10 @@ private:
bool _lowercase_substitutions = false;
- pcre *_rex = nullptr;
- pcre_extra *_extra = nullptr;
- RemapRegex *_next = nullptr;
- TSHttpStatus _status = static_cast<TSHttpStatus>(0);
+ Regex _rex;
+ RegexMatchContext const *_match_context = nullptr; // owned by RemapInstance
+ RemapRegex *_next = nullptr;
+ TSHttpStatus _status = static_cast<TSHttpStatus>(0);
int _active_timeout = -1;
int _no_activity_timeout = -1;
@@ -319,7 +315,7 @@ RemapRegex::initialize(const std::string ®, const
std::string &sub, const std
// These take an option 0|1 value, without value it implies 1
if (opt.compare(start, 8, "caseless") == 0) {
- _options |= PCRE_CASELESS;
+ _options |= RE_CASE_INSENSITIVE;
} else if (opt.compare(start, 23, "lowercase_substitutions") == 0) {
_lowercase_substitutions = true;
} else if (opt.compare(start, 8, "strategy") == 0) {
@@ -386,41 +382,26 @@ RemapRegex::initialize(const std::string ®, const
std::string &sub, const std
// Compile and study the regular expression.
int
-RemapRegex::compile(const char *&error, int &erroffset)
+RemapRegex::compile(std::string &error, int &erroffset)
{
- char *str;
- int ccount;
-
// Initialize these in case they are not set.
error = "unknown error";
erroffset = -1;
- _rex = pcre_compile(_rex_string, // the pattern
- _options, // options
- &error, // for error message
- &erroffset, // for error offset
- nullptr); // use default character tables
-
- if (nullptr == _rex) {
- return -1;
- }
-
- _extra = pcre_study(_rex, PCRE_STUDY_EXTRA_NEEDED, &error);
- if (error != nullptr) {
+ bool const restat = _rex.compile(_rex_string, error, erroffset, _options);
+ if (!restat) {
+ TSError("[%s] Error compiling : %s", PLUGIN_NAME, _rex_string);
return -1;
}
- // POOMA - also dependent on actual stack size. Crashes with previous value
of 2047,
- _extra->match_limit_recursion = 1750;
- _extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
-
- if (pcre_fullinfo(_rex, _extra, PCRE_INFO_CAPTURECOUNT, &ccount) != 0) {
- error = "call to pcre_fullinfo() failed";
+ int32_t const ccount = _rex.get_capture_count();
+ if (ccount < 0) {
+ error = "Failure to get capture count for Regex";
return -1;
}
// Get some info for the string substitutions
- str = _subst;
+ char *str = _subst;
_num_subs = 0;
while (str && *str) {
@@ -487,7 +468,7 @@ RemapRegex::compile(const char *&error, int &erroffset)
// We also calculate a total length for the new string, which is the max
length the
// substituted string can have (use it to allocate a buffer before calling
substitute() ).
int
-RemapRegex::get_lengths(const int ovector[], int lengths[], TSRemapRequestInfo
*rri, UrlComponents *req_url)
+RemapRegex::get_lengths(RegexMatches const &matches, int lengths[],
TSRemapRequestInfo *rri, UrlComponents *req_url)
{
int len = _subst_len + 1; // Bigger then necessary
@@ -495,7 +476,7 @@ RemapRegex::get_lengths(const int ovector[], int lengths[],
TSRemapRequestInfo *
int ix = _sub_ix[i];
if (ix < 10) {
- lengths[ix] = ovector[2 * ix + 1] - ovector[2 * ix]; // -1 - -1 == 0
+ lengths[ix] = matches[ix].length();
len += lengths[ix];
} else {
int tmp_len;
@@ -541,8 +522,8 @@ RemapRegex::get_lengths(const int ovector[], int lengths[],
TSRemapRequestInfo *
// regex that was matches, while $1 - $9 are the corresponding groups. Return
the final
// length of the string as written to dest (not including the trailing '0').
int
-RemapRegex::substitute(char dest[], const char *src, const int ovector[],
const int lengths[], TSHttpTxn txnp,
- TSRemapRequestInfo *rri, UrlComponents *req_url, bool
lowercase_substitutions)
+RemapRegex::substitute(char dest[], RegexMatches const &matches, const int
lengths[], TSHttpTxn txnp, TSRemapRequestInfo *rri,
+ UrlComponents *req_url, bool lowercase_substitutions)
{
if (_num_subs > 0) {
char *p1 = dest;
@@ -556,7 +537,7 @@ RemapRegex::substitute(char dest[], const char *src, const
int ovector[], const
memcpy(p1, p2, _sub_pos[i] - prev);
p1 += (_sub_pos[i] - prev);
if (ix < 10) {
- memcpy(p1, src + ovector[2 * ix], lengths[ix]);
+ memcpy(p1, matches[ix].data(), matches[ix].length());
p1 += lengths[ix];
} else {
char buff[INET6_ADDRSTRLEN];
@@ -630,17 +611,18 @@ RemapRegex::substitute(char dest[], const char *src,
const int ovector[], const
struct RemapInstance {
RemapInstance() : filename("unknown") {}
- RemapRegex *first = nullptr;
- RemapRegex *last = nullptr;
- bool pristine_url = false;
- bool profile = false;
- bool method = false;
- bool query_string = true;
- bool host = false;
- int hits = 0;
- int misses = 0;
- int failures = 0;
- std::string filename;
+ RemapRegex *first = nullptr;
+ RemapRegex *last = nullptr;
+ RegexMatchContext match_context = {};
+ bool pristine_url = false;
+ bool profile = false;
+ bool method = false;
+ bool query_string = true;
+ bool host = false;
+ int hits = 0;
+ int misses = 0;
+ int failures = 0;
+ std::string filename;
};
///////////////////////////////////////////////////////////////////////////////
@@ -783,11 +765,12 @@ TSRemapNewInstance(int argc, char *argv[], void **ih,
char * /* errbuf ATS_UNUSE
continue;
}
- const char *error;
+ std::string error;
int erroffset;
- if (cur->compile(error, erroffset) < 0) {
+ Dbg(dbg_ctl, "Compiling regex: %s", regex.c_str());
+ if (0 != cur->compile(error, erroffset)) {
std::ostringstream oss;
- oss << '[' << PLUGIN_NAME << "] PCRE failed in " <<
(ri->filename).c_str() << " (line " << lineno << ')';
+ oss << '[' << PLUGIN_NAME << "] Regex compile failed in " <<
(ri->filename).c_str() << " (line " << lineno << ')';
if (erroffset > 0) {
oss << " at offset " << erroffset;
}
@@ -801,6 +784,7 @@ TSRemapNewInstance(int argc, char *argv[], void **ih, char
* /* errbuf ATS_UNUSE
} else {
Dbg(dbg_ctl, "Added regex=%s with subs=%s and options `%s'",
regex.c_str(), subst.c_str(), options.c_str());
cur->set_order(++count);
+ cur->set_match_context(&(ri->match_context));
auto tmp = cur.get();
if (ri->first == nullptr) {
ri->first = cur.release();
@@ -811,6 +795,8 @@ TSRemapNewInstance(int argc, char *argv[], void **ih, char
* /* errbuf ATS_UNUSE
}
}
+ ri->match_context.set_match_limit(REGEX_MATCH_LIMIT);
+
// Make sure we got something...
if (ri->first == nullptr) {
TSError("[%s] no regular expressions from the maps", PLUGIN_NAME);
@@ -823,6 +809,7 @@ TSRemapNewInstance(int argc, char *argv[], void **ih, char
* /* errbuf ATS_UNUSE
void
TSRemapDeleteInstance(void *ih)
{
+ Dbg(dbg_ctl, "TSRemapDeleteInstance");
RemapInstance *ri = static_cast<RemapInstance *>(ih);
RemapRegex *re;
RemapRegex *tmp;
@@ -915,8 +902,7 @@ TSRemapDoRemap(void *ih, TSHttpTxn txnp, TSRemapRequestInfo
*rri)
UrlComponents req_url;
req_url.populate(src_url.bufp, src_url.loc);
- int ovector[OVECCOUNT];
- int lengths[OVECCOUNT / 2 + 1];
+ int lengths[MATCHCOUNT + 1];
int dest_len;
TSRemapStatus retval = TSREMAP_DID_REMAP;
RemapRegex *re = ri->first;
@@ -963,12 +949,14 @@ TSRemapDoRemap(void *ih, TSHttpTxn txnp,
TSRemapRequestInfo *rri)
match_buf[match_len] = '\0'; // NULL terminate the match string
Dbg(dbg_ctl, "Target match string is `%s'", match_buf);
+ RegexMatches matches(MATCHCOUNT);
+
// Apply the regular expressions, in order. First one wins.
while (re) {
// Since we check substitutions on parse time, we don't need to reset
ovector
- auto match_result = re->match(match_buf, match_len, ovector);
+ auto match_result = re->match(match_buf, matches);
if (match_result >= 0) {
- int new_len = re->get_lengths(ovector, lengths, rri, &req_url);
+ int new_len = re->get_lengths(matches, lengths, rri, &req_url);
// Set timeouts
if (re->active_timeout_option() > (-1)) {
@@ -1040,7 +1028,7 @@ TSRemapDoRemap(void *ih, TSHttpTxn txnp,
TSRemapRequestInfo *rri)
char *dest;
dest = static_cast<char *>(alloca(new_len + 8));
- dest_len = re->substitute(dest, match_buf, ovector, lengths, txnp,
rri, &req_url, lowercase_substitutions);
+ dest_len = re->substitute(dest, matches, lengths, txnp, rri, &req_url,
lowercase_substitutions);
Dbg(dbg_ctl, "New URL is estimated to be %d bytes long, or less",
new_len);
Dbg(dbg_ctl, "New URL is %s (length %d)", dest, dest_len);
@@ -1075,8 +1063,9 @@ TSRemapDoRemap(void *ih, TSHttpTxn txnp,
TSRemapRequestInfo *rri)
}
} else if (match_result != -1) {
ink_atomic_increment(&(ri->failures), 1);
- TSError(R"([%s] Bad regular expression result %d from "%s" in file
"%s".)", PLUGIN_NAME, match_result, re->regex(),
- ri->filename.c_str());
+ std::string const errmsg = Regex::get_error_string(match_result);
+ TSError(R"([%s] Bad regular expression result %d ("%s") from "%s" in
file "%s".)", PLUGIN_NAME, match_result, errmsg.c_str(),
+ re->regex(), ri->filename.c_str());
}
// Try the next regex
diff --git a/src/proxy/http/remap/RemapConfig.cc
b/src/proxy/http/remap/RemapConfig.cc
index 73d74b8bf4..a1ceac4e0e 100644
--- a/src/proxy/http/remap/RemapConfig.cc
+++ b/src/proxy/http/remap/RemapConfig.cc
@@ -974,7 +974,7 @@ process_regex_mapping_config(const char *from_host_lower,
url_mapping *new_mappi
std::string_view to_host{};
int to_host_len;
int substitution_id;
- int captures;
+ int32_t captures;
reg_map->to_url_host_template = nullptr;
reg_map->to_url_host_template_len = 0;
diff --git a/src/tsutil/Regex.cc b/src/tsutil/Regex.cc
index c40d64491b..0e76c50ce1 100644
--- a/src/tsutil/Regex.cc
+++ b/src/tsutil/Regex.cc
@@ -208,6 +208,75 @@ RegexMatches::operator[](size_t index) const
return std::string_view(_subject.data() + ovector[2 * index], ovector[2 *
index + 1] - ovector[2 * index]);
}
+//----------------------------------------------------------------------------
+// Helper that converts between the opaque void* stored in _MatchContextPtr
+// and the real pcre2_match_context pointer, keeping the casts in one place.
+struct RegexMatchContext::_MatchContext {
+ // Returns the stored pointer as a pcre2_match_context* (may be nullptr).
+ static pcre2_match_context *
+ get(_MatchContextPtr const &p)
+ {
+ return static_cast<pcre2_match_context *>(p._ptr);
+ }
+ // Stores @a ptr in @a p; does not free whatever @a p held before.
+ static void
+ set(_MatchContextPtr &p, pcre2_match_context *ptr)
+ {
+ p._ptr = ptr;
+ }
+};
+
+//----------------------------------------------------------------------------
+RegexMatchContext::RegexMatchContext()
+{
+  // Allocate a fresh pcre2 match context and keep it behind the opaque handle.
+  pcre2_match_context *const context = pcre2_match_context_create(nullptr);
+  debug_assert_message(context, "Failed to allocate custom pcre2 match context");
+  _MatchContext::set(_match_context, context);
+}
+
+//----------------------------------------------------------------------------
+RegexMatchContext::RegexMatchContext(RegexMatchContext const &other)
+{
+  pcre2_match_context *const source = _MatchContext::get(other._match_context);
+  if (nullptr == source) {
+    // Nothing to duplicate; _match_context keeps its default null pointer.
+    return;
+  }
+  // Deep copy so each instance owns (and later frees) its own context.
+  _MatchContext::set(_match_context, pcre2_match_context_copy(source));
+}
+
+//----------------------------------------------------------------------------
+RegexMatchContext &
+RegexMatchContext::operator=(RegexMatchContext const &other)
+{
+  if (&other != this) {
+    // Duplicate the source first (deep copy), or end up with null if the
+    // source holds no context.
+    pcre2_match_context *const source  = _MatchContext::get(other._match_context);
+    pcre2_match_context *const replica = (nullptr != source) ? pcre2_match_context_copy(source) : nullptr;
+
+    // Release the context this instance currently owns before replacing it;
+    // overwriting the pointer without freeing it would leak the old context.
+    pcre2_match_context *const previous = _MatchContext::get(_match_context);
+    if (nullptr != previous) {
+      pcre2_match_context_free(previous);
+    }
+
+    _MatchContext::set(_match_context, replica);
+  }
+  return *this;
+}
+
+//----------------------------------------------------------------------------
+RegexMatchContext::~RegexMatchContext()
+{
+  pcre2_match_context *const context = _MatchContext::get(_match_context);
+  debug_assert_message(context, "Failed to get the match context");
+  if (nullptr == context) {
+    return; // nothing was allocated, nothing to release
+  }
+  pcre2_match_context_free(context);
+}
+
+//----------------------------------------------------------------------------
+void
+RegexMatchContext::set_match_limit(uint32_t limit)
+{
+  pcre2_match_context *const context = _MatchContext::get(_match_context);
+  debug_assert_message(context, "Failed to get the match context");
+  if (nullptr == context) {
+    return; // no context to configure
+  }
+  // Forward the limit to the wrapped pcre2 match context.
+  pcre2_set_match_limit(context, limit);
+}
+
//----------------------------------------------------------------------------
struct Regex::_Code {
static pcre2_code *
@@ -314,7 +383,7 @@ Regex::compile(std::string_view pattern, std::string
&error, int &erroroffset, u
// get pcre2 error message
PCRE2_UCHAR buffer[256];
pcre2_get_error_message(error_code, buffer, sizeof(buffer));
- error.assign((char *)buffer);
+ error.assign((char const *)buffer);
return false;
}
@@ -355,7 +424,7 @@ Regex::exec(std::string_view subject, RegexMatches
&matches) const
//----------------------------------------------------------------------------
int32_t
-Regex::exec(std::string_view subject, RegexMatches &matches, uint32_t flags)
const
+Regex::exec(std::string_view subject, RegexMatches &matches, uint32_t flags,
RegexMatchContext const *const matchContext) const
{
auto code = _Code::get(_code);
@@ -363,33 +432,69 @@ Regex::exec(std::string_view subject, RegexMatches
&matches, uint32_t flags) con
if (code == nullptr) {
return PCRE2_ERROR_NULL;
}
- int count = pcre2_match(code, reinterpret_cast<PCRE2_SPTR>(subject.data()),
subject.size(), 0, flags,
- RegexMatches::_MatchData::get(matches._match_data),
RegexContext::get_instance()->get_match_context());
- matches._size = count;
+ // Use the provided or the thread global context?
+ pcre2_match_context *match_context;
+ if (nullptr == matchContext) {
+ match_context = RegexContext::get_instance()->get_match_context();
+ } else {
+ match_context =
RegexMatchContext::_MatchContext::get(matchContext->_match_context);
+ }
+
+ int const rc = pcre2_match(code,
reinterpret_cast<PCRE2_SPTR>(subject.data()), subject.size(), 0, flags,
+
RegexMatches::_MatchData::get(matches._match_data), match_context);
+
+ matches._size = rc;
// match was successful
- if (count >= 0) {
+ if (rc >= 0) {
matches._subject = subject;
// match but the output vector was too small, adjust the size of the
matches
- if (count == 0) {
+ if (rc == 0) {
matches._size =
pcre2_get_ovector_count(RegexMatches::_MatchData::get(matches._match_data));
}
}
- return count;
+ return rc;
+}
+
+//----------------------------------------------------------------------------
+// static
+// Translates a negative exec() return code into pcre2's error text.
+// Returns an empty string for rc >= 0 (not an error) and when pcre2 supplies
+// no message for the code.
+std::string
+Regex::get_error_string(int rc)
+{
+  std::string res;
+
+  if (rc < 0) {
+    // Zero-filled so the buffer is a valid empty C string even if
+    // pcre2_get_error_message fails without writing to it (e.g. for a code it
+    // does not recognise); otherwise assign() would read indeterminate bytes.
+    PCRE2_UCHAR buffer[256] = {0};
+    pcre2_get_error_message(rc, buffer, sizeof(buffer));
+    res.assign(reinterpret_cast<char const *>(buffer));
+  }
+
+  return res;
+}
//----------------------------------------------------------------------------
int32_t
-Regex::get_capture_count()
+Regex::get_capture_count() const
{
- int captures = -1;
+ uint32_t captures = 0;
if (pcre2_pattern_info(_Code::get(_code), PCRE2_INFO_CAPTURECOUNT,
&captures) != 0) {
return -1;
}
- return captures;
+ return static_cast<int32_t>(captures);
+}
+
+//----------------------------------------------------------------------------
+int32_t
+Regex::get_backref_max() const
+{
+  // Ask pcre2 for the highest back reference number used by the pattern;
+  // any non-zero status from the query is reported to the caller as -1.
+  uint32_t  highest = 0;
+  int const status  = pcre2_pattern_info(_Code::get(_code), PCRE2_INFO_BACKREFMAX, &highest);
+  return (0 == status) ? static_cast<int32_t>(highest) : -1;
+}
//----------------------------------------------------------------------------
diff --git a/src/tsutil/unit_tests/test_Regex.cc
b/src/tsutil/unit_tests/test_Regex.cc
index 679117f315..b1b2c1609d 100644
--- a/src/tsutil/unit_tests/test_Regex.cc
+++ b/src/tsutil/unit_tests/test_Regex.cc
@@ -81,7 +81,7 @@ struct submatch_t {
struct submatch_test_t {
std::string_view regex;
- int capture_count;
+ int32_t capture_count;
std::vector<submatch_t> tests;
};
@@ -489,3 +489,55 @@ TEST_CASE("Regex copy with RE_NOTEMPTY flag",
"[libts][Regex][copy][flags]")
CHECK(copy.exec(std::string_view(""), RE_NOTEMPTY) == false);
}
}
+
+// One back-reference test case: the pattern, whether it is expected to
+// compile, and the expected result of Regex::get_backref_max().
+struct backref_test_t {
+ std::string_view regex;
+ bool valid;
+ int32_t backref_max;
+};
+
+// The last entry is a pattern expected to fail compilation, for which
+// get_backref_max() is expected to report failure (-1).
+std::vector<backref_test_t> backref_test_data{
+ {{""}, true, 0 },
+ {{R"(\b(\w+)\s+\1\b)"}, true, 1 },
+ {{R"((.)\1)"}, true, 1 },
+ {{R"((.)(.).\2\1)"}, true, 2 },
+ {{R"((.\2\1)"}, false, -1},
+};
+
+TEST_CASE("Regex back reference counting", "[libts][Regex][get_backref_max]")
+{
+ // check the compile result and the reported highest back reference for each pattern
+ for (auto &item : backref_test_data) {
+ Regex r;
+ REQUIRE(r.compile(item.regex) == item.valid);
+ REQUIRE(r.get_backref_max() == item.backref_max);
+ }
+}
+
+// One match-context test case: the pattern, the subject string, whether the
+// pattern is expected to compile, and the expected return code from
+// Regex::exec() when run with the limited match context below.
+struct match_context_test_t {
+ std::string_view regex;
+ std::string_view str;
+ bool valid;
+ int32_t rcode;
+};
+
+// Expected codes: 1 = matched, -1 = no match, -47 = PCRE2_ERROR_MATCHLIMIT,
+// -51 = exec on a regex that failed to compile (presumably PCRE2_ERROR_NULL,
+// which exec returns for a null code - confirm against pcre2.h).
+std::vector<match_context_test_t> match_context_test_data{
+ {{"abc"}, {"abc"}, true, 1 },
+ {{"abc"}, {"a"}, true, -1 },
+ {{R"(^(\d{3})-(\d{3})-(\d{4})$)"}, {"123-456-7890"}, true, -47},
+ {{"(."}, {"a"}, false, -51},
+};
+
+TEST_CASE("RegexMatchContext", "[libts][Regex][RegexMatchContext]")
+{
+ // A very small match limit so the multi-group pattern above exceeds it.
+ RegexMatchContext match_context;
+ match_context.set_match_limit(2);
+ RegexMatches matches;
+
+ // run every case through exec() using the limited match context
+ for (auto &item : match_context_test_data) {
+ Regex r;
+ REQUIRE(r.compile(item.regex) == item.valid);
+ REQUIRE(r.exec(item.str, matches, 0, &match_context) == item.rcode);
+ }
+}
diff --git a/tests/gold_tests/pluginTest/regex_remap/regex_remap.test.py
b/tests/gold_tests/pluginTest/regex_remap/regex_remap.test.py
index 12e3b9867e..1c98e10909 100644
--- a/tests/gold_tests/pluginTest/regex_remap/regex_remap.test.py
+++ b/tests/gold_tests/pluginTest/regex_remap/regex_remap.test.py
@@ -60,6 +60,7 @@ ts.Disk.File(
[
"# regex_remap configuration\n"
"^/alpha/bravo/[?]((?!action=(newsfeed|calendar|contacts|notepad)).)*$
https://redirect.com/ @status=301\n"
+ "^/match_limit/(a+)+$ https://redirect.com/ @status=301\n"
])
ts.Disk.File(
@@ -119,13 +120,24 @@ tr.Processes.Default.ReturnCode = 0
tr.Processes.Default.Streams.stdout = "gold/regex_remap_simple.gold"
tr.StillRunningAfter = ts
-# 3 Test - Crash test.
-tr = Test.AddTestRun("crash test")
+# 3 Test - Match limit test 0
+tr = Test.AddTestRun("match limit 0")
creq = replay_txns[1]['client-request']
tr.MakeCurlCommand(curl_and_args + \
'--header "uuid: {}" '.format(creq["headers"]["fields"][1][1]) +
'"{}"'.format(creq["url"]), ts=ts)
tr.Processes.Default.ReturnCode = 0
tr.Processes.Default.Streams.stdout = "gold/regex_remap_crash.gold"
ts.Disk.diags_log.Content = Testers.ContainsExpression(
- 'ERROR: .regex_remap. Bad regular expression result -21', "Resource limit
exceeded")
+ 'ERROR: .regex_remap. Bad regular expression result -47', "Match limit
exceeded")
+tr.StillRunningAfter = ts
+
+# 4 Test - Match limit test 1
+tr = Test.AddTestRun("match limit 1")
+creq = replay_txns[2]['client-request']
+tr.MakeCurlCommand(curl_and_args + \
+ '--header "uuid: {}" '.format(creq["headers"]["fields"][1][1]) +
'"{}"'.format(creq["url"]), ts=ts)
+tr.Processes.Default.ReturnCode = 0
+tr.Processes.Default.Streams.stdout = "gold/regex_remap_crash.gold"
+ts.Disk.diags_log.Content = Testers.ContainsExpression(
+ 'ERROR: .regex_remap. Bad regular expression result -47', "Match limit
exceeded")
tr.StillRunningAfter = ts
diff --git
a/tests/gold_tests/pluginTest/regex_remap/replay/yts-2819.replay.json
b/tests/gold_tests/pluginTest/regex_remap/replay/yts-2819.replay.json
index 5083a134e5..4361a9800f 100644
--- a/tests/gold_tests/pluginTest/regex_remap/replay/yts-2819.replay.json
+++ b/tests/gold_tests/pluginTest/regex_remap/replay/yts-2819.replay.json
@@ -156,6 +156,80 @@
]
}
}
+ },
+ {
+ "uuid": "match_limit",
+ "client-request": {
+ "version": "1.1",
+ "scheme": "http",
+ "method": "GET",
+ "url": "http://example.one/match_limit/aaaaaaaaaaaaaaaaaaaf",
+ "headers": {
+ "fields": [
+ [
+ "Host",
+ "example.one"
+ ],
+ [
+ "uuid",
+ "match_limit"
+ ]
+ ]
+ }
+ },
+ "proxy-request": {
+ "version": "1.1",
+ "scheme": "http",
+ "method": "GET",
+ "url": "http://example.one/",
+ "headers": {
+ "fields": [
+ [
+ "uuid",
+ "match_limit"
+ ]
+ ]
+ }
+ },
+ "server-response": {
+ "status": 200,
+ "reason": "OK",
+ "content": {
+ "size": 6128
+ },
+ "headers": {
+ "fields": [
+ [
+ "Host",
+ "example.one"
+ ],
+ [
+ "uuid",
+ "180"
+ ],
+ [
+ "Content-Length",
+ "6128"
+ ],
+ [
+ "Connection",
+ "close"
+ ]
+ ]
+ }
+ },
+ "proxy-response": {
+ "status": 200,
+ "reason": "OK",
+ "content": {
+ "size": 6128
+ },
+ "headers": {
+ "fields": [
+ [ "Content-Length", 6128 ]
+ ]
+ }
+ }
}
]
}