This is an automated email from the ASF dual-hosted git repository.

bnolsen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/trafficserver.git


The following commit(s) were added to refs/heads/master by this push:
     new 891d348dff regex_remap: convert from pcre to Regex (#12575)
891d348dff is described below

commit 891d348dff9c9e804900d9aaeef5d4c78f9e3f69
Author: Brian Olsen <[email protected]>
AuthorDate: Thu Nov 6 11:52:11 2025 -0700

    regex_remap: convert from pcre to Regex (#12575)
    
    * regex_remap: convert from pcre to Regex
    
    * get RegexMatchContext helper class working
    
    * cleanup
    
    * remove pcre2 match context features not in centos
    
    * add back in comments from the pcre plugin
    
    * refactor Regex::captureCount(), add unit tests
    
    * change Regex api function convention, add to unit test
    
    * better handling of regex exec call failure
    
    * use errmsg in diags.log, restore older match limit test
---
 include/tsutil/Regex.h                             |  52 ++++++-
 plugins/experimental/cookie_remap/cookie_remap.cc  |   2 +-
 plugins/regex_remap/CMakeLists.txt                 |   2 +-
 plugins/regex_remap/regex_remap.cc                 | 151 ++++++++++-----------
 src/proxy/http/remap/RemapConfig.cc                |   2 +-
 src/tsutil/Regex.cc                                | 127 +++++++++++++++--
 src/tsutil/unit_tests/test_Regex.cc                |  54 +++++++-
 .../pluginTest/regex_remap/regex_remap.test.py     |  18 ++-
 .../regex_remap/replay/yts-2819.replay.json        |  74 ++++++++++
 9 files changed, 380 insertions(+), 102 deletions(-)

diff --git a/include/tsutil/Regex.h b/include/tsutil/Regex.h
index cd8d7c1cb4..cc5260c24f 100644
--- a/include/tsutil/Regex.h
+++ b/include/tsutil/Regex.h
@@ -94,6 +94,41 @@ private:
   _MatchDataPtr _match_data;
 };
 
+/// @brief Wrapper for PCRE2 match context
+///
+/// @internal This instance is not tied to any Regex and can be used with one of the Regex::exec overloads.
+class RegexMatchContext
+{
+  friend class Regex;
+
+public:
+  /** Construct a new RegexMatchContext object.
+   */
+  RegexMatchContext();
+  ~RegexMatchContext();
+
+  /// uses pcre2_match_context_copy for a deep copy.
+  RegexMatchContext(RegexMatchContext const &orig);
+  RegexMatchContext &operator=(RegexMatchContext const &orig);
+
+  RegexMatchContext(RegexMatchContext &&)            = default;
+  RegexMatchContext &operator=(RegexMatchContext &&) = default;
+
+  /** Limits the amount of backtracking that can take place.
+   * Any regex exec call that fails will return PCRE2_ERROR_MATCHLIMIT(-47)
+   */
+  void set_match_limit(uint32_t limit);
+
+private:
+  /// @internal This wraps a void* so to avoid requiring a pcre2 include.
+  struct _MatchContext;
+  struct _MatchContextPtr {
+    void *_ptr = nullptr;
+  };
+
+  _MatchContextPtr _match_context;
+};
+
 /// @brief Wrapper for PCRE2 regular expression.
 class Regex
 {
@@ -179,6 +214,7 @@ public:
    * @param subject String to match against.
    * @param matches Place to store the capture groups.
    * @param flags Match flags (e.g., RE_NOTEMPTY).
+   * @param optional context Match context (set matching limits).
    * @return @c The number of capture groups. < 0 if an error occurred. 0 if the number of Matches is too small.
    *
    * It is safe to call this method concurrently on the same instance of @a this.
@@ -186,10 +222,20 @@ public:
    * Each capture group takes 3 elements of @a ovector, therefore @a ovecsize must
    * be a multiple of 3 and at least three times the number of desired capture groups.
    */
-  int exec(std::string_view subject, RegexMatches &matches, uint32_t flags) const;
+  int exec(std::string_view subject, RegexMatches &matches, uint32_t flags,
+           RegexMatchContext const *const matchContext = nullptr) const;
+
+  /** Error string for exec failure.
+   *
+   * @param int return code from exec call.
+   */
+  static std::string get_error_string(int rc);
+
+  /// @return The number of capture groups in the compiled pattern, -1 for fail.
+  int32_t get_capture_count() const;
 
-  /// @return The number of capture groups in the compiled pattern.
-  int get_capture_count();
+  /// @return number of highest back references, -1 for fail.
+  int32_t get_backref_max() const;
 
   /// @return Is the compiled pattern empty?
   bool empty() const;
diff --git a/plugins/experimental/cookie_remap/cookie_remap.cc 
b/plugins/experimental/cookie_remap/cookie_remap.cc
index ce7f5caeb4..f29033f4c6 100644
--- a/plugins/experimental/cookie_remap/cookie_remap.cc
+++ b/plugins/experimental/cookie_remap/cookie_remap.cc
@@ -445,7 +445,7 @@ private:
 
   Regex      *regex = nullptr;
   std::string regex_string;
-  int         regex_ccount = 0;
+  int32_t     regex_ccount = 0;
 
   std::string  bucket;
   unsigned int how_many = 0;
diff --git a/plugins/regex_remap/CMakeLists.txt 
b/plugins/regex_remap/CMakeLists.txt
index a69af3590d..2a233eedc3 100644
--- a/plugins/regex_remap/CMakeLists.txt
+++ b/plugins/regex_remap/CMakeLists.txt
@@ -17,6 +17,6 @@
 
 add_atsplugin(regex_remap regex_remap.cc)
 
-target_link_libraries(regex_remap PRIVATE PCRE::PCRE libswoc::libswoc)
+target_link_libraries(regex_remap PRIVATE libswoc::libswoc)
 
 verify_remap_plugin(regex_remap)
diff --git a/plugins/regex_remap/regex_remap.cc 
b/plugins/regex_remap/regex_remap.cc
index de38ba2378..18838291e2 100644
--- a/plugins/regex_remap/regex_remap.cc
+++ b/plugins/regex_remap/regex_remap.cc
@@ -45,17 +45,14 @@
 #include "tscore/ink_time.h"
 #include "tscore/ink_inet.h"
 
-#ifdef HAVE_PCRE_PCRE_H
-#include <pcre/pcre.h>
-#else
-#include <pcre.h>
-#endif
+#include "tsutil/Regex.h"
 
 static const char *PLUGIN_NAME = "regex_remap";
 
 // Constants
-static const int OVECCOUNT = 30; // We support $0 - $9 x2 ints, and this needs to be 1.5x that
-static const int MAX_SUBS  = 32; // No more than 32 substitution variables in the subst string
+static const int     MATCHCOUNT        = 15;   // We support $0 - $9 x2 ints, and this needs to be 1.5x that
+static const int     MAX_SUBS          = 32;   // No more than 32 substitution variables in the subst string
+static const int32_t REGEX_MATCH_LIMIT = 1750; // POOMA - also dependent on actual stack size. Crashes with previous value of 2047
 
 // Substitutions other than regex matches
 enum ExtraSubstitutions {
@@ -117,13 +114,6 @@ public:
     Dbg(dbg_ctl, "Calling destructor");
     TSfree(_rex_string);
     TSfree(_subst);
-
-    if (_rex) {
-      pcre_free(_rex);
-    }
-    if (_extra) {
-      pcre_free(_extra);
-    }
   }
 
   bool initialize(const std::string &reg, const std::string &sub, const 
std::string &opt);
@@ -140,25 +130,25 @@ public:
     fprintf(stderr, "[%s]:    Regex %d ( %s ): %.2f%%\n", now, ix, 
_rex_string, 100.0 * _hits / max);
   }
 
-  int compile(const char *&error, int &erroffset);
+  // Returns '0' on success
+  int compile(std::string &error, int &erroffset);
 
-  // Perform the regular expression matching against a string.
+  // number of matches, or negative if failed
   int
-  match(const char *str, int len, int ovector[])
+  match(std::string_view const str, RegexMatches &matches) const
   {
-    return pcre_exec(_rex,       // the compiled pattern
-                     _extra,     // Extra data from study (maybe)
-                     str,        // the subject string
-                     len,        // the length of the subject
-                     0,          // start at offset 0 in the subject
-                     0,          // default options
-                     ovector,    // output vector for substring information
-                     OVECCOUNT); // number of elements in the output vector
+    TSAssert(nullptr != _match_context);
+    int const stat = _rex.exec(str, matches, 0, _match_context);
+    if (0 <= stat) {
+      Dbg(dbg_ctl, "Regex match (%d): %.*s", stat, (int)str.length(), 
str.data());
+      return matches.size();
+    }
+    return stat;
   }
 
   // Substitutions
-  int get_lengths(const int ovector[], int lengths[], TSRemapRequestInfo *rri, 
UrlComponents *req_url);
-  int substitute(char dest[], const char *src, const int ovector[], const int 
lengths[], TSHttpTxn txnp, TSRemapRequestInfo *rri,
+  int get_lengths(RegexMatches const &matches, int lengths[], 
TSRemapRequestInfo *rri, UrlComponents *req_url);
+  int substitute(char dest[], RegexMatches const &matches, const int 
lengths[], TSHttpTxn txnp, TSRemapRequestInfo *rri,
                  UrlComponents *req_url, bool lowercase_substitutions);
 
   // setter / getters for members the linked list.
@@ -173,6 +163,12 @@ public:
     return _next;
   }
 
+  inline void
+  set_match_context(RegexMatchContext const *const ctx)
+  {
+    _match_context = ctx;
+  }
+
   // setter / getters for order number within the linked list
   inline void
   set_order(int order)
@@ -263,10 +259,10 @@ private:
 
   bool _lowercase_substitutions = false;
 
-  pcre        *_rex    = nullptr;
-  pcre_extra  *_extra  = nullptr;
-  RemapRegex  *_next   = nullptr;
-  TSHttpStatus _status = static_cast<TSHttpStatus>(0);
+  Regex                    _rex;
+  RegexMatchContext const *_match_context = nullptr; // owned by RemapInstance
+  RemapRegex              *_next          = nullptr;
+  TSHttpStatus             _status        = static_cast<TSHttpStatus>(0);
 
   int _active_timeout      = -1;
   int _no_activity_timeout = -1;
@@ -319,7 +315,7 @@ RemapRegex::initialize(const std::string &reg, const 
std::string &sub, const std
 
     // These take an option 0|1 value, without value it implies 1
     if (opt.compare(start, 8, "caseless") == 0) {
-      _options |= PCRE_CASELESS;
+      _options |= RE_CASE_INSENSITIVE;
     } else if (opt.compare(start, 23, "lowercase_substitutions") == 0) {
       _lowercase_substitutions = true;
     } else if (opt.compare(start, 8, "strategy") == 0) {
@@ -386,41 +382,26 @@ RemapRegex::initialize(const std::string &reg, const 
std::string &sub, const std
 
 // Compile and study the regular expression.
 int
-RemapRegex::compile(const char *&error, int &erroffset)
+RemapRegex::compile(std::string &error, int &erroffset)
 {
-  char *str;
-  int   ccount;
-
   // Initialize these in case they are not set.
   error     = "unknown error";
   erroffset = -1;
 
-  _rex = pcre_compile(_rex_string, // the pattern
-                      _options,    // options
-                      &error,      // for error message
-                      &erroffset,  // for error offset
-                      nullptr);    // use default character tables
-
-  if (nullptr == _rex) {
-    return -1;
-  }
-
-  _extra = pcre_study(_rex, PCRE_STUDY_EXTRA_NEEDED, &error);
-  if (error != nullptr) {
+  bool const restat = _rex.compile(_rex_string, error, erroffset, _options);
+  if (!restat) {
+    TSError("[%s] Error compiling : %s", PLUGIN_NAME, _rex_string);
     return -1;
   }
 
-  // POOMA - also dependent on actual stack size. Crashes with previous value of 2047,
-  _extra->match_limit_recursion  = 1750;
-  _extra->flags                 |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
-
-  if (pcre_fullinfo(_rex, _extra, PCRE_INFO_CAPTURECOUNT, &ccount) != 0) {
-    error = "call to pcre_fullinfo() failed";
+  int32_t const ccount = _rex.get_capture_count();
+  if (ccount < 0) {
+    error = "Failure to get capture count for Regex";
     return -1;
   }
 
   // Get some info for the string substitutions
-  str       = _subst;
+  char *str = _subst;
   _num_subs = 0;
 
   while (str && *str) {
@@ -487,7 +468,7 @@ RemapRegex::compile(const char *&error, int &erroffset)
 // We also calculate a total length for the new string, which is the max 
length the
 // substituted string can have (use it to allocate a buffer before calling 
substitute() ).
 int
-RemapRegex::get_lengths(const int ovector[], int lengths[], TSRemapRequestInfo 
*rri, UrlComponents *req_url)
+RemapRegex::get_lengths(RegexMatches const &matches, int lengths[], 
TSRemapRequestInfo *rri, UrlComponents *req_url)
 {
   int len = _subst_len + 1; // Bigger then necessary
 
@@ -495,7 +476,7 @@ RemapRegex::get_lengths(const int ovector[], int lengths[], 
TSRemapRequestInfo *
     int ix = _sub_ix[i];
 
     if (ix < 10) {
-      lengths[ix]  = ovector[2 * ix + 1] - ovector[2 * ix]; // -1 - -1 == 0
+      lengths[ix]  = matches[ix].length();
       len         += lengths[ix];
     } else {
       int tmp_len;
@@ -541,8 +522,8 @@ RemapRegex::get_lengths(const int ovector[], int lengths[], 
TSRemapRequestInfo *
 // regex that was matches, while $1 - $9 are the corresponding groups. Return 
the final
 // length of the string as written to dest (not including the trailing '0').
 int
-RemapRegex::substitute(char dest[], const char *src, const int ovector[], 
const int lengths[], TSHttpTxn txnp,
-                       TSRemapRequestInfo *rri, UrlComponents *req_url, bool 
lowercase_substitutions)
+RemapRegex::substitute(char dest[], RegexMatches const &matches, const int 
lengths[], TSHttpTxn txnp, TSRemapRequestInfo *rri,
+                       UrlComponents *req_url, bool lowercase_substitutions)
 {
   if (_num_subs > 0) {
     char *p1   = dest;
@@ -556,7 +537,7 @@ RemapRegex::substitute(char dest[], const char *src, const 
int ovector[], const
       memcpy(p1, p2, _sub_pos[i] - prev);
       p1 += (_sub_pos[i] - prev);
       if (ix < 10) {
-        memcpy(p1, src + ovector[2 * ix], lengths[ix]);
+        memcpy(p1, matches[ix].data(), matches[ix].length());
         p1 += lengths[ix];
       } else {
         char        buff[INET6_ADDRSTRLEN];
@@ -630,17 +611,18 @@ RemapRegex::substitute(char dest[], const char *src, 
const int ovector[], const
 struct RemapInstance {
   RemapInstance() : filename("unknown") {}
 
-  RemapRegex *first        = nullptr;
-  RemapRegex *last         = nullptr;
-  bool        pristine_url = false;
-  bool        profile      = false;
-  bool        method       = false;
-  bool        query_string = true;
-  bool        host         = false;
-  int         hits         = 0;
-  int         misses       = 0;
-  int         failures     = 0;
-  std::string filename;
+  RemapRegex       *first         = nullptr;
+  RemapRegex       *last          = nullptr;
+  RegexMatchContext match_context = {};
+  bool              pristine_url  = false;
+  bool              profile       = false;
+  bool              method        = false;
+  bool              query_string  = true;
+  bool              host          = false;
+  int               hits          = 0;
+  int               misses        = 0;
+  int               failures      = 0;
+  std::string       filename;
 };
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -783,11 +765,12 @@ TSRemapNewInstance(int argc, char *argv[], void **ih, 
char * /* errbuf ATS_UNUSE
       continue;
     }
 
-    const char *error;
+    std::string error;
     int         erroffset;
-    if (cur->compile(error, erroffset) < 0) {
+    Dbg(dbg_ctl, "Compiling regex: %s", regex.c_str());
+    if (0 != cur->compile(error, erroffset)) {
       std::ostringstream oss;
-      oss << '[' << PLUGIN_NAME << "] PCRE failed in " << 
(ri->filename).c_str() << " (line " << lineno << ')';
+      oss << '[' << PLUGIN_NAME << "] Regex compile failed in " << 
(ri->filename).c_str() << " (line " << lineno << ')';
       if (erroffset > 0) {
         oss << " at offset " << erroffset;
       }
@@ -801,6 +784,7 @@ TSRemapNewInstance(int argc, char *argv[], void **ih, char 
* /* errbuf ATS_UNUSE
     } else {
       Dbg(dbg_ctl, "Added regex=%s with subs=%s and options `%s'", 
regex.c_str(), subst.c_str(), options.c_str());
       cur->set_order(++count);
+      cur->set_match_context(&(ri->match_context));
       auto tmp = cur.get();
       if (ri->first == nullptr) {
         ri->first = cur.release();
@@ -811,6 +795,8 @@ TSRemapNewInstance(int argc, char *argv[], void **ih, char 
* /* errbuf ATS_UNUSE
     }
   }
 
+  ri->match_context.set_match_limit(REGEX_MATCH_LIMIT);
+
   // Make sure we got something...
   if (ri->first == nullptr) {
     TSError("[%s] no regular expressions from the maps", PLUGIN_NAME);
@@ -823,6 +809,7 @@ TSRemapNewInstance(int argc, char *argv[], void **ih, char 
* /* errbuf ATS_UNUSE
 void
 TSRemapDeleteInstance(void *ih)
 {
+  Dbg(dbg_ctl, "TSRemapDeleteInstance");
   RemapInstance *ri = static_cast<RemapInstance *>(ih);
   RemapRegex    *re;
   RemapRegex    *tmp;
@@ -915,8 +902,7 @@ TSRemapDoRemap(void *ih, TSHttpTxn txnp, TSRemapRequestInfo 
*rri)
   UrlComponents req_url;
   req_url.populate(src_url.bufp, src_url.loc);
 
-  int           ovector[OVECCOUNT];
-  int           lengths[OVECCOUNT / 2 + 1];
+  int           lengths[MATCHCOUNT + 1];
   int           dest_len;
   TSRemapStatus retval    = TSREMAP_DID_REMAP;
   RemapRegex   *re        = ri->first;
@@ -963,12 +949,14 @@ TSRemapDoRemap(void *ih, TSHttpTxn txnp, 
TSRemapRequestInfo *rri)
   match_buf[match_len] = '\0'; // NULL terminate the match string
   Dbg(dbg_ctl, "Target match string is `%s'", match_buf);
 
+  RegexMatches matches(MATCHCOUNT);
+
   // Apply the regular expressions, in order. First one wins.
   while (re) {
     // Since we check substitutions on parse time, we don't need to reset 
ovector
-    auto match_result = re->match(match_buf, match_len, ovector);
+    auto match_result = re->match(match_buf, matches);
     if (match_result >= 0) {
-      int new_len = re->get_lengths(ovector, lengths, rri, &req_url);
+      int new_len = re->get_lengths(matches, lengths, rri, &req_url);
 
       // Set timeouts
       if (re->active_timeout_option() > (-1)) {
@@ -1040,7 +1028,7 @@ TSRemapDoRemap(void *ih, TSHttpTxn txnp, 
TSRemapRequestInfo *rri)
         char *dest;
 
         dest     = static_cast<char *>(alloca(new_len + 8));
-        dest_len = re->substitute(dest, match_buf, ovector, lengths, txnp, 
rri, &req_url, lowercase_substitutions);
+        dest_len = re->substitute(dest, matches, lengths, txnp, rri, &req_url, 
lowercase_substitutions);
 
         Dbg(dbg_ctl, "New URL is estimated to be %d bytes long, or less", 
new_len);
         Dbg(dbg_ctl, "New URL is %s (length %d)", dest, dest_len);
@@ -1075,8 +1063,9 @@ TSRemapDoRemap(void *ih, TSHttpTxn txnp, 
TSRemapRequestInfo *rri)
       }
     } else if (match_result != -1) {
       ink_atomic_increment(&(ri->failures), 1);
-      TSError(R"([%s] Bad regular expression result %d from "%s" in file 
"%s".)", PLUGIN_NAME, match_result, re->regex(),
-              ri->filename.c_str());
+      std::string const errmsg = Regex::get_error_string(match_result);
+      TSError(R"([%s] Bad regular expression result %d ("%s") from "%s" in 
file "%s".)", PLUGIN_NAME, match_result, errmsg.c_str(),
+              re->regex(), ri->filename.c_str());
     }
 
     // Try the next regex
diff --git a/src/proxy/http/remap/RemapConfig.cc 
b/src/proxy/http/remap/RemapConfig.cc
index 73d74b8bf4..a1ceac4e0e 100644
--- a/src/proxy/http/remap/RemapConfig.cc
+++ b/src/proxy/http/remap/RemapConfig.cc
@@ -974,7 +974,7 @@ process_regex_mapping_config(const char *from_host_lower, 
url_mapping *new_mappi
   std::string_view to_host{};
   int              to_host_len;
   int              substitution_id;
-  int              captures;
+  int32_t          captures;
 
   reg_map->to_url_host_template     = nullptr;
   reg_map->to_url_host_template_len = 0;
diff --git a/src/tsutil/Regex.cc b/src/tsutil/Regex.cc
index c40d64491b..0e76c50ce1 100644
--- a/src/tsutil/Regex.cc
+++ b/src/tsutil/Regex.cc
@@ -208,6 +208,75 @@ RegexMatches::operator[](size_t index) const
   return std::string_view(_subject.data() + ovector[2 * index], ovector[2 * 
index + 1] - ovector[2 * index]);
 }
 
+//----------------------------------------------------------------------------
+struct RegexMatchContext::_MatchContext {
+  static pcre2_match_context *
+  get(_MatchContextPtr const &p)
+  {
+    return static_cast<pcre2_match_context *>(p._ptr);
+  }
+  static void
+  set(_MatchContextPtr &p, pcre2_match_context *ptr)
+  {
+    p._ptr = ptr;
+  }
+};
+
+//----------------------------------------------------------------------------
+RegexMatchContext::RegexMatchContext()
+{
+  auto ctx = pcre2_match_context_create(nullptr);
+  debug_assert_message(ctx, "Failed to allocate custom pcre2 match context");
+  _MatchContext::set(_match_context, ctx);
+}
+
+//----------------------------------------------------------------------------
+RegexMatchContext::RegexMatchContext(RegexMatchContext const &other)
+{
+  auto ptr = _MatchContext::get(other._match_context);
+  if (nullptr != ptr) {
+    pcre2_match_context *const ctx = pcre2_match_context_copy(ptr);
+    _MatchContext::set(_match_context, ctx);
+  }
+}
+
+//----------------------------------------------------------------------------
+RegexMatchContext &
+RegexMatchContext::operator=(RegexMatchContext const &other)
+{
+  if (&other != this) {
+    auto ptr = _MatchContext::get(other._match_context);
+    if (nullptr != ptr) {
+      pcre2_match_context *const ctx = pcre2_match_context_copy(ptr);
+      _MatchContext::set(_match_context, ctx);
+    } else {
+      _MatchContext::set(_match_context, nullptr);
+    }
+  }
+  return *this;
+}
+
+//----------------------------------------------------------------------------
+RegexMatchContext::~RegexMatchContext()
+{
+  auto ptr = _MatchContext::get(_match_context);
+  debug_assert_message(ptr, "Failed to get the match context");
+  if (ptr != nullptr) {
+    pcre2_match_context_free(ptr);
+  }
+}
+
+//----------------------------------------------------------------------------
+void
+RegexMatchContext::set_match_limit(uint32_t limit)
+{
+  auto ptr = _MatchContext::get(_match_context);
+  debug_assert_message(ptr, "Failed to get the match context");
+  if (ptr != nullptr) {
+    pcre2_set_match_limit(ptr, limit);
+  }
+}
+
 //----------------------------------------------------------------------------
 struct Regex::_Code {
   static pcre2_code *
@@ -314,7 +383,7 @@ Regex::compile(std::string_view pattern, std::string 
&error, int &erroroffset, u
     // get pcre2 error message
     PCRE2_UCHAR buffer[256];
     pcre2_get_error_message(error_code, buffer, sizeof(buffer));
-    error.assign((char *)buffer);
+    error.assign((char const *)buffer);
     return false;
   }
 
@@ -355,7 +424,7 @@ Regex::exec(std::string_view subject, RegexMatches 
&matches) const
 
 //----------------------------------------------------------------------------
 int32_t
-Regex::exec(std::string_view subject, RegexMatches &matches, uint32_t flags) 
const
+Regex::exec(std::string_view subject, RegexMatches &matches, uint32_t flags, 
RegexMatchContext const *const matchContext) const
 {
   auto code = _Code::get(_code);
 
@@ -363,33 +432,69 @@ Regex::exec(std::string_view subject, RegexMatches 
&matches, uint32_t flags) con
   if (code == nullptr) {
     return PCRE2_ERROR_NULL;
   }
-  int count = pcre2_match(code, reinterpret_cast<PCRE2_SPTR>(subject.data()), 
subject.size(), 0, flags,
-                          RegexMatches::_MatchData::get(matches._match_data), 
RegexContext::get_instance()->get_match_context());
 
-  matches._size = count;
+  // Use the provided or the thread global context?
+  pcre2_match_context *match_context;
+  if (nullptr == matchContext) {
+    match_context = RegexContext::get_instance()->get_match_context();
+  } else {
+    match_context = 
RegexMatchContext::_MatchContext::get(matchContext->_match_context);
+  }
+
+  int const rc = pcre2_match(code, 
reinterpret_cast<PCRE2_SPTR>(subject.data()), subject.size(), 0, flags,
+                             
RegexMatches::_MatchData::get(matches._match_data), match_context);
+
+  matches._size = rc;
 
   // match was successful
-  if (count >= 0) {
+  if (rc >= 0) {
     matches._subject = subject;
 
     // match but the output vector was too small, adjust the size of the 
matches
-    if (count == 0) {
+    if (rc == 0) {
       matches._size = 
pcre2_get_ovector_count(RegexMatches::_MatchData::get(matches._match_data));
     }
   }
 
-  return count;
+  return rc;
+}
+
+//----------------------------------------------------------------------------
+// static
+std::string
+Regex::get_error_string(int rc)
+{
+  std::string res;
+
+  if (rc < 0) {
+    PCRE2_UCHAR buffer[256];
+    pcre2_get_error_message(rc, buffer, sizeof(buffer));
+    res.assign((char const *)buffer);
+  }
+
+  return res;
 }
 
 //----------------------------------------------------------------------------
 int32_t
-Regex::get_capture_count()
+Regex::get_capture_count() const
 {
-  int captures = -1;
+  uint32_t captures = 0;
   if (pcre2_pattern_info(_Code::get(_code), PCRE2_INFO_CAPTURECOUNT, 
&captures) != 0) {
     return -1;
   }
-  return captures;
+  return static_cast<int32_t>(captures);
+}
+
+//----------------------------------------------------------------------------
+int32_t
+Regex::get_backref_max() const
+{
+  uint32_t refs = 0;
+  if (pcre2_pattern_info(_Code::get(_code), PCRE2_INFO_BACKREFMAX, &refs) != 
0) {
+    return -1;
+  }
+  return static_cast<int32_t>(refs);
 }
 
 //----------------------------------------------------------------------------
diff --git a/src/tsutil/unit_tests/test_Regex.cc 
b/src/tsutil/unit_tests/test_Regex.cc
index 679117f315..b1b2c1609d 100644
--- a/src/tsutil/unit_tests/test_Regex.cc
+++ b/src/tsutil/unit_tests/test_Regex.cc
@@ -81,7 +81,7 @@ struct submatch_t {
 
 struct submatch_test_t {
   std::string_view        regex;
-  int                     capture_count;
+  int32_t                 capture_count;
   std::vector<submatch_t> tests;
 };
 
@@ -489,3 +489,55 @@ TEST_CASE("Regex copy with RE_NOTEMPTY flag", 
"[libts][Regex][copy][flags]")
     CHECK(copy.exec(std::string_view(""), RE_NOTEMPTY) == false);
   }
 }
+
+struct backref_test_t {
+  std::string_view regex;
+  bool             valid;
+  int32_t          backref_max;
+};
+
+std::vector<backref_test_t> backref_test_data{
+  {{""},                  true,  0 },
+  {{R"(\b(\w+)\s+\1\b)"}, true,  1 },
+  {{R"((.)\1)"},          true,  1 },
+  {{R"((.)(.).\2\1)"},    true,  2 },
+  {{R"((.\2\1)"},         false, -1},
+};
+
+TEST_CASE("Regex back reference counting", "[libts][Regex][get_backref_max]")
+{
+  // case sensitive test
+  for (auto &item : backref_test_data) {
+    Regex r;
+    REQUIRE(r.compile(item.regex) == item.valid);
+    REQUIRE(r.get_backref_max() == item.backref_max);
+  }
+}
+
+struct match_context_test_t {
+  std::string_view regex;
+  std::string_view str;
+  bool             valid;
+  int32_t          rcode;
+};
+
+std::vector<match_context_test_t> match_context_test_data{
+  {{"abc"},                          {"abc"},          true,  1  },
+  {{"abc"},                          {"a"},            true,  -1 },
+  {{R"(^(\d{3})-(\d{3})-(\d{4})$)"}, {"123-456-7890"}, true,  -47},
+  {{"(."},                           {"a"},            false, -51},
+};
+
+TEST_CASE("RegexMatchContext", "[libts][Regex][RegexMatchContext]")
+{
+  RegexMatchContext match_context;
+  match_context.set_match_limit(2);
+  RegexMatches matches;
+
+  // case sensitive test
+  for (auto &item : match_context_test_data) {
+    Regex r;
+    REQUIRE(r.compile(item.regex) == item.valid);
+    REQUIRE(r.exec(item.str, matches, 0, &match_context) == item.rcode);
+  }
+}
diff --git a/tests/gold_tests/pluginTest/regex_remap/regex_remap.test.py 
b/tests/gold_tests/pluginTest/regex_remap/regex_remap.test.py
index 12e3b9867e..1c98e10909 100644
--- a/tests/gold_tests/pluginTest/regex_remap/regex_remap.test.py
+++ b/tests/gold_tests/pluginTest/regex_remap/regex_remap.test.py
@@ -60,6 +60,7 @@ ts.Disk.File(
         [
             "# regex_remap configuration\n"
             
"^/alpha/bravo/[?]((?!action=(newsfeed|calendar|contacts|notepad)).)*$ 
https://redirect.com/ @status=301\n"
+            "^/match_limit/(a+)+$ https://redirect.com/ @status=301\n"
         ])
 
 ts.Disk.File(
@@ -119,13 +120,24 @@ tr.Processes.Default.ReturnCode = 0
 tr.Processes.Default.Streams.stdout = "gold/regex_remap_simple.gold"
 tr.StillRunningAfter = ts
 
-# 3 Test - Crash test.
-tr = Test.AddTestRun("crash test")
+# 3 Test - Match limit test 0
+tr = Test.AddTestRun("match limit 0")
 creq = replay_txns[1]['client-request']
 tr.MakeCurlCommand(curl_and_args + \
     '--header "uuid: {}" '.format(creq["headers"]["fields"][1][1]) + 
'"{}"'.format(creq["url"]), ts=ts)
 tr.Processes.Default.ReturnCode = 0
 tr.Processes.Default.Streams.stdout = "gold/regex_remap_crash.gold"
 ts.Disk.diags_log.Content = Testers.ContainsExpression(
-    'ERROR: .regex_remap. Bad regular expression result -21', "Resource limit exceeded")
+    'ERROR: .regex_remap. Bad regular expression result -47', "Match limit exceeded")
+tr.StillRunningAfter = ts
+
+# 4 Test - Match limit test 1
+tr = Test.AddTestRun("match limit 1")
+creq = replay_txns[2]['client-request']
+tr.MakeCurlCommand(curl_and_args + \
+    '--header "uuid: {}" '.format(creq["headers"]["fields"][1][1]) + 
'"{}"'.format(creq["url"]), ts=ts)
+tr.Processes.Default.ReturnCode = 0
+tr.Processes.Default.Streams.stdout = "gold/regex_remap_crash.gold"
+ts.Disk.diags_log.Content = Testers.ContainsExpression(
+    'ERROR: .regex_remap. Bad regular expression result -47', "Match limit exceeded")
 tr.StillRunningAfter = ts
diff --git 
a/tests/gold_tests/pluginTest/regex_remap/replay/yts-2819.replay.json 
b/tests/gold_tests/pluginTest/regex_remap/replay/yts-2819.replay.json
index 5083a134e5..4361a9800f 100644
--- a/tests/gold_tests/pluginTest/regex_remap/replay/yts-2819.replay.json
+++ b/tests/gold_tests/pluginTest/regex_remap/replay/yts-2819.replay.json
@@ -156,6 +156,80 @@
               ]
             }
           }
+        },
+        {
+          "uuid": "match_limit",
+          "client-request": {
+            "version": "1.1",
+            "scheme": "http",
+            "method": "GET",
+            "url": "http://example.one/match_limit/aaaaaaaaaaaaaaaaaaaf",
+            "headers": {
+              "fields": [
+                [
+                  "Host",
+                  "example.one"
+                ],
+                [
+                  "uuid",
+                  "match_limit"
+                ]
+              ]
+            }
+          },
+          "proxy-request": {
+            "version": "1.1",
+            "scheme": "http",
+            "method": "GET",
+            "url": "http://example.one/",
+            "headers": {
+              "fields": [
+                [
+                  "uuid",
+                  "match_limit"
+                ]
+              ]
+            }
+          },
+          "server-response": {
+            "status": 200,
+            "reason": "OK",
+            "content": {
+              "size": 6128
+            },
+            "headers": {
+              "fields": [
+                [
+                  "Host",
+                  "example.one"
+                ],
+                [
+                  "uuid",
+                  "180"
+                ],
+                [
+                  "Content-Length",
+                  "6128"
+                ],
+                [
+                  "Connection",
+                  "close"
+                ]
+              ]
+            }
+          },
+          "proxy-response": {
+            "status": 200,
+            "reason": "OK",
+            "content": {
+              "size": 6128
+            },
+            "headers": {
+              "fields": [
+                [ "Content-Length", 6128 ]
+              ]
+            }
+          }
         }
       ]
     }

Reply via email to