This is an automated email from the ASF dual-hosted git repository.

bcall pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/trafficserver.git


The following commit(s) were added to refs/heads/master by this push:
     new a57a824cab Convert cachekey plugin to use Regex class (PCRE2) instead of PCRE (#12607)
a57a824cab is described below

commit a57a824cabd217fb4001452cafce5278af286459
Author: Bryan Call <[email protected]>
AuthorDate: Fri Nov 7 11:07:39 2025 -0800

    Convert cachekey plugin to use Regex class (PCRE2) instead of PCRE (#12607)
---
 plugins/cachekey/pattern.cc                 | 134 +++-------
 plugins/cachekey/pattern.h                  |  25 +-
 plugins/cachekey/unit_tests/pattern_test.cc | 367 ++++++++++++++++++++++++++++
 src/tsutil/unit_tests/test_Regex.cc         | 332 +++++++++++++++++++++++++
 4 files changed, 744 insertions(+), 114 deletions(-)

diff --git a/plugins/cachekey/pattern.cc b/plugins/cachekey/pattern.cc
index 8492d4c360..e871e15f03 100644
--- a/plugins/cachekey/pattern.cc
+++ b/plugins/cachekey/pattern.cc
@@ -18,7 +18,7 @@
 
 /**
  * @file pattern.cc
- * @brief PRCE related classes.
+ * @brief Regex related classes.
  * @see pattern.h
  */
 
@@ -41,16 +41,14 @@ replaceString(String &str, const String &from, const String &to)
 Pattern::Pattern() : _pattern(""), _replacement("") {}
 
 /**
- * @brief Initializes PCRE pattern by providing the subject and replacement 
strings.
- * @param pattern PCRE pattern, a string containing PCRE patterns, capturing 
groups.
- * @param replacement PCRE replacement, a string where $0 ... $9 will be 
replaced with the corresponding capturing groups
+ * @brief Initializes Regex pattern by providing the subject and replacement 
strings.
+ * @param pattern Regex pattern, a string containing regex patterns, capturing 
groups.
+ * @param replacement Regex replacement, a string where $0 ... $9 will be 
replaced with the corresponding capturing groups
  * @return true if successful, false if failure
  */
 bool
 Pattern::init(const String &pattern, const String &replacement, bool replace)
 {
-  pcreFree();
-
   _pattern.assign(pattern);
   _replacement.assign(replacement);
   _replace = replace;
@@ -59,7 +57,6 @@ Pattern::init(const String &pattern, const String &replacement, bool replace)
 
   if (!compile()) {
     CacheKeyDebug("failed to initialize pattern:'%s', replacement:'%s'", 
pattern.c_str(), replacement.c_str());
-    pcreFree();
     return false;
   }
 
@@ -67,9 +64,9 @@ Pattern::init(const String &pattern, const String &replacement, bool replace)
 }
 
 /**
- * @brief Initializes PCRE pattern by providing the pattern only or 
pattern+replacement in a single configuration string.
+ * @brief Initializes Regex pattern by providing the pattern only or 
pattern+replacement in a single configuration string.
  * @see init()
- * @param config PCRE pattern <pattern> or PCRE pattern + replacement in 
format /<pattern>/<replacement>/
+ * @param config Regex pattern <pattern> or Regex pattern + replacement in 
format /<pattern>/<replacement>/
  * @return true if successful, false if failure
  */
 bool
@@ -130,32 +127,7 @@ Pattern::init(const String &config)
 bool
 Pattern::empty() const
 {
-  return _pattern.empty() || nullptr == _re;
-}
-
-/**
- * @brief Frees PCRE library related resources.
- */
-void
-Pattern::pcreFree()
-{
-  if (_re) {
-    pcre_free(_re);
-    _re = nullptr;
-  }
-
-  if (_extra) {
-    pcre_free(_extra);
-    _extra = nullptr;
-  }
-}
-
-/**
- * @brief Destructor, frees PCRE related resources.
- */
-Pattern::~Pattern()
-{
-  pcreFree();
+  return _pattern.empty() || _re.empty();
 }
 
 /**
@@ -198,23 +170,23 @@ Pattern::process(const String &subject, StringVector &result)
 }
 
 /**
- * @brief PCRE matches a subject string against the regex pattern.
- * @param subject PCRE subject
+ * @brief Regex matches a subject string against the regex pattern.
+ * @param subject Regex subject
  * @return true - matched, false - did not.
  */
 bool
 Pattern::match(const String &subject)
 {
-  int matchCount;
   CacheKeyDebug("matching '%s' to '%s'", _pattern.c_str(), subject.c_str());
 
-  if (!_re) {
+  if (_re.empty()) {
     return false;
   }
 
-  matchCount = pcre_exec(_re, _extra, subject.c_str(), subject.length(), 0, 
PCRE_NOTEMPTY, nullptr, 0);
+  RegexMatches matches;
+  int          matchCount = _re.exec(subject, matches, RE_NOTEMPTY);
   if (matchCount < 0) {
-    if (matchCount != PCRE_ERROR_NOMATCH) {
+    if (matchCount != RE_ERROR_NOMATCH) {
       CacheKeyError("matching error %d", matchCount);
     }
     return false;
@@ -224,38 +196,34 @@ Pattern::match(const String &subject)
 }
 
 /**
- * @brief Return all PCRE capture groups that matched in the subject string
- * @param subject PCRE subject string
+ * @brief Return all Regex capture groups that matched in the subject string
+ * @param subject Regex subject string
  * @param result reference to vector of strings containing all capture groups
  */
 bool
 Pattern::capture(const String &subject, StringVector &result)
 {
-  int matchCount;
-  int ovector[OVECOUNT];
-
   CacheKeyDebug("capturing '%s' from '%s'", _pattern.c_str(), subject.c_str());
 
-  if (!_re) {
+  if (_re.empty()) {
     CacheKeyError("regular expression not initialized");
     return false;
   }
 
-  matchCount = pcre_exec(_re, nullptr, subject.c_str(), subject.length(), 0, 
PCRE_NOTEMPTY, ovector, OVECOUNT);
+  RegexMatches matches;
+  int          matchCount = _re.exec(subject, matches, RE_NOTEMPTY);
   if (matchCount < 0) {
-    if (matchCount != PCRE_ERROR_NOMATCH) {
+    if (matchCount != RE_ERROR_NOMATCH) {
       CacheKeyError("matching error %d", matchCount);
     }
     return false;
   }
 
   for (int i = 0; i < matchCount; i++) {
-    int start  = ovector[2 * i];
-    int length = ovector[2 * i + 1] - ovector[2 * i];
+    std::string_view capture = matches[i];
+    String           dst(capture.data(), capture.length());
 
-    String dst(subject, start, length);
-
-    CacheKeyDebug("capturing '%s' %d[%d,%d]", dst.c_str(), i, ovector[2 * i], 
ovector[2 * i + 1]);
+    CacheKeyDebug("capturing '%s' %d", dst.c_str(), i);
     result.push_back(dst);
   }
 
@@ -263,27 +231,25 @@ Pattern::capture(const String &subject, StringVector &result)
 }
 
 /**
- * @brief Replaces all replacements found in the replacement string with what 
matched in the PCRE capturing groups.
- * @param subject PCRE subject string
+ * @brief Replaces all replacements found in the replacement string with what 
matched in the Regex capturing groups.
+ * @param subject Regex subject string
  * @param result reference to A string where the result of the replacement 
will be stored
  * @return true - success, false - nothing matched or failure.
  */
 bool
 Pattern::replace(const String &subject, String &result)
 {
-  int matchCount;
-  int ovector[OVECOUNT];
-
   CacheKeyDebug("replacing:'%s' in pattern:'%s', subject:'%s'", 
_replacement.c_str(), _pattern.c_str(), subject.c_str());
 
-  if (!_re || !_replace) {
+  if (_re.empty() || !_replace) {
     CacheKeyError("regular expression not initialized or not configured to 
replace");
     return false;
   }
 
-  matchCount = pcre_exec(_re, nullptr, subject.c_str(), subject.length(), 0, 
PCRE_NOTEMPTY, ovector, OVECOUNT);
+  RegexMatches matches;
+  int          matchCount = _re.exec(subject, matches, RE_NOTEMPTY);
   if (matchCount < 0) {
-    if (matchCount != PCRE_ERROR_NOMATCH) {
+    if (matchCount != RE_ERROR_NOMATCH) {
       CacheKeyError("matching error %d", matchCount);
     }
     return false;
@@ -299,18 +265,11 @@ Pattern::replace(const String &subject, String &result)
 
   int previous = 0;
   for (int i = 0; i < _tokenCount; i++) {
-    int replIndex = _tokens[i];
-    int start     = ovector[2 * replIndex];
-    int length    = ovector[2 * replIndex + 1] - ovector[2 * replIndex];
-
-    /* Handle the case when no match / a group capture result in an empty 
string */
-    if (start < 0) {
-      start  = 0;
-      length = 0;
-    }
+    int              replIndex = _tokens[i];
+    std::string_view capture   = matches[replIndex];
 
     String src(_replacement, _tokenOffset[i], 2);
-    String dst(subject, start, length);
+    String dst(capture.data(), capture.length());
 
     CacheKeyDebug("replacing '%s' with '%s'", src.c_str(), dst.c_str());
 
@@ -328,37 +287,20 @@ Pattern::replace(const String &subject, String &result)
 }
 
 /**
- * @brief PCRE compiles the regex, called only during initialization.
+ * @brief Compiles the regex, called only during initialization.
  * @return true if successful, false if not.
  */
 bool
 Pattern::compile()
 {
-  const char *errPtr;    /* PCRE error */
-  int         errOffset; /* PCRE error offset */
+  std::string error;
+  int         errOffset;
 
   CacheKeyDebug("compiling pattern:'%s', replace: %s, replacement:'%s'", 
_pattern.c_str(), _replace ? "true" : "false",
                 _replacement.c_str());
 
-  _re = pcre_compile(_pattern.c_str(), /* the pattern */
-                     0,                /* options */
-                     &errPtr,          /* for error message */
-                     &errOffset,       /* for error offset */
-                     nullptr);         /* use default character tables */
-
-  if (nullptr == _re) {
-    CacheKeyError("compile of regex '%s' at char %d: %s", _pattern.c_str(), 
errOffset, errPtr);
-
-    return false;
-  }
-
-  _extra = pcre_study(_re, 0, &errPtr);
-
-  if ((nullptr == _extra) && (nullptr != errPtr) && (0 != *errPtr)) {
-    CacheKeyError("failed to study regex '%s': %s", _pattern.c_str(), errPtr);
-
-    pcre_free(_re);
-    _re = nullptr;
+  if (!_re.compile(_pattern, error, errOffset)) {
+    CacheKeyError("compile of regex '%s' at char %d: %s", _pattern.c_str(), 
errOffset, error.c_str());
     return false;
   }
 
@@ -394,10 +336,6 @@ Pattern::compile()
     }
   }
 
-  if (!success) {
-    pcreFree();
-  }
-
   return success;
 }
 
diff --git a/plugins/cachekey/pattern.h b/plugins/cachekey/pattern.h
index a4738e56b9..e3f441d27a 100644
--- a/plugins/cachekey/pattern.h
+++ b/plugins/cachekey/pattern.h
@@ -18,32 +18,26 @@
 
 /**
  * @file pattern.h
- * @brief PRCE related classes (header file).
+ * @brief Regex related classes (header file).
  */
 
 #pragma once
 
 #include "tscore/ink_defs.h"
-
-#ifdef HAVE_PCRE_PCRE_H
-#include <pcre/pcre.h>
-#else
-#include <pcre.h>
-#endif
+#include "tsutil/Regex.h"
 
 #include "common.h"
 
 /**
- * @brief PCRE matching, capturing and replacing
+ * @brief Regex matching, capturing and replacing
  */
 class Pattern
 {
 public:
-  static const int TOKENCOUNT = 10;             /**< @brief Capturing groups 
$0..$9 */
-  static const int OVECOUNT   = TOKENCOUNT * 3; /**< @brief pcre_exec() array 
count, handle 10 capture groups */
+  static const int TOKENCOUNT = 10; /**< @brief Capturing groups $0..$9 */
 
   Pattern();
-  virtual ~Pattern();
+  ~Pattern() = default;
 
   bool init(const String &pattern, const String &replacement, bool replace);
   bool init(const String &config);
@@ -55,13 +49,12 @@ public:
 
 private:
   bool compile();
-  void pcreFree();
 
-  pcre       *_re    = nullptr; /**< @brief PCRE compiled info structure, 
computed during initialization */
-  pcre_extra *_extra = nullptr; /**< @brief PCRE study data block, computed 
during initialization */
+  Regex _re; /**< @brief Regex compiled object */
 
-  String _pattern;     /**< @brief PCRE pattern string, containing PCRE 
patterns and capturing groups. */
-  String _replacement; /**< @brief PCRE replacement string, containing $0..$9 
to be replaced with content of the capturing groups */
+  String _pattern; /**< @brief Regex pattern string, containing regex patterns 
and capturing groups. */
+  String
+    _replacement; /**< @brief Regex replacement string, containing $0..$9 to 
be replaced with content of the capturing groups */
 
   bool _replace = false; /**< @brief true if a replacement is needed, false if 
not, this is to distinguish between an empty
                     replacement string and no replacement needed case */
diff --git a/plugins/cachekey/unit_tests/pattern_test.cc b/plugins/cachekey/unit_tests/pattern_test.cc
index 01b4bb09b5..77f8bdb325 100644
--- a/plugins/cachekey/unit_tests/pattern_test.cc
+++ b/plugins/cachekey/unit_tests/pattern_test.cc
@@ -118,4 +118,371 @@ TEST_CASE("Pattern compile and match behavior", "[cachekey][pattern]")
     std::string long_s(10000, 'a');
     CHECK(p.match(long_s.c_str()) == true);
   }
+
+  SECTION("Config string parsing - pattern only")
+  {
+    Pattern p;
+    REQUIRE(p.init("^test-\\d+$"));
+    CHECK(p.match("test-123") == true);
+    CHECK(p.match("test-abc") == false);
+  }
+
+  SECTION("Config string parsing - pattern with replacement")
+  {
+    Pattern p;
+    REQUIRE(p.init("/^(\\w+)-(\\d+)$/$2:$1/"));
+    String res;
+    CHECK(p.replace("foo-42", res));
+    CHECK(res == "42:foo");
+  }
+
+  SECTION("Config string parsing - escaped slashes in pattern")
+  {
+    Pattern p;
+    REQUIRE(p.init("/path\\/to\\/file/$0/"));
+    String res;
+    CHECK(p.replace("path/to/file", res));
+    CHECK(res == "path/to/file");
+  }
+
+  SECTION("Config string parsing - escaped slashes in replacement")
+  {
+    Pattern p;
+    REQUIRE(p.init("/(\\w+)/prefix\\/$1/"));
+    String res;
+    CHECK(p.replace("test", res));
+    CHECK(res == "prefix/test");
+  }
+
+  SECTION("Config string parsing - invalid format missing closing slash")
+  {
+    Pattern p;
+    CHECK(p.init("/pattern/replacement") == false);
+  }
+
+  SECTION("Config string parsing - invalid format no slashes")
+  {
+    Pattern p;
+    CHECK(p.init("/pattern") == false);
+  }
+
+  SECTION("Replacement with multiple groups in different order")
+  {
+    Pattern p;
+    REQUIRE(p.init("^(\\w)(\\w)(\\w)$", "$3$1$2", true));
+    String res;
+    CHECK(p.replace("abc", res));
+    CHECK(res == "cab");
+  }
+
+  SECTION("Replacement with group $0 (entire match)")
+  {
+    Pattern p;
+    REQUIRE(p.init("test", "[$0]", true));
+    String res;
+    CHECK(p.replace("test", res));
+    CHECK(res == "[test]");
+  }
+
+  SECTION("Replacement with repeated group references")
+  {
+    Pattern p;
+    REQUIRE(p.init("(\\w+)", "$1-$1", true));
+    String res;
+    CHECK(p.replace("foo", res));
+    CHECK(res == "foo-foo");
+  }
+
+  SECTION("Replacement with static text around groups")
+  {
+    Pattern p;
+    REQUIRE(p.init("(\\d+)", "num=$1;", true));
+    String res;
+    CHECK(p.replace("123", res));
+    CHECK(res == "num=123;");
+  }
+
+  SECTION("Replacement with invalid group reference")
+  {
+    Pattern p;
+    REQUIRE(p.init("(\\w+)", "$5", true)); // only 2 groups (0 and 1)
+    String res;
+    // Should fail because $5 doesn't exist
+    CHECK(p.replace("test", res) == false);
+  }
+
+  SECTION("process() method - capture mode (no replacement)")
+  {
+    Pattern p;
+    REQUIRE(p.init("^(\\w+)-(\\d+)$"));
+    StringVector result;
+    CHECK(p.process("item-456", result));
+    // process() should skip group 0 when no replacement, only return 
capturing groups
+    CHECK(result.size() == 2);
+    CHECK(result[0] == "item");
+    CHECK(result[1] == "456");
+  }
+
+  SECTION("process() method - capture mode with single group")
+  {
+    Pattern p;
+    REQUIRE(p.init("test"));
+    StringVector result;
+    CHECK(p.process("test", result));
+    // When there's only group 0, process() returns it
+    CHECK(result.size() == 1);
+    CHECK(result[0] == "test");
+  }
+
+  SECTION("process() method - replace mode")
+  {
+    Pattern p;
+    REQUIRE(p.init("/^(\\w+)-(\\d+)$/$1_$2/"));
+    StringVector result;
+    CHECK(p.process("foo-99", result));
+    CHECK(result.size() == 1);
+    CHECK(result[0] == "foo_99");
+  }
+
+  SECTION("process() method - no match")
+  {
+    Pattern p;
+    REQUIRE(p.init("^test$"));
+    StringVector result;
+    CHECK(p.process("nomatch", result) == false);
+    CHECK(result.size() == 0);
+  }
+
+  SECTION("Special characters in pattern")
+  {
+    Pattern p;
+    REQUIRE(p.init("\\$\\d+\\.\\d+"));
+    CHECK(p.match("$123.45") == true);
+    CHECK(p.match("123.45") == false);
+  }
+
+  SECTION("Anchored patterns")
+  {
+    Pattern p1, p2;
+    REQUIRE(p1.init("test"));   // unanchored
+    REQUIRE(p2.init("^test$")); // anchored
+
+    CHECK(p1.match("pretest") == true);
+    CHECK(p2.match("pretest") == false);
+    CHECK(p2.match("test") == true);
+  }
+}
+
+TEST_CASE("MultiPattern tests", "[cachekey][pattern][multipattern]")
+{
+  SECTION("Empty multipattern")
+  {
+    MultiPattern mp("test");
+    CHECK(mp.empty() == true);
+    CHECK(mp.name() == "test");
+    CHECK(mp.match("anything") == false);
+  }
+
+  SECTION("Single pattern match")
+  {
+    MultiPattern mp("mobile");
+    auto         p = std::make_unique<Pattern>();
+    REQUIRE(p->init("iPhone"));
+    mp.add(std::move(p));
+
+    CHECK(mp.empty() == false);
+    CHECK(mp.match("Mozilla/5.0 (iPhone; CPU iPhone OS") == true);
+    CHECK(mp.match("Mozilla/5.0 (Windows NT 10.0") == false);
+  }
+
+  SECTION("Multiple patterns - first match wins")
+  {
+    MultiPattern mp("devices");
+
+    auto p1 = std::make_unique<Pattern>();
+    REQUIRE(p1->init("Android"));
+    mp.add(std::move(p1));
+
+    auto p2 = std::make_unique<Pattern>();
+    REQUIRE(p2->init("iPhone"));
+    mp.add(std::move(p2));
+
+    CHECK(mp.match("Android device") == true);
+    CHECK(mp.match("iPhone device") == true);
+    CHECK(mp.match("Windows device") == false);
+  }
+
+  SECTION("MultiPattern process with captures")
+  {
+    MultiPattern mp("versions");
+
+    auto p1 = std::make_unique<Pattern>();
+    REQUIRE(p1->init("Chrome/(\\d+)"));
+    mp.add(std::move(p1));
+
+    auto p2 = std::make_unique<Pattern>();
+    REQUIRE(p2->init("Firefox/(\\d+)"));
+    mp.add(std::move(p2));
+
+    StringVector result;
+    CHECK(mp.process("Mozilla/5.0 Chrome/91.0", result) == true);
+    CHECK(result.size() >= 1);
+    CHECK(result[0] == "91");
+
+    result.clear();
+    CHECK(mp.process("Mozilla/5.0 Firefox/89.0", result) == true);
+    CHECK(result.size() >= 1);
+    CHECK(result[0] == "89");
+  }
+}
+
+TEST_CASE("NonMatchingMultiPattern tests", "[cachekey][pattern][nonmatching]")
+{
+  SECTION("NonMatchingMultiPattern - returns true when nothing matches")
+  {
+    NonMatchingMultiPattern nmp("exclude");
+
+    auto p1 = std::make_unique<Pattern>();
+    REQUIRE(p1->init("bot"));
+    nmp.add(std::move(p1));
+
+    // Should return true (no match = allowed)
+    CHECK(nmp.match("normal user agent") == true);
+    // Should return false (matched = not allowed)
+    CHECK(nmp.match("googlebot") == false);
+  }
+
+  SECTION("NonMatchingMultiPattern - multiple exclusions")
+  {
+    NonMatchingMultiPattern nmp("bots");
+
+    auto p1 = std::make_unique<Pattern>();
+    REQUIRE(p1->init("bot"));
+    nmp.add(std::move(p1));
+
+    auto p2 = std::make_unique<Pattern>();
+    REQUIRE(p2->init("crawler"));
+    nmp.add(std::move(p2));
+
+    CHECK(nmp.match("normal browser") == true);
+    CHECK(nmp.match("googlebot") == false);
+    CHECK(nmp.match("some crawler") == false);
+  }
+}
+
+TEST_CASE("Classifier tests", "[cachekey][pattern][classifier]")
+{
+  SECTION("Empty classifier")
+  {
+    Classifier c;
+    String     name;
+    CHECK(c.classify("test", name) == false);
+  }
+
+  SECTION("Single class classification")
+  {
+    Classifier c;
+
+    auto mp = std::make_unique<MultiPattern>("mobile");
+    auto p1 = std::make_unique<Pattern>();
+    REQUIRE(p1->init("iPhone|Android"));
+    mp->add(std::move(p1));
+    c.add(std::move(mp));
+
+    String name;
+    CHECK(c.classify("Mozilla/5.0 (iPhone", name) == true);
+    CHECK(name == "mobile");
+
+    CHECK(c.classify("Mozilla/5.0 (Windows", name) == false);
+  }
+
+  SECTION("Multiple classes - first match wins")
+  {
+    Classifier c;
+
+    // Add mobile class first
+    auto mp_mobile = std::make_unique<MultiPattern>("mobile");
+    auto p1        = std::make_unique<Pattern>();
+    REQUIRE(p1->init("iPhone|Android"));
+    mp_mobile->add(std::move(p1));
+    c.add(std::move(mp_mobile));
+
+    // Add tablet class second
+    auto mp_tablet = std::make_unique<MultiPattern>("tablet");
+    auto p2        = std::make_unique<Pattern>();
+    REQUIRE(p2->init("iPad"));
+    mp_tablet->add(std::move(p2));
+    c.add(std::move(mp_tablet));
+
+    // Add desktop class third
+    auto mp_desktop = std::make_unique<MultiPattern>("desktop");
+    auto p3         = std::make_unique<Pattern>();
+    REQUIRE(p3->init("Windows|Macintosh"));
+    mp_desktop->add(std::move(p3));
+    c.add(std::move(mp_desktop));
+
+    String name;
+    CHECK(c.classify("Mozilla/5.0 (Android", name) == true);
+    CHECK(name == "mobile");
+
+    CHECK(c.classify("Mozilla/5.0 (iPad", name) == true);
+    CHECK(name == "tablet");
+
+    CHECK(c.classify("Mozilla/5.0 (Windows NT", name) == true);
+    CHECK(name == "desktop");
+
+    CHECK(c.classify("Unknown/1.0", name) == false);
+  }
+
+  SECTION("Classifier with empty multipatterns")
+  {
+    Classifier c;
+
+    // Add an empty multipattern
+    auto mp = std::make_unique<MultiPattern>("empty");
+    c.add(std::move(mp));
+
+    String name;
+    // Should skip empty patterns
+    CHECK(c.classify("test", name) == false);
+  }
+
+  SECTION("Complex real-world classification")
+  {
+    Classifier c;
+
+    // Mobile phones
+    auto mp_phone = std::make_unique<MultiPattern>("phone");
+    auto p1       = std::make_unique<Pattern>();
+    REQUIRE(p1->init("iPhone"));
+    mp_phone->add(std::move(p1));
+    auto p2 = std::make_unique<Pattern>();
+    REQUIRE(p2->init("Android.*Mobile"));
+    mp_phone->add(std::move(p2));
+    c.add(std::move(mp_phone));
+
+    // Tablets
+    auto mp_tablet = std::make_unique<MultiPattern>("tablet");
+    auto p3        = std::make_unique<Pattern>();
+    REQUIRE(p3->init("iPad"));
+    mp_tablet->add(std::move(p3));
+    auto p4 = std::make_unique<Pattern>();
+    REQUIRE(p4->init("Android(?!.*Mobile)"));
+    mp_tablet->add(std::move(p4));
+    c.add(std::move(mp_tablet));
+
+    String name;
+    CHECK(c.classify("Mozilla/5.0 (iPhone; CPU iPhone OS 14_0", name) == true);
+    CHECK(name == "phone");
+
+    CHECK(c.classify("Mozilla/5.0 (Linux; Android 10; SM-G960U) Mobile", name) 
== true);
+    CHECK(name == "phone");
+
+    CHECK(c.classify("Mozilla/5.0 (iPad; CPU OS 14_0", name) == true);
+    CHECK(name == "tablet");
+
+    // Android tablet (no "Mobile" in UA)
+    CHECK(c.classify("Mozilla/5.0 (Linux; Android 10; SM-T510)", name) == 
true);
+    CHECK(name == "tablet");
+  }
 }
diff --git a/src/tsutil/unit_tests/test_Regex.cc b/src/tsutil/unit_tests/test_Regex.cc
index b1b2c1609d..26143208d1 100644
--- a/src/tsutil/unit_tests/test_Regex.cc
+++ b/src/tsutil/unit_tests/test_Regex.cc
@@ -258,6 +258,338 @@ TEST_CASE("Regex error codes", "[libts][Regex][errors]")
     CHECK(r.exec("foo", matches) != RE_ERROR_NOMATCH);
     CHECK(r.exec("foo", matches) == 1);
   }
+
+  SECTION("Compile error returns detailed error message")
+  {
+    Regex       r;
+    std::string error;
+    int         erroffset;
+
+    // Unclosed parenthesis should fail with error message
+    CHECK(r.compile(R"((unclosed)", error, erroffset) == false);
+    CHECK(!error.empty());
+    CHECK(erroffset > 0);
+
+    // Invalid escape sequence
+    error.clear();
+    erroffset = 0;
+    CHECK(r.compile(R"(\k)", error, erroffset) == false);
+    CHECK(!error.empty());
+
+    // Invalid character class
+    error.clear();
+    erroffset = 0;
+    CHECK(r.compile(R"([z-a])", error, erroffset) == false);
+    CHECK(!error.empty());
+  }
+}
+
+TEST_CASE("Regex::empty()", "[libts][Regex][empty]")
+{
+  SECTION("newly constructed Regex is empty")
+  {
+    Regex r;
+    CHECK(r.empty() == true);
+  }
+
+  SECTION("compiled Regex is not empty")
+  {
+    Regex r;
+    REQUIRE(r.compile("test") == true);
+    CHECK(r.empty() == false);
+  }
+
+  SECTION("failed compilation leaves Regex empty")
+  {
+    Regex r;
+    REQUIRE(r.compile("(invalid") == false);
+    CHECK(r.empty() == true);
+  }
+
+  SECTION("recompiling non-empty Regex")
+  {
+    Regex r;
+    REQUIRE(r.compile("foo") == true);
+    CHECK(r.empty() == false);
+
+    REQUIRE(r.compile("bar") == true);
+    CHECK(r.empty() == false);
+  }
+}
+
+TEST_CASE("Regex move semantics", "[libts][Regex][move]")
+{
+  SECTION("move constructor")
+  {
+    Regex r1;
+    REQUIRE(r1.compile("^test$") == true);
+    CHECK(r1.exec("test") == true);
+    CHECK(r1.empty() == false);
+
+    // Move construct r2 from r1
+    Regex r2(std::move(r1));
+    CHECK(r2.empty() == false);
+    CHECK(r2.exec("test") == true);
+    CHECK(r2.exec("foo") == false);
+  }
+
+  SECTION("move assignment operator")
+  {
+    Regex r1;
+    REQUIRE(r1.compile("^test$") == true);
+
+    Regex r2;
+    REQUIRE(r2.compile("^foo$") == true);
+
+    // Move assign r1 to r2
+    r2 = std::move(r1);
+    CHECK(r2.empty() == false);
+    CHECK(r2.exec("test") == true);
+    CHECK(r2.exec("foo") == false);
+  }
+
+  SECTION("move empty Regex")
+  {
+    Regex r1; // empty
+    Regex r2(std::move(r1));
+    CHECK(r2.empty() == true);
+  }
+}
+
+TEST_CASE("Regex RE_UNANCHORED flag", "[libts][Regex][flags][RE_UNANCHORED]")
+{
+  SECTION("RE_UNANCHORED allows matching anywhere in multiline text")
+  {
+    Regex r;
+    // Pattern that should match "test" at start of any line
+    REQUIRE(r.compile("^test", RE_UNANCHORED) == true);
+
+    // Should match at start of string
+    CHECK(r.exec("test\nfoo") == true);
+
+    // Should match after newline (multiline mode)
+    CHECK(r.exec("foo\ntest") == true);
+
+    // Should not match in middle of line
+    CHECK(r.exec("foo test") == false);
+  }
+
+  SECTION("default (without RE_UNANCHORED) only matches at string start")
+  {
+    Regex r;
+    REQUIRE(r.compile("^test") == true);
+
+    // Should match at start
+    CHECK(r.exec("test\nfoo") == true);
+
+    // Should NOT match after newline without RE_UNANCHORED
+    CHECK(r.exec("foo\ntest") == false);
+  }
+}
+
+TEST_CASE("RegexMatches edge cases", "[libts][Regex][RegexMatches]")
+{
+  SECTION("RegexMatches size after no match")
+  {
+    Regex        r;
+    RegexMatches matches;
+    REQUIRE(r.compile("test") == true);
+
+    int count = r.exec("nomatch", matches);
+    CHECK(count == RE_ERROR_NOMATCH);
+    CHECK(matches.size() == RE_ERROR_NOMATCH);
+  }
+
+  SECTION("RegexMatches operator[] with various capture counts")
+  {
+    Regex r;
+    REQUIRE(r.compile("(\\w+)-(\\d+)-(\\w+)") == true);
+
+    RegexMatches matches;
+    int          count = r.exec("foo-123-bar", matches);
+    CHECK(count == 4); // whole match + 3 groups
+
+    CHECK(matches[0] == "foo-123-bar");
+    CHECK(matches[1] == "foo");
+    CHECK(matches[2] == "123");
+    CHECK(matches[3] == "bar");
+  }
+
+  SECTION("RegexMatches with zero-length captures")
+  {
+    Regex r;
+    REQUIRE(r.compile("(\\w*)-(\\w*)") == true);
+
+    RegexMatches matches;
+
+    // First group empty, second has content
+    int count = r.exec("-foo", matches);
+    CHECK(count == 3);
+    CHECK(matches[0] == "-foo");
+    CHECK(matches[1] == "");
+    CHECK(matches[2] == "foo");
+  }
+
+  SECTION("RegexMatches with optional groups")
+  {
+    Regex r;
+    REQUIRE(r.compile("(\\w+)-(\\d+)?") == true);
+
+    RegexMatches matches;
+
+    // With optional group present
+    int count = r.exec("foo-123", matches);
+    CHECK(count == 3);
+    CHECK(matches[1] == "foo");
+    CHECK(matches[2] == "123");
+
+    // With optional group absent - note: PCRE2 may still count it
+    count = r.exec("foo-", matches);
+    CHECK(count >= 2); // At least whole match + first group
+    CHECK(matches[1] == "foo");
+  }
+}
+
+TEST_CASE("Regex with special characters", "[libts][Regex][special]")
+{
+  SECTION("escaped special characters")
+  {
+    Regex r;
+    REQUIRE(r.compile(R"(\$\d+\.\d+)") == true);
+
+    CHECK(r.exec("$123.45") == true);
+    CHECK(r.exec("123.45") == false);
+    CHECK(r.exec("$12.3") == true);
+  }
+
+  SECTION("character classes")
+  {
+    Regex r;
+    REQUIRE(r.compile(R"([A-Z][a-z]+)") == true);
+
+    CHECK(r.exec("Hello") == true);
+    CHECK(r.exec("hello") == false);
+    CHECK(r.exec("HELLO") == false);
+  }
+
+  SECTION("quantifiers")
+  {
+    Regex r;
+    REQUIRE(r.compile(R"(\d{3}-\d{4})") == true);
+
+    CHECK(r.exec("123-4567") == true);
+    CHECK(r.exec("12-4567") == false);
+    CHECK(r.exec("123-456") == false);
+  }
+
+  SECTION("alternation")
+  {
+    Regex r;
+    REQUIRE(r.compile(R"(foo|bar|baz)") == true);
+
+    CHECK(r.exec("foo") == true);
+    CHECK(r.exec("bar") == true);
+    CHECK(r.exec("baz") == true);
+    CHECK(r.exec("qux") == false);
+  }
+}
+
+TEST_CASE("Regex with complex patterns", "[libts][Regex][complex]")
+{
+  SECTION("greedy vs non-greedy quantifiers")
+  {
+    Regex greedy, non_greedy;
+    REQUIRE(greedy.compile(R"(<.*>)") == true);
+    REQUIRE(non_greedy.compile(R"(<.*?>)") == true);
+
+    RegexMatches matches;
+
+    // Greedy matches everything
+    int count = greedy.exec("<div>content</div>", matches);
+    CHECK(count == 1);
+    CHECK(matches[0] == "<div>content</div>");
+
+    // Non-greedy matches just first tag
+    count = non_greedy.exec("<div>content</div>", matches);
+    CHECK(count == 1);
+    CHECK(matches[0] == "<div>");
+  }
+
+  SECTION("lookahead assertions")
+  {
+    Regex r;
+    // Match "foo" only if followed by "bar"
+    REQUIRE(r.compile(R"(foo(?=bar))") == true);
+
+    CHECK(r.exec("foobar") == true);
+    CHECK(r.exec("foobaz") == false);
+    CHECK(r.exec("foo") == false);
+  }
+
+  SECTION("negative lookahead")
+  {
+    Regex r;
+    // Match "foo" only if NOT followed by "bar"
+    REQUIRE(r.compile(R"(foo(?!bar))") == true);
+
+    CHECK(r.exec("foobar") == false);
+    CHECK(r.exec("foobaz") == true);
+    CHECK(r.exec("foo") == true);
+  }
+
+  SECTION("word boundaries")
+  {
+    Regex r;
+    REQUIRE(r.compile(R"(\btest\b)") == true);
+
+    CHECK(r.exec("test") == true);
+    CHECK(r.exec("a test here") == true);
+    CHECK(r.exec("testing") == false);
+    CHECK(r.exec("attest") == false);
+  }
+}
+
+TEST_CASE("Regex recompilation behavior", "[libts][Regex][recompile]")
+{
+  SECTION("recompile frees previous pattern")
+  {
+    Regex r;
+
+    REQUIRE(r.compile("foo") == true);
+    CHECK(r.exec("foo") == true);
+    CHECK(r.exec("bar") == false);
+
+    // Recompile with different pattern
+    REQUIRE(r.compile("bar") == true);
+    CHECK(r.exec("bar") == true);
+    CHECK(r.exec("foo") == false);
+  }
+
+  SECTION("recompile after failed compilation")
+  {
+    Regex r;
+
+    // First compilation fails
+    REQUIRE(r.compile("(invalid") == false);
+    CHECK(r.empty() == true);
+
+    // Should still be able to compile successfully
+    REQUIRE(r.compile("valid") == true);
+    CHECK(r.empty() == false);
+    CHECK(r.exec("valid") == true);
+  }
+
+  SECTION("recompile with different flags")
+  {
+    Regex r;
+
+    REQUIRE(r.compile("test") == true);
+    CHECK(r.exec("TEST") == false);
+
+    // Recompile with case insensitive flag
+    REQUIRE(r.compile("test", RE_CASE_INSENSITIVE) == true);
+    CHECK(r.exec("TEST") == true);
+  }
 }
 
 TEST_CASE("Regex copy constructor", "[libts][Regex][copy]")

Reply via email to