This is an automated email from the ASF dual-hosted git repository.
bcall pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/trafficserver.git
The following commit(s) were added to refs/heads/master by this push:
new a57a824cab Convert cachekey plugin to use Regex class (PCRE2) instead
of PCRE (#12607)
a57a824cab is described below
commit a57a824cabd217fb4001452cafce5278af286459
Author: Bryan Call <[email protected]>
AuthorDate: Fri Nov 7 11:07:39 2025 -0800
Convert cachekey plugin to use Regex class (PCRE2) instead of PCRE (#12607)
---
plugins/cachekey/pattern.cc | 134 +++-------
plugins/cachekey/pattern.h | 25 +-
plugins/cachekey/unit_tests/pattern_test.cc | 367 ++++++++++++++++++++++++++++
src/tsutil/unit_tests/test_Regex.cc | 332 +++++++++++++++++++++++++
4 files changed, 744 insertions(+), 114 deletions(-)
diff --git a/plugins/cachekey/pattern.cc b/plugins/cachekey/pattern.cc
index 8492d4c360..e871e15f03 100644
--- a/plugins/cachekey/pattern.cc
+++ b/plugins/cachekey/pattern.cc
@@ -18,7 +18,7 @@
/**
* @file pattern.cc
- * @brief PRCE related classes.
+ * @brief Regex related classes.
* @see pattern.h
*/
@@ -41,16 +41,14 @@ replaceString(String &str, const String &from, const String
&to)
Pattern::Pattern() : _pattern(""), _replacement("") {}
/**
- * @brief Initializes PCRE pattern by providing the subject and replacement
strings.
- * @param pattern PCRE pattern, a string containing PCRE patterns, capturing
groups.
- * @param replacement PCRE replacement, a string where $0 ... $9 will be
replaced with the corresponding capturing groups
+ * @brief Initializes Regex pattern by providing the pattern and replacement
strings.
+ * @param pattern Regex pattern, a string containing regex patterns, capturing
groups.
+ * @param replacement Regex replacement, a string where $0 ... $9 will be
replaced with the corresponding capturing groups
* @return true if successful, false if failure
*/
bool
Pattern::init(const String &pattern, const String &replacement, bool replace)
{
- pcreFree();
-
_pattern.assign(pattern);
_replacement.assign(replacement);
_replace = replace;
@@ -59,7 +57,6 @@ Pattern::init(const String &pattern, const String
&replacement, bool replace)
if (!compile()) {
CacheKeyDebug("failed to initialize pattern:'%s', replacement:'%s'",
pattern.c_str(), replacement.c_str());
- pcreFree();
return false;
}
@@ -67,9 +64,9 @@ Pattern::init(const String &pattern, const String
&replacement, bool replace)
}
/**
- * @brief Initializes PCRE pattern by providing the pattern only or
pattern+replacement in a single configuration string.
+ * @brief Initializes Regex pattern by providing the pattern only or
pattern+replacement in a single configuration string.
* @see init()
- * @param config PCRE pattern <pattern> or PCRE pattern + replacement in
format /<pattern>/<replacement>/
+ * @param config Regex pattern <pattern> or Regex pattern + replacement in
format /<pattern>/<replacement>/
* @return true if successful, false if failure
*/
bool
@@ -130,32 +127,7 @@ Pattern::init(const String &config)
bool
Pattern::empty() const
{
- return _pattern.empty() || nullptr == _re;
-}
-
-/**
- * @brief Frees PCRE library related resources.
- */
-void
-Pattern::pcreFree()
-{
- if (_re) {
- pcre_free(_re);
- _re = nullptr;
- }
-
- if (_extra) {
- pcre_free(_extra);
- _extra = nullptr;
- }
-}
-
-/**
- * @brief Destructor, frees PCRE related resources.
- */
-Pattern::~Pattern()
-{
- pcreFree();
+ return _pattern.empty() || _re.empty();
}
/**
@@ -198,23 +170,23 @@ Pattern::process(const String &subject, StringVector
&result)
}
/**
- * @brief PCRE matches a subject string against the regex pattern.
- * @param subject PCRE subject
+ * @brief Matches a subject string against the compiled regex pattern.
+ * @param subject Regex subject
* @return true - matched, false - did not.
*/
bool
Pattern::match(const String &subject)
{
- int matchCount;
CacheKeyDebug("matching '%s' to '%s'", _pattern.c_str(), subject.c_str());
- if (!_re) {
+ if (_re.empty()) {
return false;
}
- matchCount = pcre_exec(_re, _extra, subject.c_str(), subject.length(), 0,
PCRE_NOTEMPTY, nullptr, 0);
+ RegexMatches matches;
+ int matchCount = _re.exec(subject, matches, RE_NOTEMPTY);
if (matchCount < 0) {
- if (matchCount != PCRE_ERROR_NOMATCH) {
+ if (matchCount != RE_ERROR_NOMATCH) {
CacheKeyError("matching error %d", matchCount);
}
return false;
@@ -224,38 +196,34 @@ Pattern::match(const String &subject)
}
/**
- * @brief Return all PCRE capture groups that matched in the subject string
- * @param subject PCRE subject string
+ * @brief Return all Regex capture groups that matched in the subject string
+ * @param subject Regex subject string
* @param result reference to vector of strings containing all capture groups
*/
bool
Pattern::capture(const String &subject, StringVector &result)
{
- int matchCount;
- int ovector[OVECOUNT];
-
CacheKeyDebug("capturing '%s' from '%s'", _pattern.c_str(), subject.c_str());
- if (!_re) {
+ if (_re.empty()) {
CacheKeyError("regular expression not initialized");
return false;
}
- matchCount = pcre_exec(_re, nullptr, subject.c_str(), subject.length(), 0,
PCRE_NOTEMPTY, ovector, OVECOUNT);
+ RegexMatches matches;
+ int matchCount = _re.exec(subject, matches, RE_NOTEMPTY);
if (matchCount < 0) {
- if (matchCount != PCRE_ERROR_NOMATCH) {
+ if (matchCount != RE_ERROR_NOMATCH) {
CacheKeyError("matching error %d", matchCount);
}
return false;
}
for (int i = 0; i < matchCount; i++) {
- int start = ovector[2 * i];
- int length = ovector[2 * i + 1] - ovector[2 * i];
+ std::string_view capture = matches[i];
+ String dst(capture.data(), capture.length());
- String dst(subject, start, length);
-
- CacheKeyDebug("capturing '%s' %d[%d,%d]", dst.c_str(), i, ovector[2 * i],
ovector[2 * i + 1]);
+ CacheKeyDebug("capturing '%s' %d", dst.c_str(), i);
result.push_back(dst);
}
@@ -263,27 +231,25 @@ Pattern::capture(const String &subject, StringVector
&result)
}
/**
- * @brief Replaces all replacements found in the replacement string with what
matched in the PCRE capturing groups.
- * @param subject PCRE subject string
+ * @brief Replaces the $0..$9 tokens found in the replacement string with what
matched in the corresponding Regex capturing groups.
+ * @param subject Regex subject string
* @param result reference to A string where the result of the replacement
will be stored
* @return true - success, false - nothing matched or failure.
*/
bool
Pattern::replace(const String &subject, String &result)
{
- int matchCount;
- int ovector[OVECOUNT];
-
CacheKeyDebug("replacing:'%s' in pattern:'%s', subject:'%s'",
_replacement.c_str(), _pattern.c_str(), subject.c_str());
- if (!_re || !_replace) {
+ if (_re.empty() || !_replace) {
CacheKeyError("regular expression not initialized or not configured to
replace");
return false;
}
- matchCount = pcre_exec(_re, nullptr, subject.c_str(), subject.length(), 0,
PCRE_NOTEMPTY, ovector, OVECOUNT);
+ RegexMatches matches;
+ int matchCount = _re.exec(subject, matches, RE_NOTEMPTY);
if (matchCount < 0) {
- if (matchCount != PCRE_ERROR_NOMATCH) {
+ if (matchCount != RE_ERROR_NOMATCH) {
CacheKeyError("matching error %d", matchCount);
}
return false;
@@ -299,18 +265,11 @@ Pattern::replace(const String &subject, String &result)
int previous = 0;
for (int i = 0; i < _tokenCount; i++) {
- int replIndex = _tokens[i];
- int start = ovector[2 * replIndex];
- int length = ovector[2 * replIndex + 1] - ovector[2 * replIndex];
-
- /* Handle the case when no match / a group capture result in an empty
string */
- if (start < 0) {
- start = 0;
- length = 0;
- }
+ int replIndex = _tokens[i];
+ std::string_view capture = matches[replIndex];
String src(_replacement, _tokenOffset[i], 2);
- String dst(subject, start, length);
+ String dst(capture.data(), capture.length());
CacheKeyDebug("replacing '%s' with '%s'", src.c_str(), dst.c_str());
@@ -328,37 +287,20 @@ Pattern::replace(const String &subject, String &result)
}
/**
- * @brief PCRE compiles the regex, called only during initialization.
+ * @brief Compiles the regex, called only during initialization.
* @return true if successful, false if not.
*/
bool
Pattern::compile()
{
- const char *errPtr; /* PCRE error */
- int errOffset; /* PCRE error offset */
+ std::string error;
+ int errOffset;
CacheKeyDebug("compiling pattern:'%s', replace: %s, replacement:'%s'",
_pattern.c_str(), _replace ? "true" : "false",
_replacement.c_str());
- _re = pcre_compile(_pattern.c_str(), /* the pattern */
- 0, /* options */
- &errPtr, /* for error message */
- &errOffset, /* for error offset */
- nullptr); /* use default character tables */
-
- if (nullptr == _re) {
- CacheKeyError("compile of regex '%s' at char %d: %s", _pattern.c_str(),
errOffset, errPtr);
-
- return false;
- }
-
- _extra = pcre_study(_re, 0, &errPtr);
-
- if ((nullptr == _extra) && (nullptr != errPtr) && (0 != *errPtr)) {
- CacheKeyError("failed to study regex '%s': %s", _pattern.c_str(), errPtr);
-
- pcre_free(_re);
- _re = nullptr;
+ if (!_re.compile(_pattern, error, errOffset)) {
+ CacheKeyError("compile of regex '%s' at char %d: %s", _pattern.c_str(),
errOffset, error.c_str());
return false;
}
@@ -394,10 +336,6 @@ Pattern::compile()
}
}
- if (!success) {
- pcreFree();
- }
-
return success;
}
diff --git a/plugins/cachekey/pattern.h b/plugins/cachekey/pattern.h
index a4738e56b9..e3f441d27a 100644
--- a/plugins/cachekey/pattern.h
+++ b/plugins/cachekey/pattern.h
@@ -18,32 +18,26 @@
/**
* @file pattern.h
- * @brief PRCE related classes (header file).
+ * @brief Regex related classes (header file).
*/
#pragma once
#include "tscore/ink_defs.h"
-
-#ifdef HAVE_PCRE_PCRE_H
-#include <pcre/pcre.h>
-#else
-#include <pcre.h>
-#endif
+#include "tsutil/Regex.h"
#include "common.h"
/**
- * @brief PCRE matching, capturing and replacing
+ * @brief Regex matching, capturing and replacing
*/
class Pattern
{
public:
- static const int TOKENCOUNT = 10; /**< @brief Capturing groups
$0..$9 */
- static const int OVECOUNT = TOKENCOUNT * 3; /**< @brief pcre_exec() array
count, handle 10 capture groups */
+ static const int TOKENCOUNT = 10; /**< @brief Capturing groups $0..$9 */
Pattern();
- virtual ~Pattern();
+ ~Pattern() = default;
bool init(const String &pattern, const String &replacement, bool replace);
bool init(const String &config);
@@ -55,13 +49,12 @@ public:
private:
bool compile();
- void pcreFree();
- pcre *_re = nullptr; /**< @brief PCRE compiled info structure,
computed during initialization */
- pcre_extra *_extra = nullptr; /**< @brief PCRE study data block, computed
during initialization */
+ Regex _re; /**< @brief Regex compiled object */
- String _pattern; /**< @brief PCRE pattern string, containing PCRE
patterns and capturing groups. */
- String _replacement; /**< @brief PCRE replacement string, containing $0..$9
to be replaced with content of the capturing groups */
+ String _pattern; /**< @brief Regex pattern string, containing regex patterns
and capturing groups. */
+ String
+ _replacement; /**< @brief Regex replacement string, containing $0..$9 to
be replaced with content of the capturing groups */
bool _replace = false; /**< @brief true if a replacement is needed, false if
not, this is to distinguish between an empty
replacement string and no replacement needed case */
diff --git a/plugins/cachekey/unit_tests/pattern_test.cc
b/plugins/cachekey/unit_tests/pattern_test.cc
index 01b4bb09b5..77f8bdb325 100644
--- a/plugins/cachekey/unit_tests/pattern_test.cc
+++ b/plugins/cachekey/unit_tests/pattern_test.cc
@@ -118,4 +118,371 @@ TEST_CASE("Pattern compile and match behavior",
"[cachekey][pattern]")
std::string long_s(10000, 'a');
CHECK(p.match(long_s.c_str()) == true);
}
+
+ SECTION("Config string parsing - pattern only")
+ {
+ Pattern p;
+ REQUIRE(p.init("^test-\\d+$"));
+ CHECK(p.match("test-123") == true);
+ CHECK(p.match("test-abc") == false);
+ }
+
+ SECTION("Config string parsing - pattern with replacement")
+ {
+ Pattern p;
+ REQUIRE(p.init("/^(\\w+)-(\\d+)$/$2:$1/"));
+ String res;
+ CHECK(p.replace("foo-42", res));
+ CHECK(res == "42:foo");
+ }
+
+ SECTION("Config string parsing - escaped slashes in pattern")
+ {
+ Pattern p;
+ REQUIRE(p.init("/path\\/to\\/file/$0/"));
+ String res;
+ CHECK(p.replace("path/to/file", res));
+ CHECK(res == "path/to/file");
+ }
+
+ SECTION("Config string parsing - escaped slashes in replacement")
+ {
+ Pattern p;
+ REQUIRE(p.init("/(\\w+)/prefix\\/$1/"));
+ String res;
+ CHECK(p.replace("test", res));
+ CHECK(res == "prefix/test");
+ }
+
+ SECTION("Config string parsing - invalid format missing closing slash")
+ {
+ Pattern p;
+ CHECK(p.init("/pattern/replacement") == false);
+ }
+
+ SECTION("Config string parsing - invalid format no slashes")
+ {
+ Pattern p;
+ CHECK(p.init("/pattern") == false);
+ }
+
+ SECTION("Replacement with multiple groups in different order")
+ {
+ Pattern p;
+ REQUIRE(p.init("^(\\w)(\\w)(\\w)$", "$3$1$2", true));
+ String res;
+ CHECK(p.replace("abc", res));
+ CHECK(res == "cab");
+ }
+
+ SECTION("Replacement with group $0 (entire match)")
+ {
+ Pattern p;
+ REQUIRE(p.init("test", "[$0]", true));
+ String res;
+ CHECK(p.replace("test", res));
+ CHECK(res == "[test]");
+ }
+
+ SECTION("Replacement with repeated group references")
+ {
+ Pattern p;
+ REQUIRE(p.init("(\\w+)", "$1-$1", true));
+ String res;
+ CHECK(p.replace("foo", res));
+ CHECK(res == "foo-foo");
+ }
+
+ SECTION("Replacement with static text around groups")
+ {
+ Pattern p;
+ REQUIRE(p.init("(\\d+)", "num=$1;", true));
+ String res;
+ CHECK(p.replace("123", res));
+ CHECK(res == "num=123;");
+ }
+
+ SECTION("Replacement with invalid group reference")
+ {
+ Pattern p;
+ REQUIRE(p.init("(\\w+)", "$5", true)); // only 2 groups (0 and 1)
+ String res;
+ // Should fail because $5 doesn't exist
+ CHECK(p.replace("test", res) == false);
+ }
+
+ SECTION("process() method - capture mode (no replacement)")
+ {
+ Pattern p;
+ REQUIRE(p.init("^(\\w+)-(\\d+)$"));
+ StringVector result;
+ CHECK(p.process("item-456", result));
+ // process() should skip group 0 when no replacement, only return
capturing groups
+ CHECK(result.size() == 2);
+ CHECK(result[0] == "item");
+ CHECK(result[1] == "456");
+ }
+
+ SECTION("process() method - capture mode with single group")
+ {
+ Pattern p;
+ REQUIRE(p.init("test"));
+ StringVector result;
+ CHECK(p.process("test", result));
+ // When there's only group 0, process() returns it
+ CHECK(result.size() == 1);
+ CHECK(result[0] == "test");
+ }
+
+ SECTION("process() method - replace mode")
+ {
+ Pattern p;
+ REQUIRE(p.init("/^(\\w+)-(\\d+)$/$1_$2/"));
+ StringVector result;
+ CHECK(p.process("foo-99", result));
+ CHECK(result.size() == 1);
+ CHECK(result[0] == "foo_99");
+ }
+
+ SECTION("process() method - no match")
+ {
+ Pattern p;
+ REQUIRE(p.init("^test$"));
+ StringVector result;
+ CHECK(p.process("nomatch", result) == false);
+ CHECK(result.size() == 0);
+ }
+
+ SECTION("Special characters in pattern")
+ {
+ Pattern p;
+ REQUIRE(p.init("\\$\\d+\\.\\d+"));
+ CHECK(p.match("$123.45") == true);
+ CHECK(p.match("123.45") == false);
+ }
+
+ SECTION("Anchored patterns")
+ {
+ Pattern p1, p2;
+ REQUIRE(p1.init("test")); // unanchored
+ REQUIRE(p2.init("^test$")); // anchored
+
+ CHECK(p1.match("pretest") == true);
+ CHECK(p2.match("pretest") == false);
+ CHECK(p2.match("test") == true);
+ }
+}
+
+TEST_CASE("MultiPattern tests", "[cachekey][pattern][multipattern]")
+{
+ SECTION("Empty multipattern")
+ {
+ MultiPattern mp("test");
+ CHECK(mp.empty() == true);
+ CHECK(mp.name() == "test");
+ CHECK(mp.match("anything") == false);
+ }
+
+ SECTION("Single pattern match")
+ {
+ MultiPattern mp("mobile");
+ auto p = std::make_unique<Pattern>();
+ REQUIRE(p->init("iPhone"));
+ mp.add(std::move(p));
+
+ CHECK(mp.empty() == false);
+ CHECK(mp.match("Mozilla/5.0 (iPhone; CPU iPhone OS") == true);
+ CHECK(mp.match("Mozilla/5.0 (Windows NT 10.0") == false);
+ }
+
+ SECTION("Multiple patterns - first match wins")
+ {
+ MultiPattern mp("devices");
+
+ auto p1 = std::make_unique<Pattern>();
+ REQUIRE(p1->init("Android"));
+ mp.add(std::move(p1));
+
+ auto p2 = std::make_unique<Pattern>();
+ REQUIRE(p2->init("iPhone"));
+ mp.add(std::move(p2));
+
+ CHECK(mp.match("Android device") == true);
+ CHECK(mp.match("iPhone device") == true);
+ CHECK(mp.match("Windows device") == false);
+ }
+
+ SECTION("MultiPattern process with captures")
+ {
+ MultiPattern mp("versions");
+
+ auto p1 = std::make_unique<Pattern>();
+ REQUIRE(p1->init("Chrome/(\\d+)"));
+ mp.add(std::move(p1));
+
+ auto p2 = std::make_unique<Pattern>();
+ REQUIRE(p2->init("Firefox/(\\d+)"));
+ mp.add(std::move(p2));
+
+ StringVector result;
+ CHECK(mp.process("Mozilla/5.0 Chrome/91.0", result) == true);
+ CHECK(result.size() >= 1);
+ CHECK(result[0] == "91");
+
+ result.clear();
+ CHECK(mp.process("Mozilla/5.0 Firefox/89.0", result) == true);
+ CHECK(result.size() >= 1);
+ CHECK(result[0] == "89");
+ }
+}
+
+TEST_CASE("NonMatchingMultiPattern tests", "[cachekey][pattern][nonmatching]")
+{
+ SECTION("NonMatchingMultiPattern - returns true when nothing matches")
+ {
+ NonMatchingMultiPattern nmp("exclude");
+
+ auto p1 = std::make_unique<Pattern>();
+ REQUIRE(p1->init("bot"));
+ nmp.add(std::move(p1));
+
+ // Should return true (no match = allowed)
+ CHECK(nmp.match("normal user agent") == true);
+ // Should return false (matched = not allowed)
+ CHECK(nmp.match("googlebot") == false);
+ }
+
+ SECTION("NonMatchingMultiPattern - multiple exclusions")
+ {
+ NonMatchingMultiPattern nmp("bots");
+
+ auto p1 = std::make_unique<Pattern>();
+ REQUIRE(p1->init("bot"));
+ nmp.add(std::move(p1));
+
+ auto p2 = std::make_unique<Pattern>();
+ REQUIRE(p2->init("crawler"));
+ nmp.add(std::move(p2));
+
+ CHECK(nmp.match("normal browser") == true);
+ CHECK(nmp.match("googlebot") == false);
+ CHECK(nmp.match("some crawler") == false);
+ }
+}
+
+TEST_CASE("Classifier tests", "[cachekey][pattern][classifier]")
+{
+ SECTION("Empty classifier")
+ {
+ Classifier c;
+ String name;
+ CHECK(c.classify("test", name) == false);
+ }
+
+ SECTION("Single class classification")
+ {
+ Classifier c;
+
+ auto mp = std::make_unique<MultiPattern>("mobile");
+ auto p1 = std::make_unique<Pattern>();
+ REQUIRE(p1->init("iPhone|Android"));
+ mp->add(std::move(p1));
+ c.add(std::move(mp));
+
+ String name;
+ CHECK(c.classify("Mozilla/5.0 (iPhone", name) == true);
+ CHECK(name == "mobile");
+
+ CHECK(c.classify("Mozilla/5.0 (Windows", name) == false);
+ }
+
+ SECTION("Multiple classes - first match wins")
+ {
+ Classifier c;
+
+ // Add mobile class first
+ auto mp_mobile = std::make_unique<MultiPattern>("mobile");
+ auto p1 = std::make_unique<Pattern>();
+ REQUIRE(p1->init("iPhone|Android"));
+ mp_mobile->add(std::move(p1));
+ c.add(std::move(mp_mobile));
+
+ // Add tablet class second
+ auto mp_tablet = std::make_unique<MultiPattern>("tablet");
+ auto p2 = std::make_unique<Pattern>();
+ REQUIRE(p2->init("iPad"));
+ mp_tablet->add(std::move(p2));
+ c.add(std::move(mp_tablet));
+
+ // Add desktop class third
+ auto mp_desktop = std::make_unique<MultiPattern>("desktop");
+ auto p3 = std::make_unique<Pattern>();
+ REQUIRE(p3->init("Windows|Macintosh"));
+ mp_desktop->add(std::move(p3));
+ c.add(std::move(mp_desktop));
+
+ String name;
+ CHECK(c.classify("Mozilla/5.0 (Android", name) == true);
+ CHECK(name == "mobile");
+
+ CHECK(c.classify("Mozilla/5.0 (iPad", name) == true);
+ CHECK(name == "tablet");
+
+ CHECK(c.classify("Mozilla/5.0 (Windows NT", name) == true);
+ CHECK(name == "desktop");
+
+ CHECK(c.classify("Unknown/1.0", name) == false);
+ }
+
+ SECTION("Classifier with empty multipatterns")
+ {
+ Classifier c;
+
+ // Add an empty multipattern
+ auto mp = std::make_unique<MultiPattern>("empty");
+ c.add(std::move(mp));
+
+ String name;
+ // Should skip empty patterns
+ CHECK(c.classify("test", name) == false);
+ }
+
+ SECTION("Complex real-world classification")
+ {
+ Classifier c;
+
+ // Mobile phones
+ auto mp_phone = std::make_unique<MultiPattern>("phone");
+ auto p1 = std::make_unique<Pattern>();
+ REQUIRE(p1->init("iPhone"));
+ mp_phone->add(std::move(p1));
+ auto p2 = std::make_unique<Pattern>();
+ REQUIRE(p2->init("Android.*Mobile"));
+ mp_phone->add(std::move(p2));
+ c.add(std::move(mp_phone));
+
+ // Tablets
+ auto mp_tablet = std::make_unique<MultiPattern>("tablet");
+ auto p3 = std::make_unique<Pattern>();
+ REQUIRE(p3->init("iPad"));
+ mp_tablet->add(std::move(p3));
+ auto p4 = std::make_unique<Pattern>();
+ REQUIRE(p4->init("Android(?!.*Mobile)"));
+ mp_tablet->add(std::move(p4));
+ c.add(std::move(mp_tablet));
+
+ String name;
+ CHECK(c.classify("Mozilla/5.0 (iPhone; CPU iPhone OS 14_0", name) == true);
+ CHECK(name == "phone");
+
+ CHECK(c.classify("Mozilla/5.0 (Linux; Android 10; SM-G960U) Mobile", name)
== true);
+ CHECK(name == "phone");
+
+ CHECK(c.classify("Mozilla/5.0 (iPad; CPU OS 14_0", name) == true);
+ CHECK(name == "tablet");
+
+ // Android tablet (no "Mobile" in UA)
+ CHECK(c.classify("Mozilla/5.0 (Linux; Android 10; SM-T510)", name) ==
true);
+ CHECK(name == "tablet");
+ }
}
diff --git a/src/tsutil/unit_tests/test_Regex.cc
b/src/tsutil/unit_tests/test_Regex.cc
index b1b2c1609d..26143208d1 100644
--- a/src/tsutil/unit_tests/test_Regex.cc
+++ b/src/tsutil/unit_tests/test_Regex.cc
@@ -258,6 +258,338 @@ TEST_CASE("Regex error codes", "[libts][Regex][errors]")
CHECK(r.exec("foo", matches) != RE_ERROR_NOMATCH);
CHECK(r.exec("foo", matches) == 1);
}
+
+ SECTION("Compile error returns detailed error message")
+ {
+ Regex r;
+ std::string error;
+ int erroffset;
+
+ // Unclosed parenthesis should fail with error message
+ CHECK(r.compile(R"((unclosed)", error, erroffset) == false);
+ CHECK(!error.empty());
+ CHECK(erroffset > 0);
+
+ // Invalid escape sequence
+ error.clear();
+ erroffset = 0;
+ CHECK(r.compile(R"(\k)", error, erroffset) == false);
+ CHECK(!error.empty());
+
+ // Invalid character class
+ error.clear();
+ erroffset = 0;
+ CHECK(r.compile(R"([z-a])", error, erroffset) == false);
+ CHECK(!error.empty());
+ }
+}
+
+TEST_CASE("Regex::empty()", "[libts][Regex][empty]")
+{
+ SECTION("newly constructed Regex is empty")
+ {
+ Regex r;
+ CHECK(r.empty() == true);
+ }
+
+ SECTION("compiled Regex is not empty")
+ {
+ Regex r;
+ REQUIRE(r.compile("test") == true);
+ CHECK(r.empty() == false);
+ }
+
+ SECTION("failed compilation leaves Regex empty")
+ {
+ Regex r;
+ REQUIRE(r.compile("(invalid") == false);
+ CHECK(r.empty() == true);
+ }
+
+ SECTION("recompiling non-empty Regex")
+ {
+ Regex r;
+ REQUIRE(r.compile("foo") == true);
+ CHECK(r.empty() == false);
+
+ REQUIRE(r.compile("bar") == true);
+ CHECK(r.empty() == false);
+ }
+}
+
+TEST_CASE("Regex move semantics", "[libts][Regex][move]")
+{
+ SECTION("move constructor")
+ {
+ Regex r1;
+ REQUIRE(r1.compile("^test$") == true);
+ CHECK(r1.exec("test") == true);
+ CHECK(r1.empty() == false);
+
+ // Move construct r2 from r1
+ Regex r2(std::move(r1));
+ CHECK(r2.empty() == false);
+ CHECK(r2.exec("test") == true);
+ CHECK(r2.exec("foo") == false);
+ }
+
+ SECTION("move assignment operator")
+ {
+ Regex r1;
+ REQUIRE(r1.compile("^test$") == true);
+
+ Regex r2;
+ REQUIRE(r2.compile("^foo$") == true);
+
+ // Move assign r1 to r2
+ r2 = std::move(r1);
+ CHECK(r2.empty() == false);
+ CHECK(r2.exec("test") == true);
+ CHECK(r2.exec("foo") == false);
+ }
+
+ SECTION("move empty Regex")
+ {
+ Regex r1; // empty
+ Regex r2(std::move(r1));
+ CHECK(r2.empty() == true);
+ }
+}
+
+TEST_CASE("Regex RE_UNANCHORED flag", "[libts][Regex][flags][RE_UNANCHORED]")
+{
+ SECTION("RE_UNANCHORED allows matching anywhere in multiline text")
+ {
+ Regex r;
+ // Pattern that should match "test" at start of any line
+ REQUIRE(r.compile("^test", RE_UNANCHORED) == true);
+
+ // Should match at start of string
+ CHECK(r.exec("test\nfoo") == true);
+
+ // Should match after newline (multiline mode)
+ CHECK(r.exec("foo\ntest") == true);
+
+ // Should not match in middle of line
+ CHECK(r.exec("foo test") == false);
+ }
+
+ SECTION("default (without RE_UNANCHORED) only matches at string start")
+ {
+ Regex r;
+ REQUIRE(r.compile("^test") == true);
+
+ // Should match at start
+ CHECK(r.exec("test\nfoo") == true);
+
+ // Should NOT match after newline without RE_UNANCHORED
+ CHECK(r.exec("foo\ntest") == false);
+ }
+}
+
+TEST_CASE("RegexMatches edge cases", "[libts][Regex][RegexMatches]")
+{
+ SECTION("RegexMatches size after no match")
+ {
+ Regex r;
+ RegexMatches matches;
+ REQUIRE(r.compile("test") == true);
+
+ int count = r.exec("nomatch", matches);
+ CHECK(count == RE_ERROR_NOMATCH);
+ CHECK(matches.size() == RE_ERROR_NOMATCH);
+ }
+
+ SECTION("RegexMatches operator[] with various capture counts")
+ {
+ Regex r;
+ REQUIRE(r.compile("(\\w+)-(\\d+)-(\\w+)") == true);
+
+ RegexMatches matches;
+ int count = r.exec("foo-123-bar", matches);
+ CHECK(count == 4); // whole match + 3 groups
+
+ CHECK(matches[0] == "foo-123-bar");
+ CHECK(matches[1] == "foo");
+ CHECK(matches[2] == "123");
+ CHECK(matches[3] == "bar");
+ }
+
+ SECTION("RegexMatches with zero-length captures")
+ {
+ Regex r;
+ REQUIRE(r.compile("(\\w*)-(\\w*)") == true);
+
+ RegexMatches matches;
+
+ // First group empty, second has content
+ int count = r.exec("-foo", matches);
+ CHECK(count == 3);
+ CHECK(matches[0] == "-foo");
+ CHECK(matches[1] == "");
+ CHECK(matches[2] == "foo");
+ }
+
+ SECTION("RegexMatches with optional groups")
+ {
+ Regex r;
+ REQUIRE(r.compile("(\\w+)-(\\d+)?") == true);
+
+ RegexMatches matches;
+
+ // With optional group present
+ int count = r.exec("foo-123", matches);
+ CHECK(count == 3);
+ CHECK(matches[1] == "foo");
+ CHECK(matches[2] == "123");
+
+ // With optional group absent - note: PCRE2 may still count it
+ count = r.exec("foo-", matches);
+ CHECK(count >= 2); // At least whole match + first group
+ CHECK(matches[1] == "foo");
+ }
+}
+
+TEST_CASE("Regex with special characters", "[libts][Regex][special]")
+{
+ SECTION("escaped special characters")
+ {
+ Regex r;
+ REQUIRE(r.compile(R"(\$\d+\.\d+)") == true);
+
+ CHECK(r.exec("$123.45") == true);
+ CHECK(r.exec("123.45") == false);
+ CHECK(r.exec("$12.3") == true);
+ }
+
+ SECTION("character classes")
+ {
+ Regex r;
+ REQUIRE(r.compile(R"([A-Z][a-z]+)") == true);
+
+ CHECK(r.exec("Hello") == true);
+ CHECK(r.exec("hello") == false);
+ CHECK(r.exec("HELLO") == false);
+ }
+
+ SECTION("quantifiers")
+ {
+ Regex r;
+ REQUIRE(r.compile(R"(\d{3}-\d{4})") == true);
+
+ CHECK(r.exec("123-4567") == true);
+ CHECK(r.exec("12-4567") == false);
+ CHECK(r.exec("123-456") == false);
+ }
+
+ SECTION("alternation")
+ {
+ Regex r;
+ REQUIRE(r.compile(R"(foo|bar|baz)") == true);
+
+ CHECK(r.exec("foo") == true);
+ CHECK(r.exec("bar") == true);
+ CHECK(r.exec("baz") == true);
+ CHECK(r.exec("qux") == false);
+ }
+}
+
+TEST_CASE("Regex with complex patterns", "[libts][Regex][complex]")
+{
+ SECTION("greedy vs non-greedy quantifiers")
+ {
+ Regex greedy, non_greedy;
+ REQUIRE(greedy.compile(R"(<.*>)") == true);
+ REQUIRE(non_greedy.compile(R"(<.*?>)") == true);
+
+ RegexMatches matches;
+
+ // Greedy matches everything
+ int count = greedy.exec("<div>content</div>", matches);
+ CHECK(count == 1);
+ CHECK(matches[0] == "<div>content</div>");
+
+ // Non-greedy matches just first tag
+ count = non_greedy.exec("<div>content</div>", matches);
+ CHECK(count == 1);
+ CHECK(matches[0] == "<div>");
+ }
+
+ SECTION("lookahead assertions")
+ {
+ Regex r;
+ // Match "foo" only if followed by "bar"
+ REQUIRE(r.compile(R"(foo(?=bar))") == true);
+
+ CHECK(r.exec("foobar") == true);
+ CHECK(r.exec("foobaz") == false);
+ CHECK(r.exec("foo") == false);
+ }
+
+ SECTION("negative lookahead")
+ {
+ Regex r;
+ // Match "foo" only if NOT followed by "bar"
+ REQUIRE(r.compile(R"(foo(?!bar))") == true);
+
+ CHECK(r.exec("foobar") == false);
+ CHECK(r.exec("foobaz") == true);
+ CHECK(r.exec("foo") == true);
+ }
+
+ SECTION("word boundaries")
+ {
+ Regex r;
+ REQUIRE(r.compile(R"(\btest\b)") == true);
+
+ CHECK(r.exec("test") == true);
+ CHECK(r.exec("a test here") == true);
+ CHECK(r.exec("testing") == false);
+ CHECK(r.exec("attest") == false);
+ }
+}
+
+TEST_CASE("Regex recompilation behavior", "[libts][Regex][recompile]")
+{
+ SECTION("recompile frees previous pattern")
+ {
+ Regex r;
+
+ REQUIRE(r.compile("foo") == true);
+ CHECK(r.exec("foo") == true);
+ CHECK(r.exec("bar") == false);
+
+ // Recompile with different pattern
+ REQUIRE(r.compile("bar") == true);
+ CHECK(r.exec("bar") == true);
+ CHECK(r.exec("foo") == false);
+ }
+
+ SECTION("recompile after failed compilation")
+ {
+ Regex r;
+
+ // First compilation fails
+ REQUIRE(r.compile("(invalid") == false);
+ CHECK(r.empty() == true);
+
+ // Should still be able to compile successfully
+ REQUIRE(r.compile("valid") == true);
+ CHECK(r.empty() == false);
+ CHECK(r.exec("valid") == true);
+ }
+
+ SECTION("recompile with different flags")
+ {
+ Regex r;
+
+ REQUIRE(r.compile("test") == true);
+ CHECK(r.exec("TEST") == false);
+
+ // Recompile with case insensitive flag
+ REQUIRE(r.compile("test", RE_CASE_INSENSITIVE) == true);
+ CHECK(r.exec("TEST") == true);
+ }
}
TEST_CASE("Regex copy constructor", "[libts][Regex][copy]")