pammon created this revision.
Herald added a reviewer: EricWF.
In regex, lookahead assertions like '(?=stuff)' and '(?!stuff)' are
implemented by constructing a child regular expression 'stuff' and
matching that. If the child regular expression contains a backreference,
matching would trip an assertion or reference the wrong capture group,
because the child was unaware of the capture groups of its parent. For
example, /(x)(?=\1)/ would trip an assertion.
Address this by propagating the parent's submatches into the child, so
that backreferences refer to the correct capture groups. This also allows
us to eliminate the __mexp_ field, because the child expression now shares
the entire submatch array with the parent.
https://reviews.llvm.org/D32635
Files:
include/regex
test/std/re/re.alg/re.alg.match/ecma.pass.cpp
Index: test/std/re/re.alg/re.alg.match/ecma.pass.cpp
===
--- test/std/re/re.alg/re.alg.match/ecma.pass.cpp
+++ test/std/re/re.alg/re.alg.match/ecma.pass.cpp
@@ -637,6 +637,22 @@
assert(m.str(0) == s);
}
{
+ std::cmatch m;
+ const char s[] = "abcabc";
+ assert(std::regex_match(s, m, std::regex("(.+)(?=\\1)(\\1)")));
+ assert(m.size() == 3);
+ assert(m.str(1) == "abc");
+ assert(m.str(2) == "abc");
+}
+{
+ std::cmatch m;
+ const char s[] = "aa";
+ assert(std::regex_match(s, m, std::regex("(a+)(?!\\1)(a*)")));
+ assert(m.size() == 3);
+ assert(m.str(1) == "aa");
+ assert(m.str(2) == "");
+}
+{
std::cmatch m;
const char s[] = "foobar";
assert(std::regex_match(s, m, std::regex("[^\\0]*")));
Index: include/regex
===
--- include/regex
+++ include/regex
@@ -2826,7 +2826,7 @@
void __push_end_marked_subexpression(unsigned);
void __push_empty();
void __push_word_boundary(bool);
-void __push_lookahead(const basic_regex&, bool, unsigned);
+void __push_lookahead(basic_regex, bool);
template
bool
@@ -2843,6 +2843,7 @@
bool
__match_at_start_ecma(const _CharT* __first, const _CharT* __last,
match_results& __m,
+ const vector> &incoming_sub_matches,
regex_constants::match_flag_type __flags, bool) const;
template
bool
@@ -2964,17 +2965,16 @@
typedef __owns_one_state<_CharT> base;
basic_regex<_CharT, _Traits> __exp_;
-unsigned __mexp_;
bool __invert_;
__lookahead(const __lookahead&);
__lookahead& operator=(const __lookahead&);
public:
typedef _VSTD::__state<_CharT> __state;
_LIBCPP_INLINE_VISIBILITY
-__lookahead(const basic_regex<_CharT, _Traits>& __exp, bool __invert, __node<_CharT>* __s, unsigned __mexp)
-: base(__s), __exp_(__exp), __mexp_(__mexp), __invert_(__invert) {}
+__lookahead(basic_regex<_CharT, _Traits> __exp, bool __invert, __node<_CharT>* __s)
+: base(__s), __exp_(move(__exp)), __invert_(__invert) {}
virtual void __exec(__state&) const;
};
@@ -2987,16 +2987,18 @@
__m.__init(1 + __exp_.mark_count(), __s.__current_, __s.__last_);
bool __matched = __exp_.__match_at_start_ecma(
__s.__current_, __s.__last_,
-__m,
+__m, __s.__sub_matches_,
(__s.__flags_ | regex_constants::match_continuous) &
~regex_constants::__full_match,
__s.__at_first_ && __s.__current_ == __s.__first_);
if (__matched != __invert_)
{
__s.__do_ = __state::__accept_but_not_consume;
__s.__node_ = this->first();
-for (unsigned __i = 1; __i < __m.size(); ++__i) {
-__s.__sub_matches_[__mexp_ + __i - 1] = __m.__matches_[__i];
+if (__matched) {
+for (unsigned __i = 1; __i < __m.size(); ++__i) {
+__s.__sub_matches_[__i - 1] = __m.__matches_[__i];
+}
}
}
else
@@ -4168,26 +4170,16 @@
switch (*__temp)
{
case '=':
-{
-basic_regex __exp;
-__exp.__flags_ = __flags_;
-__temp = __exp.__parse(++__temp, __last);
-unsigned __mexp = __exp.__marked_count_;
-__push_lookahead(_VSTD::move(__exp), false, __marked_count_);
-__marked_count_ += __mexp;
-if (__temp == __last || *__temp != ')')
-__throw_regex_error();
-__first = ++__temp;
-}
-break;
case '!':
{
+bool __invert = (*__temp == '!');
basic_regex __exp;