[PATCH] D32635: [libcxx] regex: fix backreferences in forward assertions

2017-04-28 Thread Peter Ammon via Phabricator via cfe-commits
pammon created this revision.
Herald added a reviewer: EricWF.

In regex, forward (lookahead) assertions like '(?=stuff)' are implemented
by constructing a child regular expression 'stuff' and matching that.
If the child regular expression contains a backreference, this would
either trip an internal assert in the library or reference the wrong
capture group, because the child was ignorant of the capture groups of
its parent. For example, matching /(x)(?=\1)/ would trip such an assert.

Address this by propagating submatches into the child, so that
backreferences reference the correct capture groups. This also allows us
to eliminate the __mexp_ field of __lookahead, because the child
expression shares the entire submatch array with the parent.


https://reviews.llvm.org/D32635

Files:
  include/regex
  test/std/re/re.alg/re.alg.match/ecma.pass.cpp

Index: test/std/re/re.alg/re.alg.match/ecma.pass.cpp
===
--- test/std/re/re.alg/re.alg.match/ecma.pass.cpp
+++ test/std/re/re.alg/re.alg.match/ecma.pass.cpp
@@ -637,6 +637,22 @@
 assert(m.str(0) == s);
 }
 {
+  std::cmatch m;
+  const char s[] = "abcabc";
+  assert(std::regex_match(s, m, std::regex("(.+)(?=\\1)(\\1)")));
+  assert(m.size() == 3);
+  assert(m.str(1) == "abc");
+  assert(m.str(2) == "abc");
+}
+{
+  std::cmatch m;
+  const char s[] = "aa";
+  assert(std::regex_match(s, m, std::regex("(a+)(?!\\1)(a*)")));
+  assert(m.size() == 3);
+  assert(m.str(1) == "aa");
+  assert(m.str(2) == "");
+}
+{
 std::cmatch m;
 const char s[] = "foobar";
 assert(std::regex_match(s, m, std::regex("[^\\0]*")));
Index: include/regex
===
--- include/regex
+++ include/regex
@@ -2826,7 +2826,7 @@
 void __push_end_marked_subexpression(unsigned);
 void __push_empty();
 void __push_word_boundary(bool);
-void __push_lookahead(const basic_regex&, bool, unsigned);
+void __push_lookahead(basic_regex, bool);
 
 template 
 bool
@@ -2843,6 +2843,7 @@
 bool
 __match_at_start_ecma(const _CharT* __first, const _CharT* __last,
  match_results& __m,
+ const vector> &incoming_sub_matches,
  regex_constants::match_flag_type __flags, bool) const;
 template 
 bool
@@ -2964,17 +2965,16 @@
 typedef __owns_one_state<_CharT> base;
 
 basic_regex<_CharT, _Traits> __exp_;
-unsigned __mexp_;
 bool __invert_;
 
 __lookahead(const __lookahead&);
 __lookahead& operator=(const __lookahead&);
 public:
 typedef _VSTD::__state<_CharT> __state;
 
 _LIBCPP_INLINE_VISIBILITY
-__lookahead(const basic_regex<_CharT, _Traits>& __exp, bool __invert, __node<_CharT>* __s, unsigned __mexp)
-: base(__s), __exp_(__exp), __mexp_(__mexp), __invert_(__invert) {}
+__lookahead(basic_regex<_CharT, _Traits> __exp, bool __invert, __node<_CharT>* __s)
+: base(__s), __exp_(move(__exp)), __invert_(__invert) {}
 
 virtual void __exec(__state&) const;
 };
@@ -2987,16 +2987,18 @@
 __m.__init(1 + __exp_.mark_count(), __s.__current_, __s.__last_);
 bool __matched = __exp_.__match_at_start_ecma(
 __s.__current_, __s.__last_,
-__m,
+__m, __s.__sub_matches_,
 (__s.__flags_ | regex_constants::match_continuous) &
 ~regex_constants::__full_match,
 __s.__at_first_ && __s.__current_ == __s.__first_);
 if (__matched != __invert_)
 {
 __s.__do_ = __state::__accept_but_not_consume;
 __s.__node_ = this->first();
-for (unsigned __i = 1; __i < __m.size(); ++__i) {
-__s.__sub_matches_[__mexp_ + __i - 1] = __m.__matches_[__i];
+if (__matched) {
+for (unsigned __i = 1; __i < __m.size(); ++__i) {
+__s.__sub_matches_[__i - 1] = __m.__matches_[__i];
+}
 }
 }
 else
@@ -4168,26 +4170,16 @@
 switch (*__temp)
 {
 case '=':
-{
-basic_regex __exp;
-__exp.__flags_ = __flags_;
-__temp = __exp.__parse(++__temp, __last);
-unsigned __mexp = __exp.__marked_count_;
-__push_lookahead(_VSTD::move(__exp), false, __marked_count_);
-__marked_count_ += __mexp;
-if (__temp == __last || *__temp != ')')
-__throw_regex_error();
-__first = ++__temp;
-}
-break;
 case '!':
 {
+bool __invert = (*__temp == '!');
 basic_regex __exp;
   

[PATCH] D32635: [libcxx] regex: fix backreferences in forward assertions

2017-05-05 Thread Peter Ammon via Phabricator via cfe-commits
pammon added a comment.

ping?


https://reviews.llvm.org/D32635



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits