On Fri, Jun 27, 2014 at 12:37 AM, Paolo Carlini <paolo.carl...@oracle.com> wrote: > The actual patch is missing.. ;)
Sigh...every time. Sorry. > PS: sorry for being distracted by other issues: what happened to the other > regex issue? I think we are simply going to apply, when ready, your more > complete fix, right? The problem given in the other PR (PR 61582) is also solved by this patch (but I forgot to mention that); They are all examples like nested range quantifiers. PR 61424's patch is ready; I was waiting for some "Ok go ahead", but now I realize that I need to send the complete patch first :) I'll send it later. -- Regards, Tim Shen
commit 78a8e40d53f3083f5217b30908fda9442e0a92c5 Author: Tim Shen <tims...@google.com> Date: Thu Jun 26 20:35:33 2014 -0700 PR libstdc++/61061 PR libstdc++/61582 * include/bits/regex_automaton.h (_NFA<>::_M_insert_state): Add a NFA state limit. If it's exceeded, regex_constants::error_space will be throwed. * include/bits/regex_automaton.tcc (_StateSeq<>::_M_clone): Use map (which is sparse) instead of vector. This reduce n times clones' cost from O(n^2) to O(n). * include/std/regex: Add map dependency. * testsuite/28_regex/algorithms/regex_match/ecma/char/61601.cc: New testcase. diff --git a/libstdc++-v3/include/bits/regex_automaton.h b/libstdc++-v3/include/bits/regex_automaton.h index 1b0da14..27ec671 100644 --- a/libstdc++-v3/include/bits/regex_automaton.h +++ b/libstdc++-v3/include/bits/regex_automaton.h @@ -28,6 +28,11 @@ * Do not attempt to use it directly. @headername{regex} */ +// This macro defines the maximal state number a NFA can have. +#ifndef _GLIBCXX_REGEX_STATE_LIMIT +#define _GLIBCXX_REGEX_STATE_LIMIT 100000 +#endif + namespace std _GLIBCXX_VISIBILITY(default) { namespace __detail @@ -254,6 +259,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_insert_state(_StateT __s) { this->push_back(std::move(__s)); + if (this->size() > _GLIBCXX_REGEX_STATE_LIMIT) + __throw_regex_error(regex_constants::error_space); return this->size()-1; } diff --git a/libstdc++-v3/include/bits/regex_automaton.tcc b/libstdc++-v3/include/bits/regex_automaton.tcc index 1c6cfdd..08d271e 100644 --- a/libstdc++-v3/include/bits/regex_automaton.tcc +++ b/libstdc++-v3/include/bits/regex_automaton.tcc @@ -189,7 +189,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _StateSeq<_TraitsT> _StateSeq<_TraitsT>::_M_clone() { - std::vector<_StateIdT> __m(_M_nfa.size(), -1); + std::map<_StateIdT, _StateIdT> __m; std::stack<_StateIdT> __stack; __stack.push(_M_start); while (!__stack.empty()) @@ -203,21 +203,22 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION if (__dup._M_opcode == _S_opcode_alternative || __dup._M_opcode == _S_opcode_repeat || __dup._M_opcode == _S_opcode_subexpr_lookahead) - if (__dup._M_alt != _S_invalid_state_id && __m[__dup._M_alt] == -1) + if (__dup._M_alt != _S_invalid_state_id + && __m.count(__dup._M_alt) == 0) __stack.push(__dup._M_alt); if (__u == _M_end) continue; - if (__dup._M_next != _S_invalid_state_id && __m[__dup._M_next] == -1) + if (__dup._M_next != _S_invalid_state_id + && __m.count(__dup._M_next) == 0) __stack.push(__dup._M_next); } - for (auto __v : __m) + for (auto __it : __m) { - if (__v == -1) - continue; + auto __v = __it.second; auto& __ref = _M_nfa[__v]; if (__ref._M_next != _S_invalid_state_id) { - _GLIBCXX_DEBUG_ASSERT(__m[__ref._M_next] != -1); + _GLIBCXX_DEBUG_ASSERT(__m.count(__ref._M_next) > 0); __ref._M_next = __m[__ref._M_next]; } if (__ref._M_opcode == _S_opcode_alternative @@ -225,7 +226,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION || __ref._M_opcode == _S_opcode_subexpr_lookahead) if (__ref._M_alt != _S_invalid_state_id) { - _GLIBCXX_DEBUG_ASSERT(__m[__ref._M_alt] != -1); + _GLIBCXX_DEBUG_ASSERT(__m.count(__ref._M_alt) > 0); __ref._M_alt = __m[__ref._M_alt]; } } diff --git a/libstdc++-v3/include/std/regex b/libstdc++-v3/include/std/regex index 9161f48..470772af 100644 --- a/libstdc++-v3/include/std/regex +++ b/libstdc++-v3/include/std/regex @@ -50,6 +50,7 @@ #include <string> #include <utility> #include <vector> +#include <map> #include <cstring> #include <bits/regex_constants.h> diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/61601.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/61601.cc new file mode 100644 index 0000000..88efab5 --- /dev/null +++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/61601.cc @@ -0,0 +1,48 @@ +// { dg-options "-std=gnu++11" } + +// Copyright (C) 2013-2014 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING3. If not see +// <http://www.gnu.org/licenses/>. + +// 28.11.2 regex_match + +#include <regex> +#include <testsuite_hooks.h> +#include <testsuite_regex.h> + +using namespace __gnu_test; +using namespace std; + +// libstdc++/61601 +void +test01() +{ + try + { + regex r("((.*)$1{100}{100}{100}{100}{100}{100}{100}{100}{100}{100}{100}{100}{100}{100}{100}{100}{100}{100}{100}{100}{100}{100}{100}{100}{100}{100}{100}{100}{100}{100})"); + } + catch (const regex_error& e) + { + VERIFY(e.code() == regex_constants::error_space); + } +} + +int +main() +{ + test01(); + return 0; +}