Patch attached.
-- Regards, Tim Shen
commit cd0683e3a3eae0c62d2867fb5456edb3735b38ae Author: tim <timshe...@gmail.com> Date: Fri Apr 25 10:45:26 2014 -0400 2014-04-25 Tim Shen <timshe...@gmail.com> * include/bits/regex_automaton.h (_NFA<>::_M_insert_repeat): Add _S_opcode_repeat support to distingush a loop from _S_opcode_alternative. * include/bits/regex_automaton.tcc (_State_base::_M_print, _State_base::_M_dot, _NFA<>::_M_eliminate_dummy, _StateSeq<>::_M_clone): Likewise. * include/bits/regex_compiler.tcc (_Compiler<>::_M_quantifier): Likewise. * include/bits/regex_executor.tcc (_Executor<>::_M_dfs): Likewise. * include/bits/regex_scanner.tcc (_Scanner<>::_M_eat_escape_ecma): Uglify local variable __i. * include/bits/regex_compiler.h (_BracketMatcher<>::_M_make_cache): Use size_t instead of int to compare with vector::size(). diff --git a/libstdc++-v3/include/bits/regex_automaton.h b/libstdc++-v3/include/bits/regex_automaton.h index 64ecd6d..1b0da14 100644 --- a/libstdc++-v3/include/bits/regex_automaton.h +++ b/libstdc++-v3/include/bits/regex_automaton.h @@ -52,6 +52,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { _S_opcode_unknown, _S_opcode_alternative, + _S_opcode_repeat, _S_opcode_backref, _S_opcode_line_begin_assertion, _S_opcode_line_end_assertion, @@ -74,7 +75,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION size_t _M_backref_index; // for _S_opcode_backref struct { - // for _S_opcode_alternative or _S_opcode_subexpr_lookahead + // for _S_opcode_alternative, _S_opcode_repeat and + // _S_opcode_subexpr_lookahead _StateIdT _M_alt; // for _S_opcode_word_boundary or _S_opcode_subexpr_lookahead or // quantifiers (ungreedy if set true) @@ -174,6 +176,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION // approach. __tmp._M_next = __next; __tmp._M_alt = __alt; + return _M_insert_state(std::move(__tmp)); + } + + _StateIdT + _M_insert_repeat(_StateIdT __next, _StateIdT __alt, bool __neg) + { + _StateT __tmp(_S_opcode_repeat); + // It labels every quantifier to make greedy comparison easier in BFS + // approach. + __tmp._M_next = __next; + __tmp._M_alt = __alt; __tmp._M_neg = __neg; return _M_insert_state(std::move(__tmp)); } diff --git a/libstdc++-v3/include/bits/regex_automaton.tcc b/libstdc++-v3/include/bits/regex_automaton.tcc index 38787fa..e0ac3f9 100644 --- a/libstdc++-v3/include/bits/regex_automaton.tcc +++ b/libstdc++-v3/include/bits/regex_automaton.tcc @@ -41,6 +41,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION switch (_M_opcode) { case _S_opcode_alternative: + case _S_opcode_repeat: ostr << "alt next=" << _M_next << " alt=" << _M_alt; break; case _S_opcode_subexpr_begin: @@ -72,6 +73,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION switch (_M_opcode) { case _S_opcode_alternative: + case _S_opcode_repeat: __ostr << __id << " [label=\"" << __id << "\\nALT\"];\n" << __id << " -> " << _M_next << " [label=\"next\", tailport=\"s\"];\n" @@ -174,6 +176,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION == _S_opcode_dummy) __it._M_next = (*this)[__it._M_next]._M_next; if (__it._M_opcode == _S_opcode_alternative + || __it._M_opcode == _S_opcode_repeat || __it._M_opcode == _S_opcode_subexpr_lookahead) while (__it._M_alt >= 0 && (*this)[__it._M_alt]._M_opcode == _S_opcode_dummy) @@ -198,6 +201,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION auto __id = _M_nfa._M_insert_state(__dup); __m[__u] = __id; if (__dup._M_opcode == _S_opcode_alternative + || __dup._M_opcode == _S_opcode_repeat || __dup._M_opcode == _S_opcode_subexpr_lookahead) if (__dup._M_alt != _S_invalid_state_id && __m[__dup._M_alt] == -1) __stack.push(__dup._M_alt); @@ -217,6 +221,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION __ref._M_next = __m[__ref._M_next]; } if (__ref._M_opcode == _S_opcode_alternative + || __ref._M_opcode == _S_opcode_repeat || __ref._M_opcode == _S_opcode_subexpr_lookahead) if (__ref._M_alt != _S_invalid_state_id) { diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h index f5a198f..d7e2162 100644 --- a/libstdc++-v3/include/bits/regex_compiler.h +++ b/libstdc++-v3/include/bits/regex_compiler.h @@ -421,7 +421,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION void _M_make_cache(true_type) { - for (int __i = 0; __i < _M_cache.size(); __i++) + for (size_t __i = 0; __i < _M_cache.size(); __i++) _M_cache[static_cast<_UnsignedCharT>(__i)] = _M_apply(__i, false_type()); } diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc b/libstdc++-v3/include/bits/regex_compiler.tcc index 128dac1..3cf9e457 100644 --- a/libstdc++-v3/include/bits/regex_compiler.tcc +++ b/libstdc++-v3/include/bits/regex_compiler.tcc @@ -188,8 +188,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { __init(); auto __e = _M_pop(); - _StateSeqT __r(_M_nfa, _M_nfa._M_insert_alt(_S_invalid_state_id, - __e._M_start, __neg)); + _StateSeqT __r(_M_nfa, _M_nfa._M_insert_repeat(_S_invalid_state_id, + __e._M_start, __neg)); __e._M_append(__r); _M_stack.push(__r); } @@ -197,8 +197,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { __init(); auto __e = _M_pop(); - __e._M_append(_M_nfa._M_insert_alt(_S_invalid_state_id, __e._M_start, - __neg)); + __e._M_append(_M_nfa._M_insert_repeat(_S_invalid_state_id, + __e._M_start, __neg)); _M_stack.push(__e); } else if (_M_match_token(_ScannerT::_S_token_opt)) @@ -206,8 +206,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION __init(); auto __e = _M_pop(); auto __end = _M_nfa._M_insert_dummy(); - _StateSeqT __r(_M_nfa, _M_nfa._M_insert_alt(_S_invalid_state_id, - __e._M_start, __neg)); + _StateSeqT __r(_M_nfa, _M_nfa._M_insert_repeat(_S_invalid_state_id, + __e._M_start, __neg)); __e._M_append(__end); __r._M_append(__end); _M_stack.push(__r); @@ -244,8 +244,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { auto __tmp = __r._M_clone(); _StateSeqT __s(_M_nfa, - _M_nfa._M_insert_alt(_S_invalid_state_id, - __tmp._M_start, __neg)); + _M_nfa._M_insert_repeat(_S_invalid_state_id, + __tmp._M_start, __neg)); __tmp._M_append(__s); __e._M_append(__s); } @@ -261,8 +261,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION for (long __i = 0; __i < __n; ++__i) { auto __tmp = __r._M_clone(); - auto __alt = _M_nfa._M_insert_alt(__tmp._M_start, - __end, __neg); + auto __alt = _M_nfa._M_insert_repeat(__tmp._M_start, + __end, __neg); __stack.push(__alt); __e._M_append(_StateSeqT(_M_nfa, __alt, __tmp._M_end)); } diff --git a/libstdc++-v3/include/bits/regex_executor.tcc b/libstdc++-v3/include/bits/regex_executor.tcc index bb10a72..6dd4d25 100644 --- a/libstdc++-v3/include/bits/regex_executor.tcc +++ b/libstdc++-v3/include/bits/regex_executor.tcc @@ -194,7 +194,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION } }; - // TODO: Use a function vector to dispatch, instead of using switch-case. template<typename _BiIter, typename _Alloc, typename _TraitsT, bool __dfs_mode> template<bool __match_mode> @@ -217,7 +216,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION // of this quantifier". Executing _M_next first or _M_alt first don't // mean the same thing, and we need to choose the correct order under // given greedy mode. - case _S_opcode_alternative: + case _S_opcode_repeat: { // Greedy. if (!__state._M_neg) @@ -364,6 +363,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION } } break; + case _S_opcode_alternative: + _M_dfs<__match_mode>(__state._M_alt); + if (!__dfs_mode || !_M_has_sol) + _M_dfs<__match_mode>(__state._M_next); + break; default: _GLIBCXX_DEBUG_ASSERT(false); } diff --git a/libstdc++-v3/include/bits/regex_scanner.tcc b/libstdc++-v3/include/bits/regex_scanner.tcc index 5332d2e..f501ff6 100644 --- a/libstdc++-v3/include/bits/regex_scanner.tcc +++ b/libstdc++-v3/include/bits/regex_scanner.tcc @@ -335,7 +335,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION else if (__c == 'x' || __c == 'u') { _M_value.erase(); - for (int i = 0; i < (__c == 'x' ? 2 : 4); i++) + for (int __i = 0; __i < (__c == 'x' ? 2 : 4); __i++) { if (_M_current == _M_end || !_M_ctype.is(_CtypeT::xdigit, *_M_current))