On 9 March 2015 at 10:56, Tim Shen wrote: > I guess this patch doesn't break abi compatibility, so if everything > is Ok, I'm gonna patch it to 4.9 too.
It doesn't change the ABI directly, but it does change the layout of the match_results' data on the heap. It mean that instantiations of e.g. __regex_algo_impl compiled with GCC 4.9.2 will still call __res.resize(n+2) when poopulating a match_results whereas code compiled with 4.9.3 would expect to be able to find n+3 sub_match objects in the match_results object. I think it's safer to not change the branch. > I'm not sure if this is a "regression fix" though; if it's > inappropriate for trunk, then I can simply wait. For the reasons given above, I don't think we want to change this between 5.1 and 5.2, but also don't want to wait for 6.0 to fix these bugs, so I've CC'd the release managers for their OK. RMs: this fixes two bugs in std::regex, which was new in 4.9.0 and is still stabilising, as shown by this fix. For the reasons given above I think we should fix it on trunk now. The patch has my approval, OK to commit? > Bootstrapped in trunk and tested.
commit cf3977f3077a10de8ccf6dae188f945e1008b2c6 Author: timshen <tims...@google.com> Date: Mon Mar 9 03:41:31 2015 -0700 PR libstdc++/64441 * include/bits/regex.h (match_results<>::size, match_results<>::position, match_results<>::str, match_results<>::operator[], match_results<>::prefix, match_results<>::suffix, match_results<>::end, match_results<>::_M_resize, match_results<>::_M_unmatched_sub, match_results<>::_M_prefix, match_results<>::_M_suffix): Remove global __unmatched_sub. Add unmatched submatch as part of match_results. * include/bits/regex.tcc (__regex_algo_impl<>, regex_replace<>, regex_iterator<>::operator++): Adjust to use match_results::_M_prefix. * testsuite/28_regex/match_results/out_of_range_submatches.cc: New testcases. diff --git a/libstdc++-v3/include/bits/regex.h b/libstdc++-v3/include/bits/regex.h index 2b09da6..a23c2c9 100644 --- a/libstdc++-v3/include/bits/regex.h +++ b/libstdc++-v3/include/bits/regex.h @@ -1483,17 +1483,6 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 // [7.10] Class template match_results - /* - * Special sub_match object representing an unmatched sub-expression. - */ - template<typename _Bi_iter> - inline const sub_match<_Bi_iter>& - __unmatched_sub() - { - static const sub_match<_Bi_iter> __unmatched = sub_match<_Bi_iter>(); - return __unmatched; - } - /** * @brief The results of a match or search operation. * @@ -1523,15 +1512,20 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 { private: /* - * The vector base is empty if this does not represent a successful match. - * Otherwise it contains n+3 elements where n is the number of marked + * The vector base is empty if this does not represent a match (!ready()); + * Otherwise if it's a match failure, it contains 3 elements: + * [0] unmatched + * [1] prefix + * [2] suffix + * Otherwise it contains n+4 elements where n is the number of marked * sub-expressions: * [0] entire match * [1] 1st marked subexpression * ... * [n] nth marked subexpression - * [n+1] prefix - * [n+2] suffix + * [n+1] unmatched + * [n+2] prefix + * [n+3] suffix */ typedef std::vector<sub_match<_Bi_iter>, _Alloc> _Base_type; typedef std::iterator_traits<_Bi_iter> __iter_traits; @@ -1623,10 +1617,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 */ size_type size() const - { - size_type __size = _Base_type::size(); - return (__size && _Base_type::operator[](0).matched) ? __size - 2 : 0; - } + { return _Base_type::empty() ? 0 : _Base_type::size() - 3; } size_type max_size() const @@ -1670,15 +1661,10 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 * is zero (the default), in which case this function returns the offset * from the beginning of the target sequence to the beginning of the * match. - * - * Returns -1 if @p __sub is out of range. */ difference_type position(size_type __sub = 0) const - { - return __sub < size() ? std::distance(_M_begin, - (*this)[__sub].first) : -1; - } + { return std::distance(_M_begin, (*this)[__sub].first); } /** * @brief Gets the match or submatch converted to a string type. @@ -1691,7 +1677,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 */ string_type str(size_type __sub = 0) const - { return (*this)[__sub].str(); } + { return string_type((*this)[__sub]); } /** * @brief Gets a %sub_match reference for the match or submatch. @@ -1707,10 +1693,10 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 const_reference operator[](size_type __sub) const { - _GLIBCXX_DEBUG_ASSERT( ready() ); - return __sub < size() - ? _Base_type::operator[](__sub) - : __unmatched_sub<_Bi_iter>(); + _GLIBCXX_DEBUG_ASSERT( ready() ); + return __sub < size() + ? _Base_type::operator[](__sub) + : _M_unmatched_sub(); } /** @@ -1724,10 +1710,8 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 const_reference prefix() const { - _GLIBCXX_DEBUG_ASSERT( ready() ); - return !empty() - ? _Base_type::operator[](_Base_type::size() - 2) - : __unmatched_sub<_Bi_iter>(); + _GLIBCXX_DEBUG_ASSERT( ready() ); + return !empty() ? _M_prefix() : _M_unmatched_sub(); } /** @@ -1742,9 +1726,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 suffix() const { _GLIBCXX_DEBUG_ASSERT( ready() ); - return !empty() - ? _Base_type::operator[](_Base_type::size() - 1) - : __unmatched_sub<_Bi_iter>(); + return !empty() ? _M_suffix() : _M_unmatched_sub(); } /** @@ -1766,7 +1748,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 */ const_iterator end() const - { return _Base_type::end() - 2; } + { return _Base_type::end() - 3; } /** * @brief Gets an iterator to one-past-the-end of the collection. @@ -1883,6 +1865,34 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 const basic_regex<_Cp, _Rp>&, regex_constants::match_flag_type); + void + _M_resize(unsigned int __size) + { _Base_type::resize(__size + 3); } + + const_reference + _M_unmatched_sub() const + { return _Base_type::operator[](_Base_type::size() - 3); } + + sub_match<_Bi_iter>& + _M_unmatched_sub() + { return _Base_type::operator[](_Base_type::size() - 3); } + + const_reference + _M_prefix() const + { return _Base_type::operator[](_Base_type::size() - 2); } + + sub_match<_Bi_iter>& + _M_prefix() + { return _Base_type::operator[](_Base_type::size() - 2); } + + const_reference + _M_suffix() const + { return _Base_type::operator[](_Base_type::size() - 1); } + + sub_match<_Bi_iter>& + _M_suffix() + { return _Base_type::operator[](_Base_type::size() - 1); } + _Bi_iter _M_begin; }; diff --git a/libstdc++-v3/include/bits/regex.tcc b/libstdc++-v3/include/bits/regex.tcc index aad56e0..823ad51 100644 --- a/libstdc++-v3/include/bits/regex.tcc +++ b/libstdc++-v3/include/bits/regex.tcc @@ -63,7 +63,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m; __m._M_begin = __s; - __res.resize(__re._M_automaton->_M_sub_count() + 2); + __m._M_resize(__re._M_automaton->_M_sub_count()); for (auto& __it : __res) __it.matched = false; @@ -99,8 +99,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION for (auto& __it : __res) if (!__it.matched) __it.first = __it.second = __e; - auto& __pre = __res[__res.size()-2]; - auto& __suf = __res[__res.size()-1]; + auto& __pre = __m._M_prefix(); + auto& __suf = __m._M_suffix(); if (__match_mode) { __pre.matched = false; @@ -120,6 +120,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION __suf.matched = (__suf.first != __suf.second); } } + else + { + __m._M_resize(0); + for (auto& __it : __res) + { + __it.matched = false; + __it.first = __it.second = __e; + } + } return __ret; } @@ -374,7 +383,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION auto __output = [&](size_t __idx) { - auto& __sub = _Base_type::operator[](__idx); + auto& __sub = (*this)[__idx]; if (__sub.matched) __out = std::copy(__sub.first, __sub.second, __out); }; @@ -425,9 +434,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION else if (__eat('&')) __output(0); else if (__eat('`')) - __output(_Base_type::size()-2); + { + auto& __sub = _M_prefix(); + if (__sub.matched) + __out = std::copy(__sub.first, __sub.second, __out); + } else if (__eat('\'')) - __output(_Base_type::size()-1); + { + auto& __sub = _M_suffix(); + if (__sub.matched) + __out = std::copy(__sub.first, __sub.second, __out); + } else if (__fctyp.is(__ctype_type::digit, *__next)) { long __num = __traits.value(*__next, 10); @@ -532,7 +549,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION | regex_constants::match_continuous)) { _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched); - auto& __prefix = _M_match.at(_M_match.size()); + auto& __prefix = _M_match._M_prefix(); __prefix.first = __prefix_first; __prefix.matched = __prefix.first != __prefix.second; // [28.12.1.4.5] @@ -547,7 +564,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags)) { _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched); - auto& __prefix = _M_match.at(_M_match.size()); + auto& __prefix = _M_match._M_prefix(); __prefix.first = __prefix_first; __prefix.matched = __prefix.first != __prefix.second; // [28.12.1.4.5] diff --git a/libstdc++-v3/testsuite/28_regex/match_results/out_of_range_submatches.cc b/libstdc++-v3/testsuite/28_regex/match_results/out_of_range_submatches.cc new file mode 100644 index 0000000..2309b13 --- /dev/null +++ b/libstdc++-v3/testsuite/28_regex/match_results/out_of_range_submatches.cc @@ -0,0 +1,81 @@ +// { dg-options "-std=gnu++11" } + +// Copyright (C) 2015 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING3. If not see +// <http://www.gnu.org/licenses/>. + +#include <regex> +#include <testsuite_hooks.h> +#include <testsuite_regex.h> + +using namespace std; +using namespace __gnu_test; + +// libstdc++/64441 +void +test01() +{ + bool test __attribute__((unused)) = true; + + const char s[] = "abc"; + const std::regex re("(\\d+)|(\\w+)"); + + std::cmatch m; + VERIFY(regex_search_debug(s, m, re)); + + std::tuple<bool, string, int, int> expected[] = { + make_tuple(true, "abc", 0, 3), + make_tuple(false, "", 3, 3), + make_tuple(true, "abc", 0, 3), + make_tuple(false, "", 3, 3), + }; + for (size_t i = 0, n = m.size(); i <= n; ++i) { + auto&& sub = m[i]; + VERIFY(sub.matched == std::get<0>(expected[i])); + VERIFY(sub.str() == std::get<1>(expected[i])); + VERIFY((sub.first - s) == std::get<2>(expected[i])); + VERIFY((sub.second - s) == std::get<3>(expected[i])); + } +} + +// libstdc++/64781 +void +test02() +{ + bool test __attribute__((unused)) = true; + + std::match_results<const char*> m; + const char s[] = "a"; + VERIFY(regex_search_debug(s, m, std::regex("a"))); + + VERIFY(m.size() == 1); + + VERIFY(m[0].first == s+0); + VERIFY(m[0].second == s+1); + VERIFY(m[0].matched == true); + + VERIFY(m[42].first == s+1); + VERIFY(m[42].second == s+1); + VERIFY(m[42].matched == false); +} + +int +main() +{ + test01(); + test02(); + return 0; +}