On Tue, 29 Apr 2025 at 10:37, Tomasz Kaminski <tkami...@redhat.com> wrote: > > > > On Tue, Apr 29, 2025 at 10:58 AM Jonathan Wakely <jwak...@redhat.com> wrote: >> >> This will hardly make a dent in the very slow compile times for <regex> >> but it seems worth doing anyway. >> >> libstdc++-v3/ChangeLog: >> >> * include/bits/regex_compiler.h: Replace _GLIBCXX17_CONSTEXPR >> with constexpr and disable diagnostics with pragmas. >> (_AnyMatcher::operator()): Use constexpr-if instead of tag >> dispatching. Postpone calls to _M_translate until after checking >> result of earlier calls. >> (_AnyMatcher::_M_apply): Remove both overloads. >> (_BracketMatcher::operator(), _BracketMatcher::_M_ready): >> Replace tag dispatching with 'if constexpr'. >> (_BracketMatcher::_M_apply(_CharT, true_type)): Remove. >> (_BracketMatcher::_M_apply(_CharT, false_type)): Remove second >> parameter. >> (_BracketMatcher::_M_make_cache): Remove both overloads. >> * include/bits/regex_compiler.tcc (_BracketMatcher::_M_apply): >> Remove second parameter. >> * include/bits/regex_executor.tcc: Replace _GLIBCXX17_CONSTEXPR >> with constexpr and disable diagnostics with pragmas. >> (_Executor::_M_handle_backref): Replace __glibcxx_assert with >> constexpr-if and use __builtin_unreachable for non-DFS mode >> specialization. >> (_Executor::_M_handle_accept): Mark _S_opcode_backref case as >> unreachable for non-DFS mode. >> --- >> >> Tested x86_64-linux. >> >> The v2 patch uses constexpr-if in <bits/regex_executor.tcc> as well, and >> optimizes _AnyMatcher so it doesn't do all the _M_translate calls up >> front. >> >> >> libstdc++-v3/include/bits/regex_compiler.h | 67 ++++++++------------ >> libstdc++-v3/include/bits/regex_compiler.tcc | 2 +- >> libstdc++-v3/include/bits/regex_executor.tcc | 62 ++++++++++-------- >> 3 files changed, 64 insertions(+), 67 deletions(-) >> >> diff --git a/libstdc++-v3/include/bits/regex_compiler.h >> b/libstdc++-v3/include/bits/regex_compiler.h >> index f24c7e3baa6..21e7065e066 100644 >> --- a/libstdc++-v3/include/bits/regex_compiler.h >> +++ b/libstdc++-v3/include/bits/regex_compiler.h >> @@ -38,6 +38,8 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 >> >> _GLIBCXX_END_NAMESPACE_CXX11 >> >> +#pragma GCC diagnostic push >> +#pragma GCC diagnostic ignored "-Wc++17-extensions" // if constexpr >> namespace __detail >> { >> /** >> @@ -221,9 +223,9 @@ namespace __detail >> _CharT >> _M_translate(_CharT __ch) const >> { >> - if _GLIBCXX17_CONSTEXPR (__icase) >> + if constexpr (__icase) >> return _M_traits.translate_nocase(__ch); >> - else if _GLIBCXX17_CONSTEXPR (__collate) >> + else if constexpr (__collate) >> return _M_traits.translate(__ch); >> else >> return __ch; >> @@ -285,7 +287,7 @@ namespace __detail >> bool >> _M_match_range(_CharT __first, _CharT __last, _CharT __ch) const >> { >> - if _GLIBCXX17_CONSTEXPR (!__icase) >> + if constexpr (!__icase) >> return __first <= __ch && __ch <= __last; >> else >> return this->_M_in_range_icase(__first, __last, __ch); >> @@ -376,26 +378,20 @@ namespace __detail >> >> bool >> operator()(_CharT __ch) const >> - { return _M_apply(__ch, typename is_same<_CharT, char>::type()); } >> - >> - bool >> - _M_apply(_CharT __ch, true_type) const >> { >> - auto __c = _M_translator._M_translate(__ch); >> - auto __n = _M_translator._M_translate('\n'); >> - auto __r = _M_translator._M_translate('\r'); >> - return __c != __n && __c != __r; >> - } >> - >> - bool >> - _M_apply(_CharT __ch, false_type) const >> - { >> - auto __c = _M_translator._M_translate(__ch); >> - auto __n = _M_translator._M_translate('\n'); >> - auto __r = _M_translator._M_translate('\r'); >> - auto __u2028 = _M_translator._M_translate(u'\u2028'); >> - auto __u2029 = _M_translator._M_translate(u'\u2029'); >> - return __c != __n && __c != __r && __c != __u2028 && __c != __u2029; >> + const auto __c = _M_translator._M_translate(__ch); >> + if (__c == _M_translator._M_translate('\n')) >> + return false; >> + if (__c == _M_translator._M_translate('\r')) >> + return false; >> + if constexpr (!is_same<_CharT, char>::value) >> + { >> + if (__c == _M_translator._M_translate(u'\u2028')) // line sep >> + return false; >> + if (__c == _M_translator._M_translate(u'\u2029')) // para sep >> + return false; >> + } >> + return true; >> } >> >> _TransT _M_translator; >> @@ -441,7 +437,10 @@ namespace __detail >> operator()(_CharT __ch) const >> { >> _GLIBCXX_DEBUG_ASSERT(_M_is_ready); >> - return _M_apply(__ch, _UseCache()); >> + if constexpr (_UseCache::value) >> + if (!(__ch & 0x80)) [[__likely__]] >> + return _M_cache[static_cast<_UnsignedCharT>(__ch)]; >> + return _M_apply(__ch); >> } >> >> void >> @@ -512,7 +511,9 @@ namespace __detail >> std::sort(_M_char_set.begin(), _M_char_set.end()); >> auto __end = std::unique(_M_char_set.begin(), _M_char_set.end()); >> _M_char_set.erase(__end, _M_char_set.end()); >> - _M_make_cache(_UseCache()); >> + if constexpr (_UseCache::value) >> + for (unsigned __i = 0; __i < 128; __i++) // Only cache 7-bit chars >> + _M_cache[__i] = _M_apply(static_cast<_CharT>(__i)); >> _GLIBCXX_DEBUG_ONLY(_M_is_ready = true); >> } >> >> @@ -531,22 +532,7 @@ namespace __detail >> using _UnsignedCharT = typename std::make_unsigned<_CharT>::type; >> >> bool >> - _M_apply(_CharT __ch, false_type) const; >> - >> - bool >> - _M_apply(_CharT __ch, true_type) const >> - { return _M_cache[static_cast<_UnsignedCharT>(__ch)]; } >> - >> - void >> - _M_make_cache(true_type) >> - { >> - for (unsigned __i = 0; __i < _M_cache.size(); __i++) >> - _M_cache[__i] = _M_apply(static_cast<_CharT>(__i), false_type()); >> - } >> - >> - void >> - _M_make_cache(false_type) >> - { } >> + _M_apply(_CharT __ch) const; >> >> private: >> _GLIBCXX_STD_C::vector<_CharT> _M_char_set; >> @@ -565,6 +551,7 @@ namespace __detail >> >> ///@} regex-detail >> } // namespace __detail >> +#pragma GCC diagnostic pop >> _GLIBCXX_END_NAMESPACE_VERSION >> } // namespace std >> >> diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc >> b/libstdc++-v3/include/bits/regex_compiler.tcc >> index cd0db2761b5..59b79fdd106 100644 >> --- a/libstdc++-v3/include/bits/regex_compiler.tcc >> +++ b/libstdc++-v3/include/bits/regex_compiler.tcc >> @@ -598,7 +598,7 @@ namespace __detail >> template<typename _TraitsT, bool __icase, bool __collate> >> bool >> _BracketMatcher<_TraitsT, __icase, __collate>:: >> - _M_apply(_CharT __ch, false_type) const >> + _M_apply(_CharT __ch) const >> { >> return [this, __ch] >> { >> diff --git a/libstdc++-v3/include/bits/regex_executor.tcc >> b/libstdc++-v3/include/bits/regex_executor.tcc >> index e887e28854f..f310291b4c4 100644 >> --- a/libstdc++-v3/include/bits/regex_executor.tcc >> +++ b/libstdc++-v3/include/bits/regex_executor.tcc >> @@ -32,6 +32,8 @@ namespace std _GLIBCXX_VISIBILITY(default) >> { >> _GLIBCXX_BEGIN_NAMESPACE_VERSION >> >> +#pragma GCC diagnostic push >> +#pragma GCC diagnostic ignored "-Wc++17-extensions" // if constexpr >> namespace __detail >> { >> template<typename _BiIter, typename _Alloc, typename _TraitsT, >> @@ -217,7 +219,7 @@ namespace __detail >> } >> else // Non-greedy mode >> { >> - if (__dfs_mode) >> + if constexpr (__dfs_mode) >> { >> // vice-versa. >> _M_dfs(__match_mode, __state._M_next); >> @@ -322,7 +324,7 @@ namespace __detail >> >> if (_M_current == _M_end) >> return; >> - if (__dfs_mode) >> + if constexpr (__dfs_mode) >> { >> if (__state._M_matches(*_M_current)) >> { >> @@ -393,32 +395,35 @@ namespace __detail >> void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: >> _M_handle_backref(_Match_mode __match_mode, _StateIdT __i) >> { >> - __glibcxx_assert(__dfs_mode); >> - >> - const auto& __state = _M_nfa[__i]; >> - auto& __submatch = _M_cur_results[__state._M_backref_index]; >> - if (!__submatch.matched) >> - return; >> - auto __last = _M_current; >> - for (auto __tmp = __submatch.first; >> - __last != _M_end && __tmp != __submatch.second; >> - ++__tmp) >> - ++__last; >> - if (_Backref_matcher<_BiIter, _TraitsT>( >> - _M_re.flags() & regex_constants::icase, >> - _M_re._M_automaton->_M_traits)._M_apply( >> - __submatch.first, __submatch.second, _M_current, __last)) >> + if constexpr (__dfs_mode) > > With the changes to _M_dfs and if constexpr there, couldn't this be changed > to just static_assert(__dfs_mode)? Or we hit the problem that expr in > static_assert > is not dependent?
Hmm, I think it will work. It's a member function of a class template, so it will only be instantiated if called, or if there's an explicit instantiation. But this is an internal type that users can't instantiate explicitly, and using constexpr-if in _M_dfs means we never instantiate this member function implicitly. So it will never be instantiated, and so it should be OK to use static_assert in there. I'll run the full testsuite to be sure ... >> >> { >> - if (__last != _M_current) >> + const auto& __state = _M_nfa[__i]; >> + auto& __submatch = _M_cur_results[__state._M_backref_index]; >> + if (!__submatch.matched) >> + return; >> + auto __last = _M_current; >> + for (auto __tmp = __submatch.first; >> + __last != _M_end && __tmp != __submatch.second; >> + ++__tmp) >> + ++__last; >> + if (_Backref_matcher<_BiIter, _TraitsT>( >> + _M_re.flags() & regex_constants::icase, >> + _M_re._M_automaton->_M_traits)._M_apply( >> + __submatch.first, __submatch.second, _M_current, >> __last)) >> { >> - auto __backup = _M_current; >> - _M_current = __last; >> - _M_dfs(__match_mode, __state._M_next); >> - _M_current = __backup; >> + if (__last != _M_current) >> + { >> + auto __backup = _M_current; >> + _M_current = __last; >> + _M_dfs(__match_mode, __state._M_next); >> + _M_current = __backup; >> + } >> + else >> + _M_dfs(__match_mode, __state._M_next); >> } >> - else >> - _M_dfs(__match_mode, __state._M_next); >> } >> + else >> + __builtin_unreachable(); >> } >> >> template<typename _BiIter, typename _Alloc, typename _TraitsT, >> @@ -426,7 +431,7 @@ namespace __detail >> void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: >> _M_handle_accept(_Match_mode __match_mode, _StateIdT) >> { >> - if _GLIBCXX17_CONSTEXPR (__dfs_mode) >> + if constexpr (__dfs_mode) >> { >> __glibcxx_assert(!_M_has_sol); >> if (__match_mode == _Match_mode::_Exact) >> @@ -529,7 +534,11 @@ namespace __detail >> case _S_opcode_match: >> _M_handle_match(__match_mode, __i); break; >> case _S_opcode_backref: >> - _M_handle_backref(__match_mode, __i); break; >> + if constexpr (__dfs_mode) >> + _M_handle_backref(__match_mode, __i); >> + else >> + __builtin_unreachable(); >> + break; >> case _S_opcode_accept: >> _M_handle_accept(__match_mode, __i); break; >> case _S_opcode_alternative: >> @@ -564,6 +573,7 @@ namespace __detail >> return __left_is_word != __right_is_word; >> } >> } // namespace __detail >> +#pragma GCC diagnostic pop >> >> _GLIBCXX_END_NAMESPACE_VERSION >> } // namespace >> -- >> 2.49.0 >>