On Tue, 29 Apr 2025 at 10:37, Tomasz Kaminski <tkami...@redhat.com> wrote:
>
>
>
> On Tue, Apr 29, 2025 at 10:58 AM Jonathan Wakely <jwak...@redhat.com> wrote:
>>
>> This will hardly make a dent in the very slow compile times for <regex>
>> but it seems worth doing anyway.
>>
>> libstdc++-v3/ChangeLog:
>>
>>         * include/bits/regex_compiler.h: Replace _GLIBCXX17_CONSTEXPR
>>         with constexpr and disable diagnostics with pragmas.
>>         (_AnyMatcher::operator()): Use constexpr-if instead of tag
>>         dispatching. Postpone calls to _M_translate until after checking
>>         result of earlier calls.
>>         (_AnyMatcher::_M_apply): Remove both overloads.
>>         (_BracketMatcher::operator(), _BracketMatcher::_M_ready):
>>         Replace tag dispatching with 'if constexpr'.
>>         (_BracketMatcher::_M_apply(_CharT, true_type)): Remove.
>>         (_BracketMatcher::_M_apply(_CharT, false_type)): Remove second
>>         parameter.
>>         (_BracketMatcher::_M_make_cache): Remove both overloads.
>>         * include/bits/regex_compiler.tcc (_BracketMatcher::_M_apply):
>>         Remove second parameter.
>>         * include/bits/regex_executor.tcc: Replace _GLIBCXX17_CONSTEXPR
>>         with constexpr and disable diagnostics with pragmas.
>>         (_Executor::_M_handle_backref): Replace __glibcxx_assert with
>>         constexpr-if and use __builtin_unreachable for non-DFS mode
>>         specialization.
>>         (_Executor::_M_handle_accept): Mark _S_opcode_backref case as
>>         unreachable for non-DFS mode.
>> ---
>>
>> Tested x86_64-linux.
>>
>> The v2 patch uses constexpr-if in <bits/regex_executor.tcc> as well, and
>> optimizes _AnyMatcher so it doesn't do all the _M_translate calls up
>> front.
>>
>>
>>  libstdc++-v3/include/bits/regex_compiler.h   | 67 ++++++++------------
>>  libstdc++-v3/include/bits/regex_compiler.tcc |  2 +-
>>  libstdc++-v3/include/bits/regex_executor.tcc | 62 ++++++++++--------
>>  3 files changed, 64 insertions(+), 67 deletions(-)
>>
>> diff --git a/libstdc++-v3/include/bits/regex_compiler.h 
>> b/libstdc++-v3/include/bits/regex_compiler.h
>> index f24c7e3baa6..21e7065e066 100644
>> --- a/libstdc++-v3/include/bits/regex_compiler.h
>> +++ b/libstdc++-v3/include/bits/regex_compiler.h
>> @@ -38,6 +38,8 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
>>
>>  _GLIBCXX_END_NAMESPACE_CXX11
>>
>> +#pragma GCC diagnostic push
>> +#pragma GCC diagnostic ignored "-Wc++17-extensions" // if constexpr
>>  namespace __detail
>>  {
>>    /**
>> @@ -221,9 +223,9 @@ namespace __detail
>>        _CharT
>>        _M_translate(_CharT __ch) const
>>        {
>> -       if _GLIBCXX17_CONSTEXPR (__icase)
>> +       if constexpr (__icase)
>>           return _M_traits.translate_nocase(__ch);
>> -       else if _GLIBCXX17_CONSTEXPR (__collate)
>> +       else if constexpr (__collate)
>>           return _M_traits.translate(__ch);
>>         else
>>           return __ch;
>> @@ -285,7 +287,7 @@ namespace __detail
>>        bool
>>        _M_match_range(_CharT __first, _CharT __last, _CharT __ch) const
>>        {
>> -       if _GLIBCXX17_CONSTEXPR (!__icase)
>> +       if constexpr (!__icase)
>>           return __first <= __ch && __ch <= __last;
>>         else
>>           return this->_M_in_range_icase(__first, __last, __ch);
>> @@ -376,26 +378,20 @@ namespace __detail
>>
>>        bool
>>        operator()(_CharT __ch) const
>> -      { return _M_apply(__ch, typename is_same<_CharT, char>::type()); }
>> -
>> -      bool
>> -      _M_apply(_CharT __ch, true_type) const
>>        {
>> -       auto __c = _M_translator._M_translate(__ch);
>> -       auto __n = _M_translator._M_translate('\n');
>> -       auto __r = _M_translator._M_translate('\r');
>> -       return __c != __n && __c != __r;
>> -      }
>> -
>> -      bool
>> -      _M_apply(_CharT __ch, false_type) const
>> -      {
>> -       auto __c = _M_translator._M_translate(__ch);
>> -       auto __n = _M_translator._M_translate('\n');
>> -       auto __r = _M_translator._M_translate('\r');
>> -       auto __u2028 = _M_translator._M_translate(u'\u2028');
>> -       auto __u2029 = _M_translator._M_translate(u'\u2029');
>> -       return __c != __n && __c != __r && __c != __u2028 && __c != __u2029;
>> +       const auto __c = _M_translator._M_translate(__ch);
>> +       if (__c == _M_translator._M_translate('\n'))
>> +         return false;
>> +       if (__c == _M_translator._M_translate('\r'))
>> +         return false;
>> +       if constexpr (!is_same<_CharT, char>::value)
>> +         {
>> +           if (__c == _M_translator._M_translate(u'\u2028')) // line sep
>> +             return false;
>> +           if (__c == _M_translator._M_translate(u'\u2029')) // para sep
>> +             return false;
>> +         }
>> +       return true;
>>        }
>>
>>        _TransT _M_translator;
>> @@ -441,7 +437,10 @@ namespace __detail
>>        operator()(_CharT __ch) const
>>        {
>>         _GLIBCXX_DEBUG_ASSERT(_M_is_ready);
>> -       return _M_apply(__ch, _UseCache());
>> +       if constexpr (_UseCache::value)
>> +         if (!(__ch & 0x80)) [[__likely__]]
>> +           return _M_cache[static_cast<_UnsignedCharT>(__ch)];
>> +       return _M_apply(__ch);
>>        }
>>
>>        void
>> @@ -512,7 +511,9 @@ namespace __detail
>>         std::sort(_M_char_set.begin(), _M_char_set.end());
>>         auto __end = std::unique(_M_char_set.begin(), _M_char_set.end());
>>         _M_char_set.erase(__end, _M_char_set.end());
>> -       _M_make_cache(_UseCache());
>> +       if constexpr (_UseCache::value)
>> +         for (unsigned __i = 0; __i < 128; __i++) // Only cache 7-bit chars
>> +           _M_cache[__i] = _M_apply(static_cast<_CharT>(__i));
>>         _GLIBCXX_DEBUG_ONLY(_M_is_ready = true);
>>        }
>>
>> @@ -531,22 +532,7 @@ namespace __detail
>>        using _UnsignedCharT = typename std::make_unsigned<_CharT>::type;
>>
>>        bool
>> -      _M_apply(_CharT __ch, false_type) const;
>> -
>> -      bool
>> -      _M_apply(_CharT __ch, true_type) const
>> -      { return _M_cache[static_cast<_UnsignedCharT>(__ch)]; }
>> -
>> -      void
>> -      _M_make_cache(true_type)
>> -      {
>> -       for (unsigned __i = 0; __i < _M_cache.size(); __i++)
>> -         _M_cache[__i] = _M_apply(static_cast<_CharT>(__i), false_type());
>> -      }
>> -
>> -      void
>> -      _M_make_cache(false_type)
>> -      { }
>> +      _M_apply(_CharT __ch) const;
>>
>>      private:
>>        _GLIBCXX_STD_C::vector<_CharT>            _M_char_set;
>> @@ -565,6 +551,7 @@ namespace __detail
>>
>>   ///@} regex-detail
>>  } // namespace __detail
>> +#pragma GCC diagnostic pop
>>  _GLIBCXX_END_NAMESPACE_VERSION
>>  } // namespace std
>>
>> diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc 
>> b/libstdc++-v3/include/bits/regex_compiler.tcc
>> index cd0db2761b5..59b79fdd106 100644
>> --- a/libstdc++-v3/include/bits/regex_compiler.tcc
>> +++ b/libstdc++-v3/include/bits/regex_compiler.tcc
>> @@ -598,7 +598,7 @@ namespace __detail
>>    template<typename _TraitsT, bool __icase, bool __collate>
>>      bool
>>      _BracketMatcher<_TraitsT, __icase, __collate>::
>> -    _M_apply(_CharT __ch, false_type) const
>> +    _M_apply(_CharT __ch) const
>>      {
>>        return [this, __ch]
>>        {
>> diff --git a/libstdc++-v3/include/bits/regex_executor.tcc 
>> b/libstdc++-v3/include/bits/regex_executor.tcc
>> index e887e28854f..f310291b4c4 100644
>> --- a/libstdc++-v3/include/bits/regex_executor.tcc
>> +++ b/libstdc++-v3/include/bits/regex_executor.tcc
>> @@ -32,6 +32,8 @@ namespace std _GLIBCXX_VISIBILITY(default)
>>  {
>>  _GLIBCXX_BEGIN_NAMESPACE_VERSION
>>
>> +#pragma GCC diagnostic push
>> +#pragma GCC diagnostic ignored "-Wc++17-extensions" // if constexpr
>>  namespace __detail
>>  {
>>    template<typename _BiIter, typename _Alloc, typename _TraitsT,
>> @@ -217,7 +219,7 @@ namespace __detail
>>         }
>>        else // Non-greedy mode
>>         {
>> -         if (__dfs_mode)
>> +         if constexpr (__dfs_mode)
>>             {
>>               // vice-versa.
>>               _M_dfs(__match_mode, __state._M_next);
>> @@ -322,7 +324,7 @@ namespace __detail
>>
>>        if (_M_current == _M_end)
>>         return;
>> -      if (__dfs_mode)
>> +      if constexpr (__dfs_mode)
>>         {
>>           if (__state._M_matches(*_M_current))
>>             {
>> @@ -393,32 +395,35 @@ namespace __detail
>>      void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
>>      _M_handle_backref(_Match_mode __match_mode, _StateIdT __i)
>>      {
>> -      __glibcxx_assert(__dfs_mode);
>> -
>> -      const auto& __state = _M_nfa[__i];
>> -      auto& __submatch = _M_cur_results[__state._M_backref_index];
>> -      if (!__submatch.matched)
>> -       return;
>> -      auto __last = _M_current;
>> -      for (auto __tmp = __submatch.first;
>> -          __last != _M_end && __tmp != __submatch.second;
>> -          ++__tmp)
>> -       ++__last;
>> -      if (_Backref_matcher<_BiIter, _TraitsT>(
>> -             _M_re.flags() & regex_constants::icase,
>> -             _M_re._M_automaton->_M_traits)._M_apply(
>> -                 __submatch.first, __submatch.second, _M_current, __last))
>> +      if constexpr (__dfs_mode)
>
> With the changes to _M_dfs and if constexpr there, couldn't this be changed
> to just static_assert(__dfs_mode)? Or we hit the problem that expr in 
> static_assert
> is not dependent?

Hmm, I think it will work.

It's a member function of a class template, so it will only be
instantiated if called, or if there's an explicit instantiation. But
this is an internal type that users can't instantiate explicitly, and
using constexpr-if in _M_dfs means we never instantiate this member
function implicitly. So it will never be instantiated, and so it
should be OK to use static_assert in there.

I'll run the full testsuite to be sure ...


>>
>>         {
>> -         if (__last != _M_current)
>> +         const auto& __state = _M_nfa[__i];
>> +         auto& __submatch = _M_cur_results[__state._M_backref_index];
>> +         if (!__submatch.matched)
>> +           return;
>> +         auto __last = _M_current;
>> +         for (auto __tmp = __submatch.first;
>> +              __last != _M_end && __tmp != __submatch.second;
>> +              ++__tmp)
>> +           ++__last;
>> +         if (_Backref_matcher<_BiIter, _TraitsT>(
>> +                 _M_re.flags() & regex_constants::icase,
>> +                 _M_re._M_automaton->_M_traits)._M_apply(
>> +                     __submatch.first, __submatch.second, _M_current, 
>> __last))
>>             {
>> -             auto __backup = _M_current;
>> -             _M_current = __last;
>> -             _M_dfs(__match_mode, __state._M_next);
>> -             _M_current = __backup;
>> +             if (__last != _M_current)
>> +               {
>> +                 auto __backup = _M_current;
>> +                 _M_current = __last;
>> +                 _M_dfs(__match_mode, __state._M_next);
>> +                 _M_current = __backup;
>> +               }
>> +             else
>> +               _M_dfs(__match_mode, __state._M_next);
>>             }
>> -         else
>> -           _M_dfs(__match_mode, __state._M_next);
>>         }
>> +      else
>> +       __builtin_unreachable();
>>      }
>>
>>    template<typename _BiIter, typename _Alloc, typename _TraitsT,
>> @@ -426,7 +431,7 @@ namespace __detail
>>      void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
>>      _M_handle_accept(_Match_mode __match_mode, _StateIdT)
>>      {
>> -      if _GLIBCXX17_CONSTEXPR (__dfs_mode)
>> +      if constexpr (__dfs_mode)
>>         {
>>           __glibcxx_assert(!_M_has_sol);
>>           if (__match_mode == _Match_mode::_Exact)
>> @@ -529,7 +534,11 @@ namespace __detail
>>         case _S_opcode_match:
>>           _M_handle_match(__match_mode, __i); break;
>>         case _S_opcode_backref:
>> -         _M_handle_backref(__match_mode, __i); break;
>> +         if constexpr (__dfs_mode)
>> +           _M_handle_backref(__match_mode, __i);
>> +         else
>> +           __builtin_unreachable();
>> +         break;
>>         case _S_opcode_accept:
>>           _M_handle_accept(__match_mode, __i); break;
>>         case _S_opcode_alternative:
>> @@ -564,6 +573,7 @@ namespace __detail
>>        return __left_is_word != __right_is_word;
>>      }
>>  } // namespace __detail
>> +#pragma GCC diagnostic pop
>>
>>  _GLIBCXX_END_NAMESPACE_VERSION
>>  } // namespace
>> --
>> 2.49.0
>>

Reply via email to